Skip to content

Commit

Permalink
bayesian cc now inherits from the new abstract class WithConfidenceAB…
Browse files Browse the repository at this point in the history
…C, just like AggregativeBootstrap
  • Loading branch information
AlexMoreo committed Nov 29, 2024
1 parent a0c84c5 commit 2728dfb
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 125 deletions.
5 changes: 2 additions & 3 deletions TODO.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
- [TODO] adapt BayesianCC to WithConfidence interface
- [TODO] document confidence
- [TODO] Test the return_type="index" in protocols and finish the "distributin_samples.py" example
- [TODO] document confidence in manuals
- [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
- [TODO] Add EDy (an implementation is available at quantificationlib)
- [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
- [TODO] add HistNetQ
Expand Down
3 changes: 2 additions & 1 deletion examples/13.bayesian_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@

from sklearn.ensemble import RandomForestClassifier

from quapy.method.aggregative import BayesianCC, ACC, PACC
from quapy.method.aggregative import ACC, PACC
# import through the quapy package (as the other quapy imports here do) so the example
# does not depend on the quapy/ directory itself being on sys.path
from quapy.method.confidence import BayesianCC
from quapy.data import LabelledCollection, Dataset


Expand Down
6 changes: 5 additions & 1 deletion examples/15.confidence_regions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from quapy.method.confidence import BayesianCC
from quapy.method.confidence import AggregativeBootstrap
from quapy.method.aggregative import PACC
import quapy.functional as F
Expand All @@ -23,7 +24,8 @@

# by simply wrapping an aggregative quantifier within the AggregativeBootstrap class, we can obtain confidence
# intervals around the point estimate; in this case, at a 95% confidence level
pacc = AggregativeBootstrap(PACC(), confidence_level=0.95)
pacc = AggregativeBootstrap(PACC(), n_test_samples=500, confidence_level=0.95)


with qp.util.temp_seed(0):
# we train the quantifier the usual way
Expand Down Expand Up @@ -73,6 +75,8 @@
- confidence ellipse in the Centered-Log Ratio (CLR) space: creates an ellipse in the CLR space (this should be
convenient for taking into account the inner structure of the probability simplex)
use: AggregativeBootstrap(PACC(), confidence_level=0.95, method='ellipse-clr')
Other methods that return confidence regions in QuaPy include the BayesianCC method.
"""


5 changes: 3 additions & 2 deletions quapy/method/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import method.confidence
from . import base
from . import aggregative
from . import non_aggregative
Expand All @@ -22,7 +23,7 @@
aggregative.KDEyML,
aggregative.KDEyCS,
aggregative.KDEyHD,
aggregative.BayesianCC
method.confidence.BayesianCC
}

BINARY_METHODS = {
Expand All @@ -45,7 +46,7 @@
aggregative.KDEyML,
aggregative.KDEyCS,
aggregative.KDEyHD,
aggregative.BayesianCC
method.confidence.BayesianCC
}

NON_AGGREGATIVE_METHODS = {
Expand Down
95 changes: 0 additions & 95 deletions quapy/method/aggregative.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
from quapy.classification.svmperf import SVMperf
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier, BinaryQuantifier, OneVsAllGeneric
from quapy.method import _bayesian



# Abstract classes
Expand Down Expand Up @@ -808,99 +806,6 @@ def EM(cls, tr_prev, posterior_probabilities, epsilon=EPSILON):
return qs, ps


class BayesianCC(AggregativeCrispQuantifier):
    """
    `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ method, a variant of :class:`ACC`
    that computes the posterior probability distribution over the prevalence vectors instead of
    returning a single point estimate obtained by matrix inversion.

    It can be used to diagnose degeneracy in the predictions (visible when the confusion matrix has
    a high condition number) or to quantify the uncertainty around the point estimate.

    This method relies on extra dependencies, which have to be installed via:
    `$ pip install quapy[bayes]`

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: a float in (0, 1) indicating the proportion of the training data to be used,
        as a stratified held-out validation set, for generating classifier predictions.
    :param num_warmup: number of warmup iterations for the MCMC sampler (default 500)
    :param num_samples: number of samples to draw from the posterior (default 1000)
    :param mcmc_seed: random seed for the MCMC sampler (default 0)
    """

    def __init__(self,
                 classifier: BaseEstimator=None,
                 val_split: float = 0.75,
                 num_warmup: int = 500,
                 num_samples: int = 1_000,
                 mcmc_seed: int = 0):

        # sampler sizes are checked first, then the validation split, then the optional dependencies
        if num_warmup <= 0:
            raise ValueError(f'parameter {num_warmup=} must be a positive integer')
        if num_samples <= 0:
            raise ValueError(f'parameter {num_samples=} must be a positive integer')

        split_is_float = isinstance(val_split, float)
        if not split_is_float or val_split <= 0 or val_split >= 1:
            raise ValueError(f'val_split must be a float in (0, 1), got {val_split}')

        if _bayesian.DEPENDENCIES_INSTALLED is False:
            raise ImportError("Auxiliary dependencies are required. Run `$ pip install quapy[bayes]` to install them.")

        self.classifier = qp._get_classifier(classifier)
        self.val_split = val_split
        self.num_warmup = num_warmup
        self.num_samples = num_samples
        self.mcmc_seed = mcmc_seed

        # Confusion counts of shape (n_classes, n_predicted_classes): entry (y, c) holds the number
        # of instances whose true class is y and whose predicted class is c.
        # Remains None until `aggregation_fit` fills it in.
        self._n_and_c_labeled = None

        # Posterior samples (indexed by the `_bayesian` keys); remains None until
        # `sample_from_posterior` has been run.
        self._samples = None

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        """
        Estimates the misclassification rates by storing the confusion matrix of the
        classifier predictions as an array of floats.

        :param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing,
            as instances, the label predictions issued by the classifier and, as labels, the true labels
        :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
        """
        predicted, actual = classif_predictions.Xy
        counts = confusion_matrix(y_true=actual, y_pred=predicted, labels=self.classifier.classes_)
        self._n_and_c_labeled = counts.astype(float)

    def sample_from_posterior(self, classif_predictions):
        """
        Draws samples from the posterior given the label predictions for the unlabeled data,
        keeps them in `self._samples`, and returns them.

        :param classif_predictions: the label predictions issued by the classifier
        :return: the object returned by `_bayesian.sample_posterior`
        :raises ValueError: if `aggregation_fit` has not been called yet
        """
        if self._n_and_c_labeled is None:
            raise ValueError("aggregation_fit must be called before sample_from_posterior")

        known_classes = self.classifier.classes_
        unlabeled_counts = F.counts_from_labels(classif_predictions, known_classes).astype(float)

        posterior = _bayesian.sample_posterior(
            n_c_unlabeled=unlabeled_counts,
            n_y_and_c_labeled=self._n_and_c_labeled,
            num_warmup=self.num_warmup,
            num_samples=self.num_samples,
            seed=self.mcmc_seed,
        )
        self._samples = posterior
        return posterior

    def get_prevalence_samples(self):
        """
        Returns the entry of the stored posterior samples keyed by `_bayesian.P_TEST_Y`.

        :raises ValueError: if no posterior samples have been generated yet
        """
        stored = self._samples
        if stored is None:
            raise ValueError("sample_from_posterior must be called before get_prevalence_samples")
        return stored[_bayesian.P_TEST_Y]

    def get_conditional_probability_samples(self):
        """
        Returns the entry of the stored posterior samples keyed by `_bayesian.P_C_COND_Y`.

        :raises ValueError: if no posterior samples have been generated yet
        """
        stored = self._samples
        if stored is None:
            raise ValueError("sample_from_posterior must be called before get_conditional_probability_samples")
        return stored[_bayesian.P_C_COND_Y]

    def aggregate(self, classif_predictions):
        """
        Samples from the posterior and returns, as a float array, the mean (along axis 0) of the
        samples keyed by `_bayesian.P_TEST_Y`.

        :param classif_predictions: the label predictions issued by the classifier
        :return: `np.ndarray` of floats with the averaged samples
        """
        posterior = self.sample_from_posterior(classif_predictions)
        prevalence_draws = posterior[_bayesian.P_TEST_Y]
        point_estimate = prevalence_draws.mean(axis=0)
        return np.asarray(point_estimate, dtype=float)


class HDy(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
"""
`Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).
Expand Down
Loading

0 comments on commit 2728dfb

Please sign in to comment.