Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate composable methods from qunfold #32

Merged
merged 3 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
python -m pip install -e .[bayes,tests]
python -m pip install -e .[bayes,composable,tests]
- name: Test with unittest
run: python -m unittest
8 changes: 8 additions & 0 deletions docs/source/quapy.method.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ quapy.method.non\_aggregative module
:undoc-members:
:show-inheritance:

quapy.method.composable module
------------------------------

.. automodule:: quapy.method.composable
:members:
:undoc-members:
:show-inheritance:

Module contents
---------------

Expand Down
90 changes: 90 additions & 0 deletions quapy/method/composable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold."""

import qunfold
from qunfold.quapy import QuaPyWrapper
from qunfold.sklearn import CVClassifier
from qunfold import (
LeastSquaresLoss, # losses
BlobelLoss,
EnergyLoss,
HellingerSurrogateLoss,
CombinedLoss,
TikhonovRegularization,
TikhonovRegularized,
ClassTransformer, # transformers
HistogramTransformer,
DistanceTransformer,
KernelTransformer,
EnergyKernelTransformer,
LaplacianKernelTransformer,
GaussianKernelTransformer,
GaussianRFFKernelTransformer,
)

# Public members of this module, e.g., for auto-documentation in sphinx.
# QuaPyWrapper is imported above but deliberately omitted from this list.
__all__ = [ # control public members, e.g., for auto-documentation in sphinx; omit QuaPyWrapper
    # factory function defined in this module
    "ComposableQuantifier",
    # re-exported from qunfold.sklearn
    "CVClassifier",
    # losses, re-exported from qunfold
    "LeastSquaresLoss",
    "BlobelLoss",
    "EnergyLoss",
    "HellingerSurrogateLoss",
    "CombinedLoss",
    "TikhonovRegularization",
    "TikhonovRegularized",
    # transformers, re-exported from qunfold
    "ClassTransformer",
    "HistogramTransformer",
    "DistanceTransformer",
    "KernelTransformer",
    "EnergyKernelTransformer",
    "LaplacianKernelTransformer",
    "GaussianKernelTransformer",
    "GaussianRFFKernelTransformer",
]

def ComposableQuantifier(loss, transformer, **kwargs):
    """A generic quantification / unfolding method that solves a linear system of equations.

    This factory function composes any quantifier that can be described in terms of a
    loss function, a feature transformation, and a regularization term. In this
    implementation, the loss is minimized through unconstrained second-order
    minimization. Valid probability estimates are ensured through a soft-max trick by
    Bunse (2022).

    Args:
        loss: An instance of a loss class from `quapy.method.composable`.
        transformer: An instance of a transformer class from `quapy.method.composable`.
        **kwargs: Forwarded unchanged to `qunfold.GenericMethod`. The optional
            arguments listed below are passed this way.
        solver (optional): The `method` argument in `scipy.optimize.minimize`. Defaults to `"trust-ncg"`.
        solver_options (optional): The `options` argument in `scipy.optimize.minimize`. Defaults to `{"gtol": 1e-8, "maxiter": 1000}`.
        seed (optional): A random number generator seed from which a numpy RandomState is created. Defaults to `None`.

    Returns:
        A `QuaPyWrapper` around the `qunfold.GenericMethod` that is composed from
        `loss` and `transformer`.

    Examples:
        Here, we create the ordinal variant of ACC (Bunse et al., 2023). This variant
        consists of the original feature transformation of ACC and of the original
        loss of ACC, the latter of which is regularized towards smooth solutions.

            >>> from quapy.method.composable import (
            ...     ComposableQuantifier,
            ...     TikhonovRegularized,
            ...     LeastSquaresLoss,
            ...     ClassTransformer,
            ... )
            >>> from sklearn.ensemble import RandomForestClassifier
            >>> o_acc = ComposableQuantifier(
            ...     TikhonovRegularized(LeastSquaresLoss(), 0.01),
            ...     ClassTransformer(RandomForestClassifier(oob_score=True))
            ... )

        Here, we perform hyper-parameter optimization with the ordinal ACC.

            >>> import quapy
            >>> quapy.model_selection.GridSearchQ(
            ...     model = o_acc,
            ...     param_grid = { # try both splitting criteria
            ...         "transformer__classifier__estimator__criterion": ["gini", "entropy"],
            ...     },
            ...     # ...
            ... )

        To use a classifier that does not provide the `oob_score` argument, such as
        logistic regression, you have to configure a cross validation of this
        classifier. Here, we employ 10 cross validation folds. 5 folds are the default.

            >>> from quapy.method.composable import CVClassifier
            >>> from sklearn.linear_model import LogisticRegression
            >>> acc_lr = ComposableQuantifier(
            ...     LeastSquaresLoss(),
            ...     ClassTransformer(CVClassifier(LogisticRegression(), 10))
            ... )
    """
    # The qunfold method does the actual work; the wrapper is what this function returns.
    return QuaPyWrapper(qunfold.GenericMethod(loss, transformer, **kwargs))
31 changes: 31 additions & 0 deletions quapy/tests/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,29 @@
from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_METHODS
from quapy.functional import check_prevalence_vector

# a random selection of composed methods to test the qunfold integration
from quapy.method.composable import (
ComposableQuantifier,
LeastSquaresLoss,
HellingerSurrogateLoss,
ClassTransformer,
HistogramTransformer,
CVClassifier,
)
# Quantifiers exercised by TestMethods.test_composable. Each entry is built once,
# at import time, by composing a loss with a feature transformer.
COMPOSABLE_METHODS = [
    ComposableQuantifier( # ACC
        LeastSquaresLoss(),
        ClassTransformer(CVClassifier(LogisticRegression()))
    ),
    ComposableQuantifier( # HDy
        HellingerSurrogateLoss(),
        HistogramTransformer(
            3, # 3 bins per class
            # the histogram is given a class transformer as its preprocessor
            preprocessor = ClassTransformer(CVClassifier(LogisticRegression()))
        )
    ),
]

class TestMethods(unittest.TestCase):

tiny_dataset_multiclass = qp.datasets.fetch_UCIMulticlassDataset('academic-success').reduce(n_test=10)
Expand Down Expand Up @@ -87,6 +110,14 @@ def test_quanet(self):
estim_prevalences = model.quantify(dataset.test.instances)
self.assertTrue(check_prevalence_vector(estim_prevalences))

def test_composable(self):
    """Fit every composed quantifier on every test dataset and validate its output.

    For each dataset in ``TestMethods.datasets`` and each entry of
    ``COMPOSABLE_METHODS``, the quantifier is fitted on the training split and
    asked to quantify the test instances; the estimate must pass
    ``check_prevalence_vector``.
    """
    for data in TestMethods.datasets:
        for quantifier in COMPOSABLE_METHODS:
            print('testing', quantifier)
            quantifier.fit(data.training)
            prevalences = quantifier.quantify(data.test.X)
            is_valid = check_prevalence_vector(prevalences)
            self.assertTrue(is_valid)


if __name__ == '__main__':
unittest.main()
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ def get_version(rel_path):
# projects.
extras_require={ # Optional
'bayes': ['jax', 'jaxlib', 'numpyro'],
'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4'],
'tests': ['certifi'],
'docs' : ['sphinx-rtd-theme'],
},

# If there are data files included in your packages that need to be
Expand Down
Loading