diff --git a/0001-Relax-init-parameter-type-checks.patch b/0001-Relax-init-parameter-type-checks.patch new file mode 100644 index 00000000..256b1e21 --- /dev/null +++ b/0001-Relax-init-parameter-type-checks.patch @@ -0,0 +1,46 @@ +From 48a6dd8f0e7ff52d116b0c01e36dac4504547781 Mon Sep 17 00:00:00 2001 +From: Timo Kaufmann +Date: Thu, 16 Jul 2020 16:57:14 +0200 +Subject: [PATCH] Relax init parameter type checks + +We now allow any "type" (uninitialized classes) and all numeric numpy +types. See https://github.com/scikit-learn/scikit-learn/issues/17756 for +a discussion. +--- + sklearn/utils/estimator_checks.py | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py +index 30c668237..ed3c40d67 100644 +--- a/sklearn/utils/estimator_checks.py ++++ b/sklearn/utils/estimator_checks.py +@@ -2529,12 +2529,20 @@ def check_parameters_default_constructible(name, Estimator): + assert init_param.default != init_param.empty, ( + "parameter %s for %s has no default value" + % (init_param.name, type(estimator).__name__)) +- if type(init_param.default) is type: +- assert init_param.default in [np.float64, np.int64] +- else: +- assert (type(init_param.default) in +- [str, int, float, bool, tuple, type(None), +- np.float64, types.FunctionType, joblib.Memory]) ++ allowed_types = { ++ str, ++ int, ++ float, ++ bool, ++ tuple, ++ type(None), ++ type, ++ types.FunctionType, ++ joblib.Memory, ++ } ++ # Any numpy numeric such as np.int32. ++ allowed_types.update(np.core.numerictypes.allTypes.values()) ++ assert type(init_param.default) in allowed_types + if init_param.name not in params.keys(): + # deprecated parameter, not in get_params + assert init_param.default is None +-- +2.28.0 + diff --git a/csrank/choicefunction/__init__.py b/csrank/choicefunction/__init__.py index 41b4aff4..5ad21049 100644 --- a/csrank/choicefunction/__init__.py +++ b/csrank/choicefunction/__init__.py @@ -8,14 +8,16 @@ from .pairwise_choice import PairwiseSVMChoiceFunction from .ranknet_choice import RankNetChoiceFunction -__all__ = [ - "AllPositive", - "CmpNetChoiceFunction", - "FATEChoiceFunction", - "FATELinearChoiceFunction", - "FETAChoiceFunction", - "FETALinearChoiceFunction", - "GeneralizedLinearModel", - "PairwiseSVMChoiceFunction", - "RankNetChoiceFunction", +algorithms = [ + AllPositive, + CmpNetChoiceFunction, + FATEChoiceFunction, + FATELinearChoiceFunction, + FETAChoiceFunction, + FETALinearChoiceFunction, + GeneralizedLinearModel, + PairwiseSVMChoiceFunction, + RankNetChoiceFunction, ] + +__all__ = [algo.__name__ for algo in algorithms] diff --git a/csrank/dataset_reader/objectranking/util.py b/csrank/dataset_reader/objectranking/util.py index b1306840..b73a9db3 100644 --- a/csrank/dataset_reader/objectranking/util.py +++ b/csrank/dataset_reader/objectranking/util.py @@ -35,7 +35,7 @@ def generate_pairwise_instances(features): def generate_complete_pairwise_dataset(X, Y): """ - Generates the pairiwse preference data from the given rankings.The ranking amongst the objects in a query set + Generates the pairwise preference data from the given rankings.The ranking amongst the objects in a query set :math:`Q = \\{x_1, x_2, x_3\\}` is represented by :math:`\\pi = (2,1,3)`, such that :math:`\\pi(2)=1` is the position of the :math:`x_2`. One can extract the following *pairwise preferences* :math:`x_2 \\succ x_1, x_2 \\succ x_3 and x_1 \\succ x_3`. This function generates pairwise preferences which can be used to learn different :class:`ObjectRanker` as: diff --git a/csrank/discretechoice/__init__.py b/csrank/discretechoice/__init__.py index cb5ca873..808f8dd2 100644 --- a/csrank/discretechoice/__init__.py +++ b/csrank/discretechoice/__init__.py @@ -13,19 +13,21 @@ from .pairwise_discrete_choice import PairwiseSVMDiscreteChoiceFunction from .ranknet_discrete_choice import RankNetDiscreteChoiceFunction -__all__ = [ - "RandomBaselineDC", - "CmpNetDiscreteChoiceFunction", - "FATEDiscreteChoiceFunction", - "FATELinearDiscreteChoiceFunction", - "FETADiscreteChoiceFunction", - "FETALinearDiscreteChoiceFunction", - "GeneralizedNestedLogitModel", - "MixedLogitModel", - "ModelSelector", - "MultinomialLogitModel", - "NestedLogitModel", - "PairedCombinatorialLogit", - "PairwiseSVMDiscreteChoiceFunction", - "RankNetDiscreteChoiceFunction", +algorithms = [ + RandomBaselineDC, + CmpNetDiscreteChoiceFunction, + FATEDiscreteChoiceFunction, + FATELinearDiscreteChoiceFunction, + FETADiscreteChoiceFunction, + FETALinearDiscreteChoiceFunction, + GeneralizedNestedLogitModel, + MixedLogitModel, + ModelSelector, + MultinomialLogitModel, + NestedLogitModel, + PairedCombinatorialLogit, + PairwiseSVMDiscreteChoiceFunction, + RankNetDiscreteChoiceFunction, ] + +__all__ = [algo.__name__ for algo in algorithms] diff --git a/csrank/objectranking/__init__.py b/csrank/objectranking/__init__.py index ce09aaaf..b5f065ac 100644 --- a/csrank/objectranking/__init__.py +++ b/csrank/objectranking/__init__.py @@ -9,15 +9,17 @@ from .rank_net import RankNet from .rank_svm import RankSVM -__all__ = [ - "CmpNet", - "ExpectedRankRegression", - "FATEObjectRanker", - "FATELinearObjectRanker", - "FETAObjectRanker", - "FETALinearObjectRanker", - "ListNet", - "RankNet", - "RankSVM", - "RandomBaselineRanker", +algorithms = [ + CmpNet, + ExpectedRankRegression, + FATEObjectRanker, + FATELinearObjectRanker, + FETAObjectRanker, + FETALinearObjectRanker, + ListNet, + RankNet, + RankSVM, + RandomBaselineRanker, ] + +__all__ = [algo.__name__ for algo in algorithms] diff --git a/csrank/tests/test_estimators.py b/csrank/tests/test_estimators.py new file mode 100644 index 00000000..580cec38 --- /dev/null +++ b/csrank/tests/test_estimators.py @@ -0,0 +1,70 @@ +"""Check that our estimators adhere to the scikit-learn interface. + +https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator +""" + +from functools import partial + +import pytest +from sklearn.utils.estimator_checks import check_estimator + +from csrank import objectranking + + +def get_check_name(check): + if isinstance(check, partial): + return check.func.__name__ + else: + return check.__name__ + + +def _reshape_x(X): + n_instances, n_objects = X.shape + n_features = 1 + return X.reshape((n_instances, n_objects, n_features)) + + +@pytest.mark.parametrize( + "Estimator", + # TODO write wrappers for choice, discretechoice + objectranking.algorithms, +) +def test_all_estimators(Estimator): + class WrappedRanker(Estimator): + # scikit learn assumes that "X" is an array of one-dimensional + # feature vectors. Our learners however assume an array of objects + # as a "feature vector", hence they expect one more dimension. + # This is one scikit-learn API expectation that we do not fulfill. + # This thin wrapper is needed so that we can still use the other + # estimator checks. It just pretends every feature is itself a + # one-feature object. + def fit(self, X, Y, *args, **kwargs): + Xnew = _reshape_x(X) + Ynew = Xnew.argsort(axis=1).argsort(axis=1).squeeze(axis=-1) + return super().fit(Xnew, Ynew, *args, **kwargs) + + def predict(self, X, *args, **kwargs): + super().predict(_reshape_x(X), *args, **kwargs) + + for (estimator, check) in check_estimator(WrappedRanker, generate_only=True): + # checks that attempt to call "fit" do not work since our estimators + # expect a 3-dimensional data shape while scikit-learn assumes two + # dimensions (an array of 1d data). + if not get_check_name(check) in { + "check_estimators_fit_returns_self", # fails for all + "check_complex_data", # fails for CmpNet + "check_dtype_object", # fails for ExpectedRankRegression + "check_estimators_empty_data_messages", # fails for all + "check_estimators_nan_inf", # fails for CmpNet + "check_estimators_overwrite_params", # fails for FATELinearObjectRanker + "check_estimator_sparse_data", # fails for ExpectedRankRegression + "check_estimators_pickle", # fails for ExpectedRankRegression + "check_fit2d_predict1d", # fails for ExpectedRankRegression + "check_methods_subset_invariance", # fails for ExpectedRankRegression + "check_fit2d_1sample", # fails for FETAObjectRanker + "check_dict_unchanged", # fails for ListNet + "check_dont_overwrite_parameters", # fails for CmpNet + "check_fit_idempotent", # fails for ExpectedRankRegression + "check_n_features_in" # fails for RankSVM + }: + check(estimator) diff --git a/shell.nix b/shell.nix index bef389b6..f90c0f64 100644 --- a/shell.nix +++ b/shell.nix @@ -21,6 +21,11 @@ let buildInputs = with pkgs; [ xorg.libX11 ] ++ old.buildInputs; } ); + scikit-learn = super.scikit-learn.overridePythonAttrs ( + old: { + patches = [./0001-Relax-init-parameter-type-checks.patch]; + } + ); }); }; in pkgs.mkShell {