diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py index 507527e3..a687102a 100644 --- a/csrank/choicefunction/cmpnet_choice.py +++ b/csrank/choicefunction/cmpnet_choice.py @@ -16,11 +16,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index 3f06dcb0..a0569b95 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -20,10 +20,10 @@ def __init__( loss_function=binary_crossentropy, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, - metrics=None, + metrics=(), random_state=None, **kwargs, ): diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index ae44171b..daf2ae3c 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -31,11 +31,11 @@ def __init__( num_subsample=5, loss_function=binary_crossentropy, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -119,7 +119,7 @@ def _construct_layers(self, **kwargs): # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), *kwargs @@ -131,7 +131,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -144,7 +144,7 @@ def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) @@ -169,7 +169,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -184,7 +184,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -214,13 +214,15 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores = add([scores, zeroth_order_scores]) scores = Activation("sigmoid")(scores) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + 
loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index f55964bb..7731c9f3 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -16,11 +16,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 47a0ad5d..f54ce7fb 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -25,11 +25,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -109,7 +109,9 @@ def construct_model(self): merged_output = concatenate([N_g, N_l]) model = Model(inputs=[self.x1, self.x2], outputs=merged_output) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 75ec200e..301b74b4 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -27,7 +27,7 @@ def __init__( n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, random_state=None, @@ -475,7 +475,9 @@ def construct_model(self, n_features, n_objects): model = Model(inputs=input_layer, outputs=scores) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/feta_linear.py b/csrank/core/feta_linear.py index 4f0c875f..8566cfa6 100644 --- a/csrank/core/feta_linear.py +++ b/csrank/core/feta_linear.py @@ -13,6 +13,21 @@ class FETALinearCore(Learner): + """Core Learner implementing the First Evaluate then Aggregate approach. + + This implements a linear variant of the FETA approach introduced in + [PfGuH18]. The idea is to first evaluate each object in each sub-context of + fixed size with a linear function approximator and then to aggregate these + evaluations. + + References + ---------- + + .. [PfGuH18] Pfannschmidt, K., Gupta, P., & Hüllermeier, E. (2018). Deep + architectures for learning context-dependent ranking functions. arXiv + preprint arXiv:1803.05796. https://arxiv.org/pdf/1803.05796.pdf + """ + def __init__( self, learning_rate=1e-3, @@ -23,6 +38,25 @@ def __init__( random_state=None, **kwargs, ): + """ + Parameters + ---------- + learning_rate : float + The learning rate used by the gradient descent optimizer. + batch_size : int + The size of the mini-batches used to train the Neural Network. + loss_function + The loss function to minimize when training the Neural Network. See + the functions offered in the keras.losses module for more details. 
+ epochs_drop : int + The number of training epochs after which the learning rate is + decreased by a factor of `drop`. + drop : float + The factor by which to decrease the learning rate every + `epochs_drop` epochs. + random_state : numpy.random.RandomState + The random state to use in this object. + """ self.learning_rate = learning_rate self.batch_size = batch_size self.random_state = random_state @@ -90,6 +124,18 @@ def _construct_model_(self, n_objects): ) def step_decay(self, epoch): + """Update the current learning rate. + + Computes the current learning rate based on the initial learning rate, + the current epoch and the decay speed set by the `epochs_drop` and + `drop` hyperparameters. + + Parameters + ---------- + + epoch : int + The current epoch. + """ step = math.floor((1 + epoch) / self.epochs_drop) self.current_lr = self.learning_rate * math.pow(self.drop, step) self.optimizer = tf.train.GradientDescentOptimizer(self.current_lr).minimize( @@ -99,6 +145,22 @@ def step_decay(self, epoch): def fit( self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd ): + """ + Fit the preference learning algorithm on the provided set of queries X + and preferences Y over those objects. The provided queries and + corresponding preferences are of a fixed size (numpy arrays). + + Parameters + ---------- + X : array-like, shape (n_samples, n_objects, n_features) + Feature vectors of the objects + Y : array-like, shape (n_samples, n_objects) + Preferences of the objects in the form of rankings or choices + epochs : int + The number of epochs to train for. The training loop will try to + predict the target variables and adjust its parameters by gradient + descent `epochs` times. + """ self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape @@ -146,6 +208,18 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): self.logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) def _predict_scores_fixed(self, X, **kwargs): + """Predict the scores for a given collection of sets of objects of the same size. + + Parameters + ---------- + X : array-like, shape (n_samples, n_objects, n_features) + Feature vectors of the objects + + Returns + ------- + Y : array-like, shape (n_samples, n_objects) + Returns the scores of each of the objects for each of the samples. + """ n_instances, n_objects, n_features = X.shape assert n_features == self.n_object_features_fit_ outputs = [list() for _ in range(n_objects)] @@ -168,7 +242,10 @@ def set_tunable_parameters( self, learning_rate=1e-3, batch_size=128, epochs_drop=300, drop=0.1, **point ): """ - Set tunable parameters of the FETA-network to the values provided. + Set tunable hyperparameters of the FETA-network to the values provided. + + This can be used for automatic hyperparameter optimization. See + csrank.tuning for more information. 
Parameters ---------- diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 8f6c20ec..d9119528 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -32,11 +32,11 @@ def __init__( num_subsample=5, loss_function=hinged_rank_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=None, + metrics=(), batch_size=256, random_state=None, **kwargs, @@ -54,7 +54,7 @@ def __init__( self.batch_size = batch_size self.hash_file = None self.optimizer = optimizer - self._use_zeroth_model = add_zeroth_order_model + self.add_zeroth_order_model = add_zeroth_order_model self.n_hidden = n_hidden self.n_units = n_units keys = list(kwargs.keys()) @@ -80,7 +80,7 @@ def _construct_layers(self, **kwargs): # X = Input(shape=(None, n_features)) self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), **kwargs @@ -92,7 +92,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -105,14 +105,14 @@ def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) @property def zero_order_model(self): - if self._zero_order_model is None and self._use_zeroth_model: + if self._zero_order_model is None and self.add_zeroth_order_model: self.logger.info("Creating zeroth model") inp = Input(shape=(self.n_object_features_fit_,)) @@ -153,7 +153,7 @@ def pairwise_model(self): def _predict_pair(self, a, b, only_pairwise=False, **kwargs): # TODO: Is this working correctly? 
pairwise = self.pairwise_model.predict([a, b], **kwargs) - if not only_pairwise and self._use_zeroth_model: + if not only_pairwise and self.add_zeroth_order_model: utility_a = self.zero_order_model.predict([a]) utility_b = self.zero_order_model.predict([b]) return pairwise + (utility_a, utility_b) @@ -173,7 +173,7 @@ def _predict_scores_using_pairs(self, X, **kwd): scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) del result del pairs - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) scores_zero = scores_zero.reshape(n_instances, n_objects) scores = scores + scores_zero @@ -199,7 +199,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -214,7 +214,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -244,12 +244,14 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores = add([scores, zeroth_order_scores]) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index b73e9b43..d3819119 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -24,11 +24,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -99,7 +99,9 @@ def construct_model(self): output = self.output_node(merged_inputs) model = Model(inputs=[self.x1, self.x2], outputs=output) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index 5e8315d0..64b6e32c 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -15,11 +15,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 0da4e5f2..e29ca3dd 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -14,12 +14,12 @@ def __init__( n_hidden_set_layers=2, n_hidden_set_units=2, 
loss_function="categorical_hinge", - metrics=["categorical_accuracy"], + metrics=("categorical_accuracy",), n_hidden_joint_layers=32, n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, random_state=None, diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index 2705dada..3145ecf8 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -29,11 +29,11 @@ def __init__( num_subsample=5, loss_function="categorical_hinge", batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=["categorical_accuracy"], + metrics=("categorical_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -116,7 +116,7 @@ def _construct_layers(self, **kwargs): # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), *kwargs @@ -128,7 +128,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -144,7 +144,7 @@ def _construct_layers(self, **kwargs): kernel_regularizer=self.kernel_regularizer, name="score", ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="linear", @@ -178,7 +178,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -193,7 +193,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -223,7 +223,7 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: def get_score_object(i): return Lambda(lambda x: x[:, i, None]) @@ -242,9 +242,9 @@ def get_score_object(i): scores.append(self.weighted_sum(concat_scores[i])) scores = concatenate(scores) - # if self._use_zeroth_model: + # if self.add_zeroth_order_model: # scores = add([scores, zeroth_order_scores]) - # if self._use_zeroth_model: + # if self.add_zeroth_order_model: # def expand_dims(): # return Lambda(lambda x: x[..., None]) # @@ -259,12 +259,14 @@ def get_score_object(i): # kernel_regularizer=self.kernel_regularizer, use_bias=False) # scores = weighted_sum(concat_scores) # scores = squeeze_dims()(scores) - if not self._use_zeroth_model: + if not self.add_zeroth_order_model: scores = Activation("sigmoid")(scores) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model @@ -299,7 +301,7 @@ def 
_predict_scores_using_pairs(self, X, **kwd): scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) del result del pairs - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) scores_zero = scores_zero.reshape(n_instances, n_objects) model = self._create_weighted_model(n_objects) diff --git a/csrank/discretechoice/generalized_nested_logit.py b/csrank/discretechoice/generalized_nested_logit.py index 3df68d86..083d9e12 100644 --- a/csrank/discretechoice/generalized_nested_logit.py +++ b/csrank/discretechoice/generalized_nested_logit.py @@ -92,7 +92,7 @@ def __init__( self.n_nests = n_nests self.alpha = alpha - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.random_state = random_state known_regularization_functions = {"l1", "l2"} @@ -257,6 +257,7 @@ def construct_model(self, X, Y): model : pymc3 Model :class:`pm.Model` """ self.random_state_ = check_random_state(self.random_state) + self.loss_function_ = likelihood_dict.get(self.loss_function, None) if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) @@ -280,7 +281,7 @@ def construct_model(self, X, Y): lambda_k = pm.Uniform("lambda_k", self.alpha, 1.0, shape=self.n_nests) self.p = self.get_probabilities(utility, lambda_k, alpha_ik) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -403,7 +404,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/mixed_logit_model.py b/csrank/discretechoice/mixed_logit_model.py index 21ce0b80..86efc9f0 100644 --- a/csrank/discretechoice/mixed_logit_model.py +++ b/csrank/discretechoice/mixed_logit_model.py @@ -72,7 +72,7 @@ def __init__(self, n_mixtures=4, loss_function="", regularization="l2", **kwargs [3] Daniel McFadden and Kenneth Train. „Mixed MNL models for discrete response“. In: Journal of applied Econometrics 15.5 (2000), pp. 
447–470 """ self.logger = logging.getLogger(MixedLogitModel.__name__) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -155,6 +155,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -163,7 +164,7 @@ def construct_model(self, X, Y): utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = tt.mean(ttu.softmax(utility, axis=1), axis=2) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -257,7 +258,7 @@ def set_tunable_parameters( Dictionary containing parameter values which are not tuned for the network """ if loss_function in likelihood_dict: - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.n_mixtures = n_mixtures self.regularization = regularization self.model = None diff --git a/csrank/discretechoice/multinomial_logit_model.py b/csrank/discretechoice/multinomial_logit_model.py index d6fe4f5f..fb52f095 100644 --- a/csrank/discretechoice/multinomial_logit_model.py +++ b/csrank/discretechoice/multinomial_logit_model.py @@ -64,7 +64,7 @@ def __init__(self, loss_function="", regularization="l2", **kwargs): [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986 """ self.logger = logging.getLogger(MultinomialLogitModel.__name__) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -151,6 +151,7 @@ def construct_model(self, X, Y): print_dictionary(self.model_configuration) ) ) + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -162,7 +163,7 @@ def construct_model(self, X, Y): self.p = ttu.softmax(utility, axis=1) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -257,7 +258,7 @@ def set_tunable_parameters(self, loss_function=None, regularization="l1", **poin raise ValueError( f"Loss function {loss_function} is unknown. 
Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/nested_logit_model.py b/csrank/discretechoice/nested_logit_model.py index 328a9ef8..326620dc 100644 --- a/csrank/discretechoice/nested_logit_model.py +++ b/csrank/discretechoice/nested_logit_model.py @@ -91,7 +91,7 @@ def __init__( self.n_nests = n_nests self.alpha = alpha self.random_state = random_state - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -313,6 +313,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) @@ -339,7 +340,7 @@ def construct_model(self, X, Y): self.p = self.get_probabilities(utility, lambda_k, utility_k) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -464,7 +465,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.cluster_model = None self.features_nests = None self.model = None diff --git a/csrank/discretechoice/paired_combinatorial_logit.py b/csrank/discretechoice/paired_combinatorial_logit.py index 7b19ff90..0cc4d084 100644 --- a/csrank/discretechoice/paired_combinatorial_logit.py +++ b/csrank/discretechoice/paired_combinatorial_logit.py @@ -91,7 +91,7 @@ def __init__( self.logger = logging.getLogger(PairedCombinatorialLogit.__name__) self.alpha = alpha self.random_state = random_state - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -260,6 +260,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -269,7 +270,7 @@ def construct_model(self, X, Y): utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = self.get_probabilities(utility, lambda_k) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -380,7 +381,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. 
Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index 7fc5bf0a..0df1a3e4 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -15,11 +15,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/learner.py b/csrank/learner.py index 12be59d7..a7fb92b8 100644 --- a/csrank/learner.py +++ b/csrank/learner.py @@ -1,6 +1,8 @@ from abc import ABCMeta from abc import abstractmethod +from sklearn.base import BaseEstimator + from csrank.tunable import Tunable @@ -13,7 +15,7 @@ def filter_dict_by_prefix(source, prefix): return result -class Learner(Tunable, metaclass=ABCMeta): +class Learner(Tunable, BaseEstimator, metaclass=ABCMeta): def _initialize_optimizer(self): optimizer_params = filter_dict_by_prefix(self.__dict__, "optimizer__") optimizer_params.update(filter_dict_by_prefix(self.kwargs, "optimizer__")) diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index 25d013b0..dc4deb96 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -17,11 +17,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py index 841e6bff..ca07f737 100644 --- a/csrank/objectranking/fate_object_ranker.py +++ b/csrank/objectranking/fate_object_ranker.py @@ -18,11 +18,11 @@ def __init__( n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, loss_function=hinged_rank_loss, - metrics=[zero_one_rank_loss_for_scores_ties], + metrics=(zero_one_rank_loss_for_scores_ties,), random_state=None, **kwargs, ): diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index 02ee8bbb..e341ff16 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -20,11 +20,11 @@ def __init__( num_subsample=5, loss_function=hinged_rank_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=None, + metrics=(), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index 74c136ab..896fd888 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -30,11 +30,11 @@ def __init__( n_units=8, loss_function=plackett_luce_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), activation="selu", kernel_initializer="lecun_normal", optimizer=SGD, - 
metrics=[zero_one_rank_loss_for_scores_ties], + metrics=(zero_one_rank_loss_for_scores_ties,), batch_size=256, random_state=None, **kwargs, @@ -214,7 +214,9 @@ def construct_model(self): merged = concatenate(outputs) model = Model(inputs=self.input_layer, outputs=merged) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index dc98c4ac..fba9aee6 100644 --- a/csrank/objectranking/rank_net.py +++ b/csrank/objectranking/rank_net.py @@ -17,11 +17,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs,
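The `step_decay` method documented in the `feta_linear.py` hunk implements a stepwise learning-rate schedule: the initial rate is multiplied by `drop` once every `epochs_drop` epochs. As a standalone sketch (the free function below is a hypothetical extraction, not part of csrank; the defaults are the ones used by `set_tunable_parameters`):

```python
import math


def step_decay(epoch, learning_rate=1e-3, epochs_drop=300, drop=0.1):
    # Same rate computation as FETALinearCore.step_decay (which additionally
    # rebuilds the gradient-descent optimizer with the new rate).
    step = math.floor((1 + epoch) / epochs_drop)
    return learning_rate * math.pow(drop, step)


print(step_decay(0))    # ~1e-3: epochs 0..298 keep the initial rate
print(step_decay(299))  # ~1e-4: first drop after `epochs_drop` epochs
print(step_decay(599))  # ~1e-5: second drop
```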
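Taken together, the recurring changes in this diff (list defaults such as `["binary_accuracy"]` replaced by tuples, constructor arguments stored under their own names like `add_zeroth_order_model`, derived values such as `list(self.metrics)` and the pymc3 models' `loss_function_` computed only when the model is built, and `Learner` now also inheriting from `sklearn.base.BaseEstimator` in the `csrank/learner.py` hunk) follow the scikit-learn estimator convention. Below is a minimal sketch of that convention; `ToyRanker` and its parameters are hypothetical and only illustrate the pattern, they are not part of csrank:

```python
from sklearn.base import BaseEstimator, clone


class ToyRanker(BaseEstimator):
    """Hypothetical estimator illustrating the convention adopted in this diff."""

    def __init__(self, metrics=("binary_accuracy",), add_zeroth_order_model=False):
        # Store arguments unmodified and under their constructor names
        # (no renaming such as `self._use_zeroth_model`, no `list(...)` here).
        self.metrics = metrics
        self.add_zeroth_order_model = add_zeroth_order_model

    def fit(self, X, y):
        # Derived, mutable state gets a trailing underscore and is only created
        # when it is needed, analogous to `metrics=list(self.metrics)` in the
        # `model.compile` calls above.
        self.metrics_ = list(self.metrics)
        return self


estimator = ToyRanker()
print(estimator.get_params())  # constructor arguments, returned unmodified
cloned = clone(estimator)      # works because nothing was renamed or mutated
```

Storing the arguments unmodified is what lets `get_params`/`clone` (and therefore hyperparameter search utilities) reconstruct an equivalent estimator, and immutable tuple defaults avoid the usual pitfall of a mutable default list being shared across instances.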