Merge pull request #144 from timokau/misc-default-constructible
Misc fixes for default-constructibility of our learners
timokau authored Jul 1, 2020
2 parents 8c85937 + 217c838 commit 194dac8
Showing 24 changed files with 181 additions and 83 deletions.
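Most of the diffs below follow a single pattern: evaluated or mutable constructor defaults (a hard-coded l2(1e-4) regularizer, metrics=["binary_accuracy"], metrics=None) become the parameter-free l2() and immutable tuples, and model.compile() later converts the tuple back to a list via metrics=list(self.metrics), presumably because Keras documents that argument as a list. The sketch below illustrates the pitfall the tuple defaults avoid; MutableDefaultLearner and SafeDefaultLearner are hypothetical classes, not part of csrank.

from keras.regularizers import l2


class MutableDefaultLearner:
    # Problematic pattern: a Python default argument is evaluated once, when
    # the function is defined, so the list default is shared by every
    # instance that relies on it; mutating self.metrics in one learner
    # silently affects the others.
    def __init__(self, kernel_regularizer=l2(1e-4), metrics=["binary_accuracy"]):
        self.kernel_regularizer = kernel_regularizer
        self.metrics = metrics


class SafeDefaultLearner:
    # Pattern adopted in this commit: l2() with the library's default factor
    # and an immutable tuple, so SafeDefaultLearner() works with no arguments
    # and its defaults cannot be mutated behind its back.
    def __init__(self, kernel_regularizer=l2(), metrics=("binary_accuracy",)):
        self.kernel_regularizer = kernel_regularizer
        self.metrics = metrics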
4 changes: 2 additions & 2 deletions csrank/choicefunction/cmpnet_choice.py
@@ -16,11 +16,11 @@ def __init__(
n_units=8,
loss_function="binary_crossentropy",
batch_normalization=True,
kernel_regularizer=l2(1e-4),
kernel_regularizer=l2(),
kernel_initializer="lecun_normal",
activation="relu",
optimizer=SGD,
metrics=["binary_accuracy"],
metrics=("binary_accuracy",),
batch_size=256,
random_state=None,
**kwargs,
4 changes: 2 additions & 2 deletions csrank/choicefunction/fate_choice.py
@@ -20,10 +20,10 @@ def __init__(
loss_function=binary_crossentropy,
activation="selu",
kernel_initializer="lecun_normal",
kernel_regularizer=l2(0.01),
kernel_regularizer=l2(),
optimizer=SGD,
batch_size=256,
metrics=None,
metrics=(),
random_state=None,
**kwargs,
):
20 changes: 11 additions & 9 deletions csrank/choicefunction/feta_choice.py
@@ -31,11 +31,11 @@ def __init__(
num_subsample=5,
loss_function=binary_crossentropy,
batch_normalization=False,
kernel_regularizer=l2(1e-4),
kernel_regularizer=l2(),
kernel_initializer="lecun_normal",
activation="selu",
optimizer=SGD,
metrics=["binary_accuracy"],
metrics=("binary_accuracy",),
batch_size=256,
random_state=None,
**kwargs,
@@ -119,7 +119,7 @@ def _construct_layers(self, **kwargs):
# Todo: Variable sized input
# X = Input(shape=(None, n_features))
if self.batch_normalization:
if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.hidden_layers_zeroth = [
NormalizedDense(
self.n_units, name="hidden_zeroth_{}".format(x), **kwargs
@@ -131,7 +131,7 @@ def _construct_layers(self, **kwargs):
for x in range(self.n_hidden)
]
else:
if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.hidden_layers_zeroth = [
Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs)
for x in range(self.n_hidden)
@@ -144,7 +144,7 @@ def _construct_layers(self, **kwargs):
self.output_node = Dense(
1, activation="linear", kernel_regularizer=self.kernel_regularizer
)
if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.output_node_zeroth = Dense(
1, activation="linear", kernel_regularizer=self.kernel_regularizer
)
@@ -169,7 +169,7 @@ def construct_model(self):
def create_input_lambda(i):
return Lambda(lambda x: x[:, i])

if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.logger.debug("Create 0th order model")
zeroth_order_outputs = []
inputs = []
@@ -184,7 +184,7 @@ def create_input_lambda(i):
self.logger.debug("Create 1st order model")
outputs = [list() for _ in range(self.n_objects_fit_)]
for i, j in combinations(range(self.n_objects_fit_), 2):
if self._use_zeroth_model:
if self.add_zeroth_order_model:
x1 = inputs[i]
x2 = inputs[j]
else:
@@ -214,13 +214,15 @@ def create_input_lambda(i):
]
scores = concatenate(scores)
self.logger.debug("1st order model finished")
if self._use_zeroth_model:
if self.add_zeroth_order_model:
scores = add([scores, zeroth_order_scores])
scores = Activation("sigmoid")(scores)
model = Model(inputs=self.input_layer, outputs=scores)
self.logger.debug("Compiling complete model...")
model.compile(
loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
loss=self.loss_function,
optimizer=self.optimizer_,
metrics=list(self.metrics),
)
return model

4 changes: 2 additions & 2 deletions csrank/choicefunction/ranknet_choice.py
@@ -16,11 +16,11 @@ def __init__(
n_units=8,
loss_function="binary_crossentropy",
batch_normalization=True,
kernel_regularizer=l2(1e-4),
kernel_regularizer=l2(),
kernel_initializer="lecun_normal",
activation="relu",
optimizer=SGD,
metrics=["binary_accuracy"],
metrics=("binary_accuracy",),
batch_size=256,
random_state=None,
**kwargs,
8 changes: 5 additions & 3 deletions csrank/core/cmpnet_core.py
@@ -25,11 +25,11 @@ def __init__(
n_units=8,
loss_function="binary_crossentropy",
batch_normalization=True,
kernel_regularizer=l2(1e-4),
kernel_regularizer=l2(),
kernel_initializer="lecun_normal",
activation="relu",
optimizer=SGD,
metrics=["binary_accuracy"],
metrics=("binary_accuracy",),
batch_size=256,
random_state=None,
**kwargs,
@@ -109,7 +109,9 @@ def construct_model(self):
merged_output = concatenate([N_g, N_l])
model = Model(inputs=[self.x1, self.x2], outputs=merged_output)
model.compile(
loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
loss=self.loss_function,
optimizer=self.optimizer_,
metrics=list(self.metrics),
)
return model

6 changes: 4 additions & 2 deletions csrank/core/fate_network.py
@@ -27,7 +27,7 @@ def __init__(
n_hidden_joint_units=32,
activation="selu",
kernel_initializer="lecun_normal",
kernel_regularizer=l2(0.01),
kernel_regularizer=l2(),
optimizer=SGD,
batch_size=256,
random_state=None,
@@ -475,7 +475,9 @@ def construct_model(self, n_features, n_objects):
model = Model(inputs=input_layer, outputs=scores)

model.compile(
loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
loss=self.loss_function,
optimizer=self.optimizer_,
metrics=list(self.metrics),
)
return model

79 changes: 78 additions & 1 deletion csrank/core/feta_linear.py
@@ -13,6 +13,21 @@


class FETALinearCore(Learner):
"""Core Learner implementing the First Evaluate then Aggregate approach.
This implements a linear variant of the FETA approach introduced in
[PfGuH18]. The idea is to first evaluate each object in each sub-context of
fixed size with a linear function approximator and then to aggregate these
evaluations.
References
----------
.. [PfGuH18] Pfannschmidt, K., Gupta, P., & Hüllermeier, E. (2018). Deep
architectures for learning context-dependent ranking functions. arXiv
preprint arXiv:1803.05796. https://arxiv.org/pdf/1803.05796.pdf
"""

def __init__(
self,
learning_rate=1e-3,
@@ -23,6 +38,25 @@ def __init__(
random_state=None,
**kwargs,
):
"""
Parameters
----------
learning_rate : float
The learning rate used by the gradient descent optimizer.
batch_size : int
The size of the mini-batches used to train the Neural Network.
loss_function
The loss function to minimize when training the Neural Network. See
the functions offered in the keras.losses module for more details.
epochs_drop: int
The amount of training epochs after which the learning rate is
decreased by a factor of `drop`.
drop: float
The factor by which to decrease the learning rate every
`epochs_drop` epochs.
random_state: np.RandomState
The random state to use in this object.
"""
self.learning_rate = learning_rate
self.batch_size = batch_size
self.random_state = random_state
@@ -90,6 +124,18 @@ def _construct_model_(self, n_objects):
)

def step_decay(self, epoch):
"""Update the current learning rate.
Computes the current learning rate based on the initial learning rate,
the current epoch and the decay speed set by the `epochs_drop` and
`drop` hyperparameters.
Parameters
----------
epoch: int
The current epoch.
"""
step = math.floor((1 + epoch) / self.epochs_drop)
self.current_lr = self.learning_rate * math.pow(self.drop, step)
self.optimizer = tf.train.GradientDescentOptimizer(self.current_lr).minimize(
@@ -99,6 +145,22 @@ def step_decay(self, epoch):
def fit(
self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd
):
"""
Fit the preference learning algorithm on the provided set of queries X
and preferences Y of those objects. The provided queries and
corresponding preferences are of a fixed size (numpy arrays).
Parameters
----------
X : array-like, shape (n_samples, n_objects, n_features)
Feature vectors of the objects
Y : array-like, shape (n_samples, n_objects)
Preferences of the objects in form of rankings or choices
epochs: int
The amount of epochs to train for. The training loop will try to
predict the target variables and adjust its parameters by gradient
descent `epochs` times.
"""
self.random_state_ = check_random_state(self.random_state)
# Global Variables Initializer
n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
@@ -146,6 +208,18 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose):
self.logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c)))

def _predict_scores_fixed(self, X, **kwargs):
"""Predict the scores for a given collection of sets of objects of same size.
Parameters
----------
X : array-like, shape (n_samples, n_objects, n_features)
Returns
-------
Y : array-like, shape (n_samples, n_objects)
Returns the scores of each of the objects for each of the samples.
"""
n_instances, n_objects, n_features = X.shape
assert n_features == self.n_object_features_fit_
outputs = [list() for _ in range(n_objects)]
@@ -168,7 +242,10 @@ def set_tunable_parameters(
self, learning_rate=1e-3, batch_size=128, epochs_drop=300, drop=0.1, **point
):
"""
Set tunable parameters of the FETA-network to the values provided.
Set tunable hyperparameters of the FETA-network to the values provided.
This can be used for automatic hyperparameter optimization. See
csrank.tuning for more information.
Parameters
----------
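The step_decay method added to FETALinearCore above implements a standard step-wise schedule: the learning rate is multiplied by drop once every epochs_drop epochs. Below is a small stand-alone sketch of that computation, using the defaults visible in set_tunable_parameters (learning_rate=1e-3, epochs_drop=300, drop=0.1); the helper name is made up for illustration.

import math


def step_decay_schedule(epoch, learning_rate=1e-3, epochs_drop=300, drop=0.1):
    # Mirrors the body of FETALinearCore.step_decay: the number of full
    # epochs_drop periods that have elapsed gives the exponent of drop.
    step = math.floor((1 + epoch) / epochs_drop)
    return learning_rate * math.pow(drop, step)


# Epochs 0-298 train at 1e-3, epochs 299-598 at 1e-4, and so on.
assert math.isclose(step_decay_schedule(0), 1e-3)
assert math.isclose(step_decay_schedule(299), 1e-4)
assert math.isclose(step_decay_schedule(599), 1e-5)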
28 changes: 15 additions & 13 deletions csrank/core/feta_network.py
@@ -32,11 +32,11 @@ def __init__(
num_subsample=5,
loss_function=hinged_rank_loss,
batch_normalization=False,
kernel_regularizer=l2(1e-4),
kernel_regularizer=l2(),
kernel_initializer="lecun_normal",
activation="selu",
optimizer=SGD,
metrics=None,
metrics=(),
batch_size=256,
random_state=None,
**kwargs,
@@ -54,7 +54,7 @@ def __init__(
self.batch_size = batch_size
self.hash_file = None
self.optimizer = optimizer
self._use_zeroth_model = add_zeroth_order_model
self.add_zeroth_order_model = add_zeroth_order_model
self.n_hidden = n_hidden
self.n_units = n_units
keys = list(kwargs.keys())
@@ -80,7 +80,7 @@ def _construct_layers(self, **kwargs):
# X = Input(shape=(None, n_features))
self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units))
if self.batch_normalization:
if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.hidden_layers_zeroth = [
NormalizedDense(
self.n_units, name="hidden_zeroth_{}".format(x), **kwargs
@@ -92,7 +92,7 @@ def _construct_layers(self, **kwargs):
for x in range(self.n_hidden)
]
else:
if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.hidden_layers_zeroth = [
Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs)
for x in range(self.n_hidden)
@@ -105,14 +105,14 @@ def _construct_layers(self, **kwargs):
self.output_node = Dense(
1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer
)
if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.output_node_zeroth = Dense(
1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer
)

@property
def zero_order_model(self):
if self._zero_order_model is None and self._use_zeroth_model:
if self._zero_order_model is None and self.add_zeroth_order_model:
self.logger.info("Creating zeroth model")
inp = Input(shape=(self.n_object_features_fit_,))

@@ -153,7 +153,7 @@ def pairwise_model(self):
def _predict_pair(self, a, b, only_pairwise=False, **kwargs):
# TODO: Is this working correctly?
pairwise = self.pairwise_model.predict([a, b], **kwargs)
if not only_pairwise and self._use_zeroth_model:
if not only_pairwise and self.add_zeroth_order_model:
utility_a = self.zero_order_model.predict([a])
utility_b = self.zero_order_model.predict([b])
return pairwise + (utility_a, utility_b)
@@ -173,7 +173,7 @@ def _predict_scores_using_pairs(self, X, **kwd):
scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1)
del result
del pairs
if self._use_zeroth_model:
if self.add_zeroth_order_model:
scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features))
scores_zero = scores_zero.reshape(n_instances, n_objects)
scores = scores + scores_zero
@@ -199,7 +199,7 @@ def construct_model(self):
def create_input_lambda(i):
return Lambda(lambda x: x[:, i])

if self._use_zeroth_model:
if self.add_zeroth_order_model:
self.logger.debug("Create 0th order model")
zeroth_order_outputs = []
inputs = []
@@ -214,7 +214,7 @@ def create_input_lambda(i):
self.logger.debug("Create 1st order model")
outputs = [list() for _ in range(self.n_objects_fit_)]
for i, j in combinations(range(self.n_objects_fit_), 2):
if self._use_zeroth_model:
if self.add_zeroth_order_model:
x1 = inputs[i]
x2 = inputs[j]
else:
@@ -244,12 +244,14 @@ def create_input_lambda(i):
]
scores = concatenate(scores)
self.logger.debug("1st order model finished")
if self._use_zeroth_model:
if self.add_zeroth_order_model:
scores = add([scores, zeroth_order_scores])
model = Model(inputs=self.input_layer, outputs=scores)
self.logger.debug("Compiling complete model...")
model.compile(
loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
loss=self.loss_function,
optimizer=self.optimizer_,
metrics=list(self.metrics),
)
return model

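The feta_network.py changes also rename the private attribute _use_zeroth_model to add_zeroth_order_model, so the constructor argument is stored under its own name. A plausible reason, in line with the default-constructibility theme of this PR, is the scikit-learn convention that hyperparameters are kept as attributes matching the __init__ signature, which lets them be read back generically. A minimal sketch of that convention with a made-up estimator (not csrank code):

import inspect


class TinyEstimator:
    def __init__(self, add_zeroth_order_model=False, n_hidden=2):
        # Store each constructor argument unmodified, under the same name.
        self.add_zeroth_order_model = add_zeroth_order_model
        self.n_hidden = n_hidden

    def get_params(self):
        # Because attribute names match the __init__ parameters, the
        # hyperparameters can be recovered generically, e.g. for cloning a
        # learner or for hyperparameter search.
        names = inspect.signature(type(self).__init__).parameters
        return {name: getattr(self, name) for name in names if name != "self"}


print(TinyEstimator().get_params())
# {'add_zeroth_order_model': False, 'n_hidden': 2}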
(Diffs for the remaining 16 of the 24 changed files are not shown here.)
