diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py index 507527e3..a687102a 100644 --- a/csrank/choicefunction/cmpnet_choice.py +++ b/csrank/choicefunction/cmpnet_choice.py @@ -16,11 +16,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py index 3f06dcb0..a0569b95 100644 --- a/csrank/choicefunction/fate_choice.py +++ b/csrank/choicefunction/fate_choice.py @@ -20,10 +20,10 @@ def __init__( loss_function=binary_crossentropy, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, - metrics=None, + metrics=(), random_state=None, **kwargs, ): diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py index ae44171b..daf2ae3c 100644 --- a/csrank/choicefunction/feta_choice.py +++ b/csrank/choicefunction/feta_choice.py @@ -31,11 +31,11 @@ def __init__( num_subsample=5, loss_function=binary_crossentropy, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -119,7 +119,7 @@ def _construct_layers(self, **kwargs): # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), *kwargs @@ -131,7 +131,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -144,7 +144,7 @@ def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="linear", kernel_regularizer=self.kernel_regularizer ) @@ -169,7 +169,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -184,7 +184,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -214,13 +214,15 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores = add([scores, zeroth_order_scores]) scores = Activation("sigmoid")(scores) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + 
loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py index f55964bb..7731c9f3 100644 --- a/csrank/choicefunction/ranknet_choice.py +++ b/csrank/choicefunction/ranknet_choice.py @@ -16,11 +16,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/core/cmpnet_core.py b/csrank/core/cmpnet_core.py index 47a0ad5d..f54ce7fb 100644 --- a/csrank/core/cmpnet_core.py +++ b/csrank/core/cmpnet_core.py @@ -25,11 +25,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -109,7 +109,9 @@ def construct_model(self): merged_output = concatenate([N_g, N_l]) model = Model(inputs=[self.x1, self.x2], outputs=merged_output) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/fate_network.py b/csrank/core/fate_network.py index 75ec200e..301b74b4 100644 --- a/csrank/core/fate_network.py +++ b/csrank/core/fate_network.py @@ -27,7 +27,7 @@ def __init__( n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, random_state=None, @@ -475,7 +475,9 @@ def construct_model(self, n_features, n_objects): model = Model(inputs=input_layer, outputs=scores) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/feta_linear.py b/csrank/core/feta_linear.py index 4f0c875f..8566cfa6 100644 --- a/csrank/core/feta_linear.py +++ b/csrank/core/feta_linear.py @@ -13,6 +13,21 @@ class FETALinearCore(Learner): + """Core Learner implementing the First Evaluate then Aggregate approach. + + This implements a linear variant of the FETA approach introduced in + [PfGuH18]. The idea is to first evaluate each object in each sub-context of + fixed size with a linear function approximator and then to aggregate these + evaluations. + + References + ---------- + + .. [PfGuH18] Pfannschmidt, K., Gupta, P., & Hüllermeier, E. (2018). Deep + architectures for learning context-dependent ranking functions. arXiv + preprint arXiv:1803.05796. https://arxiv.org/pdf/1803.05796.pdf + """ + def __init__( self, learning_rate=1e-3, @@ -23,6 +38,25 @@ def __init__( random_state=None, **kwargs, ): + """ + Parameters + ---------- + learning_rate : float + The learning rate used by the gradient descent optimizer. + batch_size : int + The size of the mini-batches used to train the Neural Network. + loss_function + The loss function to minimize when training the Neural Network. See + the functions offered in the keras.losses module for more details. 
+ epochs_drop : int + The number of training epochs after which the learning rate is + decreased by a factor of `drop`. + drop : float + The factor by which to decrease the learning rate every + `epochs_drop` epochs. + random_state : numpy.random.RandomState + The random state to use in this object. + """ self.learning_rate = learning_rate self.batch_size = batch_size self.random_state = random_state @@ -90,6 +124,18 @@ def _construct_model_(self, n_objects): ) def step_decay(self, epoch): + """Update the current learning rate. + + Computes the current learning rate based on the initial learning rate, + the current epoch and the decay speed set by the `epochs_drop` and + `drop` hyperparameters. + + Parameters + ---------- + + epoch : int + The current epoch. + """ step = math.floor((1 + epoch) / self.epochs_drop) self.current_lr = self.learning_rate * math.pow(self.drop, step) self.optimizer = tf.train.GradientDescentOptimizer(self.current_lr).minimize( @@ -99,6 +145,22 @@ def step_decay(self, epoch): def fit( self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd ): + """ + Fit the preference learning algorithm on the provided set of queries X + and preferences Y over those objects. The provided queries and + corresponding preferences are of a fixed size (numpy arrays). + + Parameters + ---------- + X : array-like, shape (n_samples, n_objects, n_features) + Feature vectors of the objects + Y : array-like, shape (n_samples, n_objects) + Preferences of the objects in the form of rankings or choices + epochs : int + The number of epochs to train for. The training loop will try to + predict the target variables and adjust its parameters by gradient + descent `epochs` times. + """ self.random_state_ = check_random_state(self.random_state) # Global Variables Initializer n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape @@ -146,6 +208,18 @@ def _fit_(self, X, Y, epochs, n_instances, tf_session, verbose): self.logger.info("Epoch {}: cost {} ".format((epoch + 1), np.mean(c))) def _predict_scores_fixed(self, X, **kwargs): + """Predict the scores for a given collection of sets of objects of the same size. + + Parameters + ---------- + X : array-like, shape (n_samples, n_objects, n_features) + Feature vectors of the objects + + Returns + ------- + Y : array-like, shape (n_samples, n_objects) + Returns the scores of each of the objects for each of the samples. + """ n_instances, n_objects, n_features = X.shape assert n_features == self.n_object_features_fit_ outputs = [list() for _ in range(n_objects)] @@ -168,7 +242,10 @@ def set_tunable_parameters( self, learning_rate=1e-3, batch_size=128, epochs_drop=300, drop=0.1, **point ): """ - Set tunable parameters of the FETA-network to the values provided. + Set tunable hyperparameters of the FETA-network to the values provided. + + This can be used for automatic hyperparameter optimization. See + csrank.tuning for more information. 
Parameters ---------- diff --git a/csrank/core/feta_network.py b/csrank/core/feta_network.py index 8f6c20ec..d9119528 100644 --- a/csrank/core/feta_network.py +++ b/csrank/core/feta_network.py @@ -32,11 +32,11 @@ def __init__( num_subsample=5, loss_function=hinged_rank_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=None, + metrics=(), batch_size=256, random_state=None, **kwargs, @@ -54,7 +54,7 @@ def __init__( self.batch_size = batch_size self.hash_file = None self.optimizer = optimizer - self._use_zeroth_model = add_zeroth_order_model + self.add_zeroth_order_model = add_zeroth_order_model self.n_hidden = n_hidden self.n_units = n_units keys = list(kwargs.keys()) @@ -80,7 +80,7 @@ def _construct_layers(self, **kwargs): # X = Input(shape=(None, n_features)) self.logger.info("n_hidden {}, n_units {}".format(self.n_hidden, self.n_units)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), **kwargs @@ -92,7 +92,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -105,14 +105,14 @@ def _construct_layers(self, **kwargs): self.output_node = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="sigmoid", kernel_regularizer=self.kernel_regularizer ) @property def zero_order_model(self): - if self._zero_order_model is None and self._use_zeroth_model: + if self._zero_order_model is None and self.add_zeroth_order_model: self.logger.info("Creating zeroth model") inp = Input(shape=(self.n_object_features_fit_,)) @@ -153,7 +153,7 @@ def pairwise_model(self): def _predict_pair(self, a, b, only_pairwise=False, **kwargs): # TODO: Is this working correctly? 
pairwise = self.pairwise_model.predict([a, b], **kwargs) - if not only_pairwise and self._use_zeroth_model: + if not only_pairwise and self.add_zeroth_order_model: utility_a = self.zero_order_model.predict([a]) utility_b = self.zero_order_model.predict([b]) return pairwise + (utility_a, utility_b) @@ -173,7 +173,7 @@ def _predict_scores_using_pairs(self, X, **kwd): scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) del result del pairs - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) scores_zero = scores_zero.reshape(n_instances, n_objects) scores = scores + scores_zero @@ -199,7 +199,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -214,7 +214,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -244,12 +244,14 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores = add([scores, zeroth_order_scores]) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py index b73e9b43..d3819119 100644 --- a/csrank/core/ranknet_core.py +++ b/csrank/core/ranknet_core.py @@ -24,11 +24,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -99,7 +99,9 @@ def construct_model(self): output = self.output_node(merged_inputs) model = Model(inputs=[self.x1, self.x2], outputs=output) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py index 5e8315d0..64b6e32c 100644 --- a/csrank/discretechoice/cmpnet_discrete_choice.py +++ b/csrank/discretechoice/cmpnet_discrete_choice.py @@ -15,11 +15,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py index 0da4e5f2..e29ca3dd 100644 --- a/csrank/discretechoice/fate_discrete_choice.py +++ b/csrank/discretechoice/fate_discrete_choice.py @@ -14,12 +14,12 @@ def __init__( n_hidden_set_layers=2, n_hidden_set_units=2, 
loss_function="categorical_hinge", - metrics=["categorical_accuracy"], + metrics=("categorical_accuracy",), n_hidden_joint_layers=32, n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, random_state=None, diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py index 2705dada..3145ecf8 100644 --- a/csrank/discretechoice/feta_discrete_choice.py +++ b/csrank/discretechoice/feta_discrete_choice.py @@ -29,11 +29,11 @@ def __init__( num_subsample=5, loss_function="categorical_hinge", batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=["categorical_accuracy"], + metrics=("categorical_accuracy",), batch_size=256, random_state=None, **kwargs, @@ -116,7 +116,7 @@ def _construct_layers(self, **kwargs): # Todo: Variable sized input # X = Input(shape=(None, n_features)) if self.batch_normalization: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ NormalizedDense( self.n_units, name="hidden_zeroth_{}".format(x), *kwargs @@ -128,7 +128,7 @@ def _construct_layers(self, **kwargs): for x in range(self.n_hidden) ] else: - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.hidden_layers_zeroth = [ Dense(self.n_units, name="hidden_zeroth_{}".format(x), **kwargs) for x in range(self.n_hidden) @@ -144,7 +144,7 @@ def _construct_layers(self, **kwargs): kernel_regularizer=self.kernel_regularizer, name="score", ) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.output_node_zeroth = Dense( 1, activation="linear", @@ -178,7 +178,7 @@ def construct_model(self): def create_input_lambda(i): return Lambda(lambda x: x[:, i]) - if self._use_zeroth_model: + if self.add_zeroth_order_model: self.logger.debug("Create 0th order model") zeroth_order_outputs = [] inputs = [] @@ -193,7 +193,7 @@ def create_input_lambda(i): self.logger.debug("Create 1st order model") outputs = [list() for _ in range(self.n_objects_fit_)] for i, j in combinations(range(self.n_objects_fit_), 2): - if self._use_zeroth_model: + if self.add_zeroth_order_model: x1 = inputs[i] x2 = inputs[j] else: @@ -223,7 +223,7 @@ def create_input_lambda(i): ] scores = concatenate(scores) self.logger.debug("1st order model finished") - if self._use_zeroth_model: + if self.add_zeroth_order_model: def get_score_object(i): return Lambda(lambda x: x[:, i, None]) @@ -242,9 +242,9 @@ def get_score_object(i): scores.append(self.weighted_sum(concat_scores[i])) scores = concatenate(scores) - # if self._use_zeroth_model: + # if self.add_zeroth_order_model: # scores = add([scores, zeroth_order_scores]) - # if self._use_zeroth_model: + # if self.add_zeroth_order_model: # def expand_dims(): # return Lambda(lambda x: x[..., None]) # @@ -259,12 +259,14 @@ def get_score_object(i): # kernel_regularizer=self.kernel_regularizer, use_bias=False) # scores = weighted_sum(concat_scores) # scores = squeeze_dims()(scores) - if not self._use_zeroth_model: + if not self.add_zeroth_order_model: scores = Activation("sigmoid")(scores) model = Model(inputs=self.input_layer, outputs=scores) self.logger.debug("Compiling complete model...") model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model @@ -299,7 +301,7 @@ def 
_predict_scores_using_pairs(self, X, **kwd): scores[n] += result.reshape(n_objects, n_objects - 1).mean(axis=1) del result del pairs - if self._use_zeroth_model: + if self.add_zeroth_order_model: scores_zero = self.zero_order_model.predict(X.reshape(-1, n_features)) scores_zero = scores_zero.reshape(n_instances, n_objects) model = self._create_weighted_model(n_objects) diff --git a/csrank/discretechoice/generalized_nested_logit.py b/csrank/discretechoice/generalized_nested_logit.py index 3df68d86..083d9e12 100644 --- a/csrank/discretechoice/generalized_nested_logit.py +++ b/csrank/discretechoice/generalized_nested_logit.py @@ -92,7 +92,7 @@ def __init__( self.n_nests = n_nests self.alpha = alpha - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.random_state = random_state known_regularization_functions = {"l1", "l2"} @@ -257,6 +257,7 @@ def construct_model(self, X, Y): model : pymc3 Model :class:`pm.Model` """ self.random_state_ = check_random_state(self.random_state) + self.loss_function_ = likelihood_dict.get(self.loss_function, None) if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) @@ -280,7 +281,7 @@ def construct_model(self, X, Y): lambda_k = pm.Uniform("lambda_k", self.alpha, 1.0, shape=self.n_nests) self.p = self.get_probabilities(utility, lambda_k, alpha_ik) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -403,7 +404,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/mixed_logit_model.py b/csrank/discretechoice/mixed_logit_model.py index 21ce0b80..86efc9f0 100644 --- a/csrank/discretechoice/mixed_logit_model.py +++ b/csrank/discretechoice/mixed_logit_model.py @@ -72,7 +72,7 @@ def __init__(self, n_mixtures=4, loss_function="", regularization="l2", **kwargs [3] Daniel McFadden and Kenneth Train. „Mixed MNL models for discrete response“. In: Journal of applied Econometrics 15.5 (2000), pp. 
447–470 """ self.logger = logging.getLogger(MixedLogitModel.__name__) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -155,6 +155,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -163,7 +164,7 @@ def construct_model(self, X, Y): utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = tt.mean(ttu.softmax(utility, axis=1), axis=2) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -257,7 +258,7 @@ def set_tunable_parameters( Dictionary containing parameter values which are not tuned for the network """ if loss_function in likelihood_dict: - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.n_mixtures = n_mixtures self.regularization = regularization self.model = None diff --git a/csrank/discretechoice/multinomial_logit_model.py b/csrank/discretechoice/multinomial_logit_model.py index d6fe4f5f..fb52f095 100644 --- a/csrank/discretechoice/multinomial_logit_model.py +++ b/csrank/discretechoice/multinomial_logit_model.py @@ -64,7 +64,7 @@ def __init__(self, loss_function="", regularization="l2", **kwargs): [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986 """ self.logger = logging.getLogger(MultinomialLogitModel.__name__) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -151,6 +151,7 @@ def construct_model(self, X, Y): print_dictionary(self.model_configuration) ) ) + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -162,7 +163,7 @@ def construct_model(self, X, Y): self.p = ttu.softmax(utility, axis=1) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -257,7 +258,7 @@ def set_tunable_parameters(self, loss_function=None, regularization="l1", **poin raise ValueError( f"Loss function {loss_function} is unknown. 
Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/nested_logit_model.py b/csrank/discretechoice/nested_logit_model.py index 328a9ef8..326620dc 100644 --- a/csrank/discretechoice/nested_logit_model.py +++ b/csrank/discretechoice/nested_logit_model.py @@ -91,7 +91,7 @@ def __init__( self.n_nests = n_nests self.alpha = alpha self.random_state = random_state - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -313,6 +313,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) if np.prod(X.shape) > self.threshold: upper_bound = int(self.threshold / np.prod(X.shape[1:])) indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False) @@ -339,7 +340,7 @@ def construct_model(self, X, Y): self.p = self.get_probabilities(utility, lambda_k, utility_k) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -464,7 +465,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.cluster_model = None self.features_nests = None self.model = None diff --git a/csrank/discretechoice/paired_combinatorial_logit.py b/csrank/discretechoice/paired_combinatorial_logit.py index 7b19ff90..0cc4d084 100644 --- a/csrank/discretechoice/paired_combinatorial_logit.py +++ b/csrank/discretechoice/paired_combinatorial_logit.py @@ -91,7 +91,7 @@ def __init__( self.logger = logging.getLogger(PairedCombinatorialLogit.__name__) self.alpha = alpha self.random_state = random_state - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function known_regularization_functions = {"l1", "l2"} if regularization not in known_regularization_functions: raise ValueError( @@ -260,6 +260,7 @@ def construct_model(self, X, Y): ------- model : pymc3 Model :class:`pm.Model` """ + self.loss_function_ = likelihood_dict.get(self.loss_function, None) with pm.Model() as self.model: self.Xt = theano.shared(X) self.Yt = theano.shared(Y) @@ -269,7 +270,7 @@ def construct_model(self, X, Y): utility = tt.dot(self.Xt, weights_dict["weights"]) self.p = self.get_probabilities(utility, lambda_k) LogLikelihood( - "yl", loss_func=self.loss_function, p=self.p, observed=self.Yt + "yl", loss_func=self.loss_function_, p=self.p, observed=self.Yt ) self.logger.info("Model construction completed") @@ -380,7 +381,7 @@ def set_tunable_parameters( raise ValueError( f"Loss function {loss_function} is unknown. 
Must be one of {set(likelihood_dict.keys())}" ) - self.loss_function = likelihood_dict.get(loss_function, None) + self.loss_function = loss_function self.regularization = regularization self.model = None self.trace = None diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py index 7fc5bf0a..0df1a3e4 100644 --- a/csrank/discretechoice/ranknet_discrete_choice.py +++ b/csrank/discretechoice/ranknet_discrete_choice.py @@ -15,11 +15,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/learner.py b/csrank/learner.py index 12be59d7..a7fb92b8 100644 --- a/csrank/learner.py +++ b/csrank/learner.py @@ -1,6 +1,8 @@ from abc import ABCMeta from abc import abstractmethod +from sklearn.base import BaseEstimator + from csrank.tunable import Tunable @@ -13,7 +15,7 @@ def filter_dict_by_prefix(source, prefix): return result -class Learner(Tunable, metaclass=ABCMeta): +class Learner(Tunable, BaseEstimator, metaclass=ABCMeta): def _initialize_optimizer(self): optimizer_params = filter_dict_by_prefix(self.__dict__, "optimizer__") optimizer_params.update(filter_dict_by_prefix(self.kwargs, "optimizer__")) diff --git a/csrank/objectranking/cmp_net.py b/csrank/objectranking/cmp_net.py index 25d013b0..dc4deb96 100644 --- a/csrank/objectranking/cmp_net.py +++ b/csrank/objectranking/cmp_net.py @@ -17,11 +17,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/objectranking/fate_object_ranker.py b/csrank/objectranking/fate_object_ranker.py index 841e6bff..ca07f737 100644 --- a/csrank/objectranking/fate_object_ranker.py +++ b/csrank/objectranking/fate_object_ranker.py @@ -18,11 +18,11 @@ def __init__( n_hidden_joint_units=32, activation="selu", kernel_initializer="lecun_normal", - kernel_regularizer=l2(0.01), + kernel_regularizer=l2(), optimizer=SGD, batch_size=256, loss_function=hinged_rank_loss, - metrics=[zero_one_rank_loss_for_scores_ties], + metrics=(zero_one_rank_loss_for_scores_ties,), random_state=None, **kwargs, ): diff --git a/csrank/objectranking/feta_object_ranker.py b/csrank/objectranking/feta_object_ranker.py index 02ee8bbb..e341ff16 100644 --- a/csrank/objectranking/feta_object_ranker.py +++ b/csrank/objectranking/feta_object_ranker.py @@ -20,11 +20,11 @@ def __init__( num_subsample=5, loss_function=hinged_rank_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="selu", optimizer=SGD, - metrics=None, + metrics=(), batch_size=256, random_state=None, **kwargs, diff --git a/csrank/objectranking/list_net.py b/csrank/objectranking/list_net.py index 74c136ab..896fd888 100644 --- a/csrank/objectranking/list_net.py +++ b/csrank/objectranking/list_net.py @@ -30,11 +30,11 @@ def __init__( n_units=8, loss_function=plackett_luce_loss, batch_normalization=False, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), activation="selu", kernel_initializer="lecun_normal", optimizer=SGD, - 
metrics=[zero_one_rank_loss_for_scores_ties], + metrics=(zero_one_rank_loss_for_scores_ties,), batch_size=256, random_state=None, **kwargs, @@ -214,7 +214,9 @@ def construct_model(self): merged = concatenate(outputs) model = Model(inputs=self.input_layer, outputs=merged) model.compile( - loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics + loss=self.loss_function, + optimizer=self.optimizer_, + metrics=list(self.metrics), ) return model diff --git a/csrank/objectranking/rank_net.py b/csrank/objectranking/rank_net.py index dc98c4ac..fba9aee6 100644 --- a/csrank/objectranking/rank_net.py +++ b/csrank/objectranking/rank_net.py @@ -17,11 +17,11 @@ def __init__( n_units=8, loss_function="binary_crossentropy", batch_normalization=True, - kernel_regularizer=l2(1e-4), + kernel_regularizer=l2(), kernel_initializer="lecun_normal", activation="relu", optimizer=SGD, - metrics=["binary_accuracy"], + metrics=("binary_accuracy",), batch_size=256, random_state=None, **kwargs,
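The `step_decay` method documented in the `feta_linear.py` hunk implements a stepwise learning-rate schedule: the initial rate is multiplied by `drop` once every `epochs_drop` epochs. As a standalone sketch (the free function below is a hypothetical extraction, not part of csrank; the defaults are the ones used by `set_tunable_parameters`):

```python
import math


def step_decay(epoch, learning_rate=1e-3, epochs_drop=300, drop=0.1):
    # Same rate computation as FETALinearCore.step_decay (which additionally
    # rebuilds the gradient-descent optimizer with the new rate).
    step = math.floor((1 + epoch) / epochs_drop)
    return learning_rate * math.pow(drop, step)


print(step_decay(0))    # ~1e-3: epochs 0..298 keep the initial rate
print(step_decay(299))  # ~1e-4: first drop after `epochs_drop` epochs
print(step_decay(599))  # ~1e-5: second drop
```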
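Taken together, the recurring changes in this diff (list defaults such as `["binary_accuracy"]` replaced by tuples, constructor arguments stored under their own names like `add_zeroth_order_model`, derived values such as `list(self.metrics)` and the pymc3 models' `loss_function_` computed only when the model is built, and `Learner` now also inheriting from `sklearn.base.BaseEstimator` in the `csrank/learner.py` hunk) follow the scikit-learn estimator convention. Below is a minimal sketch of that convention; `ToyRanker` and its parameters are hypothetical and only illustrate the pattern, they are not part of csrank:

```python
from sklearn.base import BaseEstimator, clone


class ToyRanker(BaseEstimator):
    """Hypothetical estimator illustrating the convention adopted in this diff."""

    def __init__(self, metrics=("binary_accuracy",), add_zeroth_order_model=False):
        # Store arguments unmodified and under their constructor names
        # (no renaming such as `self._use_zeroth_model`, no `list(...)` here).
        self.metrics = metrics
        self.add_zeroth_order_model = add_zeroth_order_model

    def fit(self, X, y):
        # Derived, mutable state gets a trailing underscore and is only created
        # when it is needed, analogous to `metrics=list(self.metrics)` in the
        # `model.compile` calls above.
        self.metrics_ = list(self.metrics)
        return self


estimator = ToyRanker()
print(estimator.get_params())  # constructor arguments, returned unmodified
cloned = clone(estimator)      # works because nothing was renamed or mutated
```

Storing the arguments unmodified is what lets `get_params`/`clone` (and therefore hyperparameter search utilities) reconstruct an equivalent estimator, and immutable tuple defaults avoid the usual pitfall of a mutable default list being shared across instances.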