Skip to content

Commit

Permalink
Merge pull request #117 from timokau/delay-random-state-validation
Browse files Browse the repository at this point in the history
Delay random state validation
  • Loading branch information
timokau authored May 14, 2020
2 parents 2019bff + 359cd2d commit b8f8288
Show file tree
Hide file tree
Showing 17 changed files with 60 additions and 45 deletions.
5 changes: 3 additions & 2 deletions csrank/choicefunction/generalized_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def __init__(
self.regularization = regularization
else:
self.regularization = "l2"
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.model = None
self.trace = None
self.trace_vi = None
Expand Down Expand Up @@ -224,9 +224,10 @@ def fit(
**kwargs :
Keyword arguments for the fit function
"""
self.random_state_ = check_random_state(self.random_state)
if tune_size > 0:
X_train, X_val, Y_train, Y_val = train_test_split(
X, Y, test_size=tune_size, random_state=self.random_state
X, Y, test_size=tune_size, random_state=self.random_state_
)
try:
self._fit(
Expand Down
3 changes: 2 additions & 1 deletion csrank/core/cmpnet_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(
del kwargs[key]
self.kwargs = kwargs
self.threshold_instances = int(1e10)
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.model = None
self._construct_layers(
kernel_regularizer=self.kernel_regularizer,
Expand Down Expand Up @@ -156,6 +156,7 @@ def fit(
**kwd :
Keyword arguments for the fit function
"""
self.random_state_ = check_random_state(self.random_state)
x1, x2, y_double = self._convert_instances_(X, Y)

self.logger.debug("Instances created {}".format(x1.shape[0]))
Expand Down
12 changes: 6 additions & 6 deletions csrank/core/fate_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(
self.n_hidden_set_units = n_hidden_set_units
self.learning_rate = learning_rate
self.batch_size = batch_size
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.n_object_features = n_object_features
self.loss_function = loss_function
self.n_objects = n_objects
Expand All @@ -46,25 +46,25 @@ def _construct_model_(self, n_objects):
self.Y = tf.placeholder("float32", [None, n_objects])
std = 1 / np.sqrt(self.n_object_features)
self.b1 = tf.Variable(
self.random_state.normal(loc=0, scale=std, size=self.n_hidden_set_units),
self.random_state_.normal(loc=0, scale=std, size=self.n_hidden_set_units),
dtype=tf.float32,
)
self.W1 = tf.Variable(
self.random_state.normal(
self.random_state_.normal(
loc=0, scale=std, size=(self.n_object_features, self.n_hidden_set_units)
),
dtype=tf.float32,
)
self.W2 = tf.Variable(
self.random_state.normal(
self.random_state_.normal(
loc=0,
scale=std,
size=(self.n_object_features + self.n_hidden_set_units),
),
dtype=tf.float32,
)
self.b2 = tf.Variable(
self.random_state.normal(loc=0, scale=std, size=1), dtype=tf.float32
self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32
)

set_rep = (
Expand Down Expand Up @@ -93,6 +93,7 @@ def step_decay(self, epoch):
def fit(
self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd
):
self.random_state_ = check_random_state(self.random_state)
# Global Variables Initializer
n_instances, n_objects, n_features = X.shape
assert n_features == self.n_object_features
Expand Down Expand Up @@ -178,7 +179,6 @@ def set_tunable_parameters(
self.n_hidden_set_units = n_hidden_set_units
self.batch_size = batch_size
self.learning_rate = learning_rate
self._construct_model_(self.n_objects)
self.epochs_drop = epochs_drop
self.drop = drop
if len(point) > 0:
Expand Down
3 changes: 2 additions & 1 deletion csrank/core/fate_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(
Keyword arguments for the hidden units
"""
self.logger = logging.getLogger(FATENetworkCore.__name__)
self.random_state = check_random_state(random_state)
self.random_state = random_state

self.n_hidden_joint_layers = n_hidden_joint_layers
self.n_hidden_joint_units = n_hidden_joint_units
Expand Down Expand Up @@ -500,6 +500,7 @@ def fit(
**kwargs :
Keyword arguments for the fit function
"""
self.random_state_ = check_random_state(self.random_state)
self._fit(
X=X,
Y=Y,
Expand Down
16 changes: 9 additions & 7 deletions csrank/core/feta_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(
):
self.learning_rate = learning_rate
self.batch_size = batch_size
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.n_object_features = n_object_features
self.loss_function = loss_function
self.n_objects = n_objects
Expand All @@ -47,21 +47,23 @@ def _construct_model_(self, n_objects):
self.Y = tf.placeholder("float32", [None, n_objects])
std = 1 / np.sqrt(self.n_object_features)
self.b1 = tf.Variable(
self.random_state.normal(loc=0, scale=std, size=1), dtype=tf.float32
self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32
)
self.W1 = tf.Variable(
self.random_state.normal(loc=0, scale=std, size=2 * self.n_object_features),
self.random_state_.normal(
loc=0, scale=std, size=2 * self.n_object_features
),
dtype=tf.float32,
)
self.W2 = tf.Variable(
self.random_state.normal(loc=0, scale=std, size=self.n_object_features),
self.random_state_.normal(loc=0, scale=std, size=self.n_object_features),
dtype=tf.float32,
)
self.b2 = tf.Variable(
self.random_state.normal(loc=0, scale=std, size=1), dtype=tf.float32
self.random_state_.normal(loc=0, scale=std, size=1), dtype=tf.float32
)
self.W_out = tf.Variable(
self.random_state.normal(loc=0, scale=std, size=2),
self.random_state_.normal(loc=0, scale=std, size=2),
dtype=tf.float32,
name="W_out",
)
Expand Down Expand Up @@ -97,6 +99,7 @@ def step_decay(self, epoch):
def fit(
self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd
):
self.random_state_ = check_random_state(self.random_state)
# Global Variables Initializer
n_instances, n_objects, n_features = X.shape
assert n_features == self.n_object_features
Expand Down Expand Up @@ -183,7 +186,6 @@ def set_tunable_parameters(
"""
self.batch_size = batch_size
self.learning_rate = learning_rate
self._construct_model_(self.n_objects)
self.epochs_drop = epochs_drop
self.drop = drop
if len(point) > 0:
Expand Down
5 changes: 3 additions & 2 deletions csrank/core/feta_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(
**kwargs
):
self.logger = logging.getLogger(FETANetwork.__name__)
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.kernel_regularizer = kernel_regularizer
self.kernel_initializer = kernel_initializer
self.batch_normalization = batch_normalization
Expand Down Expand Up @@ -290,6 +290,7 @@ def fit(
Keyword arguments for the fit function
"""
self.logger.debug("Enter fit function...")
self.random_state_ = check_random_state(self.random_state)

X, Y = self.sub_sampling(X, Y)
self.model = self.construct_model()
Expand All @@ -311,7 +312,7 @@ def fit(
def sub_sampling(self, X, Y):
if self._n_objects > self.max_number_of_objects:
bucket_size = int(self._n_objects / self.max_number_of_objects)
idx = self.random_state.randint(bucket_size, size=(len(X), self.n_objects))
idx = self.random_state_.randint(bucket_size, size=(len(X), self.n_objects))
# TODO: subsampling multiple rankings
idx += np.arange(start=0, stop=self._n_objects, step=bucket_size)[
: self.n_objects
Expand Down
7 changes: 4 additions & 3 deletions csrank/core/pairwise_svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(
self.C = C
self.tol = tol
self.logger = logging.getLogger("RankSVM")
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.threshold_instances = int(1e10)
self.fit_intercept = fit_intercept
self.weights = None
Expand All @@ -70,21 +70,22 @@ def fit(self, X, Y, **kwargs):
Keyword arguments for the fit function
"""
self.random_state_ = check_random_state(self.random_state)
x_train, y_single = self._convert_instances_(X, Y)
if x_train.shape[0] > self.threshold_instances:
self.model = LogisticRegression(
C=self.C,
tol=self.tol,
fit_intercept=self.fit_intercept,
random_state=self.random_state,
random_state=self.random_state_,
)
self.logger.info("Logistic Regression model ")
else:
self.model = LinearSVC(
C=self.C,
tol=self.tol,
fit_intercept=self.fit_intercept,
random_state=self.random_state,
random_state=self.random_state_,
)
self.logger.info("Linear SVC model ")

Expand Down
3 changes: 2 additions & 1 deletion csrank/core/ranknet_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __init__(
self._scoring_model = None
self.model = None
self.hash_file = None
self.random_state = check_random_state(random_state)
self.random_state = random_state
self._construct_layers(
kernel_regularizer=self.kernel_regularizer,
kernel_initializer=self.kernel_initializer,
Expand Down Expand Up @@ -148,6 +148,7 @@ def fit(
**kwd :
Keyword arguments for the fit function
"""
self.random_state_ = check_random_state(self.random_state)
X1, X2, Y_single = self._convert_instances_(X, Y)

self.logger.debug("Instances created {}".format(X1.shape[0]))
Expand Down
6 changes: 3 additions & 3 deletions csrank/discretechoice/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ def __init__(self, random_state=None, **kwargs):
"""

self.logger = logging.getLogger(RandomBaselineDC.__name__)
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.model = None

def fit(self, X, Y, **kwd):
pass
self.random_state_ = check_random_state(self.random_state)

def _predict_scores_fixed(self, X, **kwargs):
n_instances, n_objects, n_features = X.shape
return self.random_state.rand(n_instances, n_objects)
return self.random_state_.rand(n_instances, n_objects)

def predict_scores(self, X, **kwargs):
return super().predict_scores(X, **kwargs)
Expand Down
5 changes: 3 additions & 2 deletions csrank/discretechoice/generalized_nested_logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def __init__(
self.alpha = alpha
self.loss_function = likelihood_dict.get(loss_function, None)

self.random_state = check_random_state(random_state)
self.random_state = random_state
if regularization in ["l1", "l2"]:
self.regularization = regularization
else:
Expand Down Expand Up @@ -261,9 +261,10 @@ def construct_model(self, X, Y):
-------
model : pymc3 Model :class:`pm.Model`
"""
self.random_state_ = check_random_state(self.random_state)
if np.prod(X.shape) > self.threshold:
upper_bound = int(self.threshold / np.prod(X.shape[1:]))
indices = self.random_state.choice(X.shape[0], upper_bound, replace=False)
indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False)
X = X[indices, :, :]
Y = Y[indices, :]
self.logger.info(
Expand Down
8 changes: 5 additions & 3 deletions csrank/discretechoice/nested_logit_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def __init__(
else:
self.n_nests = n_nests
self.alpha = alpha
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.loss_function = likelihood_dict.get(loss_function, None)
if regularization in ["l1", "l2"]:
self.regularization = regularization
Expand Down Expand Up @@ -192,11 +192,12 @@ def create_nests(self, X):
(n_instances, n_objects) Values for each object implying the nest it belongs to. For example for :math:`2` nests the value 0 implies that object is allocated to nest 1 and value 1 implies it is allocated to nest 2.
"""
self.random_state_ = check_random_state(self.random_state)
n, n_obj, n_dim = X.shape
objects = X.reshape(n * n_obj, n_dim)
if self.cluster_model is None:
self.cluster_model = MiniBatchKMeans(
n_clusters=self.n_nests, random_state=self.random_state
n_clusters=self.n_nests, random_state=self.random_state_
).fit(objects)
self.features_nests = self.cluster_model.cluster_centers_
prediction = self.cluster_model.labels_
Expand Down Expand Up @@ -321,7 +322,7 @@ def construct_model(self, X, Y):
"""
if np.prod(X.shape) > self.threshold:
upper_bound = int(self.threshold / np.prod(X.shape[1:]))
indices = self.random_state.choice(X.shape[0], upper_bound, replace=False)
indices = self.random_state_.choice(X.shape[0], upper_bound, replace=False)
X = X[indices, :, :]
Y = Y[indices, :]
self.logger.info(
Expand Down Expand Up @@ -399,6 +400,7 @@ def fit(
**kwargs :
Keyword arguments for the fit function of :meth:`pymc3.fit` or :meth:`pymc3.sample`
"""
self.random_state_ = check_random_state(self.random_state)
self.construct_model(X, Y)
fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs)

Expand Down
3 changes: 2 additions & 1 deletion csrank/discretechoice/paired_combinatorial_logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def __init__(
self.nests_indices = np.array(list(combinations(np.arange(n_objects), 2)))
self.n_nests = len(self.nests_indices)
self.alpha = alpha
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.loss_function = likelihood_dict.get(loss_function, None)
if regularization in ["l1", "l2"]:
self.regularization = regularization
Expand Down Expand Up @@ -329,6 +329,7 @@ def fit(
**kwargs :
Keyword arguments for the fit function of :meth:`pymc3.fit` or :meth:`pymc3.sample`
"""
self.random_state_ = check_random_state(self.random_state)
self.construct_model(X, Y)
fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs)

Expand Down
6 changes: 3 additions & 3 deletions csrank/objectranking/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ def __init__(self, random_state=None, **kwargs):
"""

self.logger = logging.getLogger(RandomBaselineRanker.__name__)
self.random_state = check_random_state(random_state)
self.random_state = random_state
self.model = None

def fit(self, X, Y, **kwd):
pass
self.random_state_ = check_random_state(self.random_state)

def _predict_scores_fixed(self, X, **kwargs):
n_instances, n_objects, n_features = X.shape
return self.random_state.rand(n_instances, n_objects)
return self.random_state_.rand(n_instances, n_objects)

def predict_scores(self, X, **kwargs):
return super().predict_scores(X, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion csrank/objectranking/cmp_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _convert_instances_(self, X, Y):
garbage, x1, x2, y_double, garbage = generate_complete_pairwise_dataset(X, Y)
del garbage
if x1.shape[0] > self.threshold_instances:
indices = self.random_state.choice(
indices = self.random_state_.choice(
x1.shape[0], self.threshold_instances, replace=False
)
x1 = x1[indices, :]
Expand Down
Loading

0 comments on commit b8f8288

Please sign in to comment.