diff --git a/poc/datasets/variable_choice_datasets.py b/poc/datasets/variable_choice_datasets.py index 24438a06..0830821f 100644 --- a/poc/datasets/variable_choice_datasets.py +++ b/poc/datasets/variable_choice_datasets.py @@ -87,7 +87,6 @@ def __init__( ) super().__init__(torch.tensor(x).float(), torch.tensor(y_true).float()) - # TODO (next) adapt this, write some tests and understand it def make_globular_pareto_choices( self, n_instances: int, @@ -160,17 +159,19 @@ def make_randn_pareto_choices( cluster_size: int, n_features: int, n_objects: int, - center: float, + center: np.array, ): """Generate random objects from a d-dimensional isometric normal distribution. This should be the easiest possible Pareto-problem, since the model can learn a latent-utility which scores how likely a point is on the front (independent of the other points).""" + # Generate a single cluster from a standard normal distribution. X = self.random_state.randn(cluster_size, n_objects, n_features) Y = np.empty((cluster_size, n_objects), dtype=bool) for i in range(cluster_size): Y[i] = pareto_front(X[i]) + # Return the shifted cluster with its Pareto front. return X + center, Y X = np.empty((n_instances, n_objects, n_features)) @@ -179,12 +180,17 @@ def make_randn_pareto_choices( center = sample_unit_ball( n_inst=1, n_features=n_features, radius=cluster_spread ) + # Cluster of normally distributed points, shifted to center. + # Center is drawn by sample_unit_ball with radius=cluster_spread. x, y = make_randn_pareto_choices( cluster_size=cluster_size, n_features=n_features, n_objects=n_objects, center=center, ) + # TODO(next) how do clusters and instances relate? Is each cluster + # one instance? Then why the n_instances / cluster_size in the for + # loop? X[i * cluster_size : (i + 1) * cluster_size] = x Y[i * cluster_size : (i + 1) * cluster_size] = y return X, Y