From 784650e3069d057e15d333b8a23bf53c83377831 Mon Sep 17 00:00:00 2001 From: Timo Kaufmann Date: Sat, 31 Oct 2020 16:33:15 +0100 Subject: [PATCH] Paint it black --- poc/discrete_choice_datasets.py | 8 ++++++-- poc/discrete_choice_losses.py | 14 +++++++++----- poc/estimators.py | 17 +++++++++++++---- poc/experiment.py | 2 ++ poc/rank_losses.py | 2 +- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/poc/discrete_choice_datasets.py b/poc/discrete_choice_datasets.py index 15f7356d..4b2186f0 100644 --- a/poc/discrete_choice_datasets.py +++ b/poc/discrete_choice_datasets.py @@ -3,10 +3,11 @@ import numpy as np import torch + def scores_to_choice_mask(scores, choice_size): (n_instances, n_objects) = scores.shape (_values, sort_indices) = torch.sort(scores) - top_k_indices = sort_indices[:,0:choice_size] + top_k_indices = sort_indices[:, 0:choice_size] # Convert an index array to a mask. I'm sure there is an easier and # better way to do this, but this works. @@ -17,6 +18,7 @@ def scores_to_choice_mask(scores, choice_size): mask[rows, top_k_indices] = 1 return mask + class TrivialDiscreteChoiceProblem(TensorDataset): """Generate a trivial discrete choice problem for testing purposes. @@ -63,5 +65,7 @@ def __init__( weights = random_state.rand(n_features) weighted_feature_sums = np.dot(x, weights) - y_true = scores_to_choice_mask(torch.tensor(weighted_feature_sums), choice_size=choice_size) + y_true = scores_to_choice_mask( + torch.tensor(weighted_feature_sums), choice_size=choice_size + ) super().__init__(torch.tensor(x), y_true) diff --git a/poc/discrete_choice_losses.py b/poc/discrete_choice_losses.py index a21069f1..22661864 100644 --- a/poc/discrete_choice_losses.py +++ b/poc/discrete_choice_losses.py @@ -2,7 +2,7 @@ import torch.nn as nn -class CategoricalHingeLossMax(): +class CategoricalHingeLossMax: # "max" aggregated version of CHL, described on page 14/15 of # https://arxiv.org/pdf/1901.10860.pdf. # Should be: First true, then predicted (as in Cross-Entropy Loss) @@ -35,13 +35,17 @@ def __call__(self, scores, true_choice): # chosen_scores = torch.masked_select(scores, chosen_mask) # not_chosen_scores = torch.masked_select(scores, not_chosen_mask) - # not quite, but dealing with true infintiy is hairy and there should be no practical difference - infty = 2**32 + # not quite, but dealing with true infintiy is hairy and there should be no practical difference + infty = 2 ** 32 # Mask out the chosen scores from the max with a value of -infinity. - (max_score_not_chosen, _indices) = torch.max(scores - true_choice * infty, dim=1) + (max_score_not_chosen, _indices) = torch.max( + scores - true_choice * infty, dim=1 + ) # Mask out the not-chosen scores from the min with a value of +infinity. - (min_score_chosen, _indices) = torch.min(scores + (1 - true_choice) * infty, dim=1) + (min_score_chosen, _indices) = torch.min( + scores + (1 - true_choice) * infty, dim=1 + ) # print(max_score_not_chosen) # print(min_score_chosen) diff --git a/poc/estimators.py b/poc/estimators.py index 778078ce..7556e931 100644 --- a/poc/estimators.py +++ b/poc/estimators.py @@ -18,7 +18,7 @@ def __init__(self, scoring_module, criterion=HingedRankLoss, **kwargs): def initialize_module(self, *args, **kwargs): _params = self.get_params_for("module") - self.module_ = self.scoring_module(n_features = self.n_features_).double() + self.module_ = self.scoring_module(n_features=self.n_features_).double() def fit(self, dataset): (_n_objects, self.n_features_) = dataset[0][0].shape @@ -30,6 +30,7 @@ def predict(self, objects): (_values, indices) = torch.sort(evaluations.squeeze()) return indices + class FETARankingEstimator(ScoreRankingEstimator): """A ranking estimator based on the FETA-Approach. @@ -43,6 +44,7 @@ class FETARankingEstimator(ScoreRankingEstimator): def __init__(self, criterion=HingedRankLoss, **kwargs): super().__init__(scoring_module=FETAScoring, criterion=criterion, **kwargs) + class FATERankingEstimator(ScoreRankingEstimator): """A ranking estimator based on the FATE-Approach. @@ -57,14 +59,16 @@ def __init__(self, criterion=HingedRankLoss, **kwargs): class ScoreDiscreteChoiceEstimator(skorch.NeuralNet): - def __init__(self, scoring_module, choice_size, criterion=CategoricalHingeLossMax, **kwargs): + def __init__( + self, scoring_module, choice_size, criterion=CategoricalHingeLossMax, **kwargs + ): super().__init__(module=None, criterion=criterion, **kwargs) self.scoring_module = scoring_module self.choice_size = choice_size def initialize_module(self, *args, **kwargs): _params = self.get_params_for("module") - self.module_ = FETAScoring(n_features = self.n_features_).double() + self.module_ = FETAScoring(n_features=self.n_features_).double() def fit(self, dataset): (_n_objects, self.n_features_) = dataset[0][0].shape @@ -92,4 +96,9 @@ class FETADiscreteChoiceEstimator(ScoreDiscreteChoiceEstimator): """ def __init__(self, choice_size=1, criterion=CategoricalHingeLossMax, **kwargs): - super().__init__(choice_size=choice_size, scoring_module=FETAScoring, criterion=criterion, **kwargs) + super().__init__( + choice_size=choice_size, + scoring_module=FETAScoring, + criterion=criterion, + **kwargs + ) diff --git a/poc/experiment.py b/poc/experiment.py index 4ccf3a10..9babaf36 100644 --- a/poc/experiment.py +++ b/poc/experiment.py @@ -29,6 +29,7 @@ def _ranking_experiment(): print("===OUTPUT Ranking===") print(estimator.predict(test_ds[0][0].unsqueeze(0))) + def _ranking_experiment_fate(): n_objects = 5 n_features = 1 @@ -51,6 +52,7 @@ def _ranking_experiment_fate(): print("===OUTPUT Ranking===") print(estimator.predict(test_ds[0][0].unsqueeze(0))) + def _choice_experiment(): n_objects = 5 n_features = 1 diff --git a/poc/rank_losses.py b/poc/rank_losses.py index 061b0998..ebf23a2c 100644 --- a/poc/rank_losses.py +++ b/poc/rank_losses.py @@ -2,7 +2,7 @@ import torch.nn as nn -class HingedRankLoss(): +class HingedRankLoss: # Should be: First true, then predicted (as in Cross-Entropy Loss) # But for some reason skorch calls it with a swapped argument order. def __call__(self, comparison_rankings, true_rankings):