Skip to content

Commit

Permalink
Paint it black
Browse files Browse the repository at this point in the history
  • Loading branch information
timokau committed Oct 31, 2020
1 parent a72ab6a commit 784650e
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 12 deletions.
8 changes: 6 additions & 2 deletions poc/discrete_choice_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import numpy as np
import torch


def scores_to_choice_mask(scores, choice_size):
(n_instances, n_objects) = scores.shape
(_values, sort_indices) = torch.sort(scores)
top_k_indices = sort_indices[:,0:choice_size]
top_k_indices = sort_indices[:, 0:choice_size]

# Convert an index array to a mask. I'm sure there is an easier and
# better way to do this, but this works.
Expand All @@ -17,6 +18,7 @@ def scores_to_choice_mask(scores, choice_size):
mask[rows, top_k_indices] = 1
return mask


class TrivialDiscreteChoiceProblem(TensorDataset):
"""Generate a trivial discrete choice problem for testing purposes.
Expand Down Expand Up @@ -63,5 +65,7 @@ def __init__(
weights = random_state.rand(n_features)
weighted_feature_sums = np.dot(x, weights)

y_true = scores_to_choice_mask(torch.tensor(weighted_feature_sums), choice_size=choice_size)
y_true = scores_to_choice_mask(
torch.tensor(weighted_feature_sums), choice_size=choice_size
)
super().__init__(torch.tensor(x), y_true)
14 changes: 9 additions & 5 deletions poc/discrete_choice_losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import torch.nn as nn


class CategoricalHingeLossMax():
class CategoricalHingeLossMax:
# "max" aggregated version of CHL, described on page 14/15 of
# https://arxiv.org/pdf/1901.10860.pdf.
# Should be: First true, then predicted (as in Cross-Entropy Loss)
Expand Down Expand Up @@ -35,13 +35,17 @@ def __call__(self, scores, true_choice):
# chosen_scores = torch.masked_select(scores, chosen_mask)
# not_chosen_scores = torch.masked_select(scores, not_chosen_mask)

# not quite, but dealing with true infinity is hairy and there should be no practical difference
infty = 2**32
# not quite, but dealing with true infinity is hairy and there should be no practical difference
infty = 2 ** 32

# Mask out the chosen scores from the max with a value of -infinity.
(max_score_not_chosen, _indices) = torch.max(scores - true_choice * infty, dim=1)
(max_score_not_chosen, _indices) = torch.max(
scores - true_choice * infty, dim=1
)
# Mask out the not-chosen scores from the min with a value of +infinity.
(min_score_chosen, _indices) = torch.min(scores + (1 - true_choice) * infty, dim=1)
(min_score_chosen, _indices) = torch.min(
scores + (1 - true_choice) * infty, dim=1
)
# print(max_score_not_chosen)
# print(min_score_chosen)

Expand Down
17 changes: 13 additions & 4 deletions poc/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(self, scoring_module, criterion=HingedRankLoss, **kwargs):

def initialize_module(self, *args, **kwargs):
_params = self.get_params_for("module")
self.module_ = self.scoring_module(n_features = self.n_features_).double()
self.module_ = self.scoring_module(n_features=self.n_features_).double()

def fit(self, dataset):
(_n_objects, self.n_features_) = dataset[0][0].shape
Expand All @@ -30,6 +30,7 @@ def predict(self, objects):
(_values, indices) = torch.sort(evaluations.squeeze())
return indices


class FETARankingEstimator(ScoreRankingEstimator):
"""A ranking estimator based on the FETA-Approach.
Expand All @@ -43,6 +44,7 @@ class FETARankingEstimator(ScoreRankingEstimator):
def __init__(self, criterion=HingedRankLoss, **kwargs):
super().__init__(scoring_module=FETAScoring, criterion=criterion, **kwargs)


class FATERankingEstimator(ScoreRankingEstimator):
"""A ranking estimator based on the FATE-Approach.
Expand All @@ -57,14 +59,16 @@ def __init__(self, criterion=HingedRankLoss, **kwargs):


class ScoreDiscreteChoiceEstimator(skorch.NeuralNet):
def __init__(self, scoring_module, choice_size, criterion=CategoricalHingeLossMax, **kwargs):
def __init__(
self, scoring_module, choice_size, criterion=CategoricalHingeLossMax, **kwargs
):
super().__init__(module=None, criterion=criterion, **kwargs)
self.scoring_module = scoring_module
self.choice_size = choice_size

def initialize_module(self, *args, **kwargs):
_params = self.get_params_for("module")
self.module_ = FETAScoring(n_features = self.n_features_).double()
self.module_ = FETAScoring(n_features=self.n_features_).double()

def fit(self, dataset):
(_n_objects, self.n_features_) = dataset[0][0].shape
Expand Down Expand Up @@ -92,4 +96,9 @@ class FETADiscreteChoiceEstimator(ScoreDiscreteChoiceEstimator):
"""

def __init__(self, choice_size=1, criterion=CategoricalHingeLossMax, **kwargs):
super().__init__(choice_size=choice_size, scoring_module=FETAScoring, criterion=criterion, **kwargs)
super().__init__(
choice_size=choice_size,
scoring_module=FETAScoring,
criterion=criterion,
**kwargs
)
2 changes: 2 additions & 0 deletions poc/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def _ranking_experiment():
print("===OUTPUT Ranking===")
print(estimator.predict(test_ds[0][0].unsqueeze(0)))


def _ranking_experiment_fate():
n_objects = 5
n_features = 1
Expand All @@ -51,6 +52,7 @@ def _ranking_experiment_fate():
print("===OUTPUT Ranking===")
print(estimator.predict(test_ds[0][0].unsqueeze(0)))


def _choice_experiment():
n_objects = 5
n_features = 1
Expand Down
2 changes: 1 addition & 1 deletion poc/rank_losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import torch.nn as nn


class HingedRankLoss():
class HingedRankLoss:
# Should be: First true, then predicted (as in Cross-Entropy Loss)
# But for some reason skorch calls it with a swapped argument order.
def __call__(self, comparison_rankings, true_rankings):
Expand Down

0 comments on commit 784650e

Please sign in to comment.