Skip to content

Commit

Permalink
🐛 Make row order explicit (sorted by feature frequency, or by bin count and bin)
Browse files: browse the repository at this point in the history
  • Loading branch information
enryH committed Oct 21, 2024
1 parent a77adf8 commit 5f6bee8
Showing 1 changed file with 35 additions and 40 deletions.
75 changes: 35 additions & 40 deletions tests/pandas/test_calc_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,9 @@ def example_data():
columns=['observed'] + ['model_' + str(i + 1) for i in range(4)])
data.columns.name = 'model'
data.index.name = 'feat'
data['freq_feat'] = [4, 5, 5, 4, 6, 7, 7, 9, 8, 6]
data['freq_feat'] = [4, 4, 5, 5, 5, 6, 7, 9, 8, 6]
return data

# %%


def test_get_absolute_error(example_data):
expected = {'feat': {0: 'feat_0',
1: 'feat_0',
Expand Down Expand Up @@ -89,58 +86,57 @@ def test_get_absolute_error(example_data):

def test_calc_errors_per_feat(example_data):
expected = {'feat': {0: 'feat_0',
1: 'feat_1',
2: 'feat_0',
1: 'feat_0',
2: 'feat_1',
3: 'feat_1',
4: 'feat_1',
5: 'feat_6',
6: 'feat_2',
5: 'feat_2',
6: 'feat_6',
7: 'feat_3',
8: 'feat_5',
9: 'feat_4'},
'model_1': {0: 1.0836015099999994,
1: 0.38399649333333247,
2: 1.0836015099999994,
1: 1.0836015099999994,
2: 0.38399649333333247,
3: 0.38399649333333247,
4: 0.38399649333333247,
5: 0.3581477100000008,
6: 1.0785032900000004,
5: 1.0785032900000004,
6: 0.3581477100000008,
7: 0.5197284500000023,
8: 0.35989225000000147,
9: 0.25562937999999846},
'model_2': {0: 0.6558889949999998,
1: 0.30025493000000125,
2: 0.6558889949999998,
1: 0.6558889949999998,
2: 0.30025493000000125,
3: 0.30025493000000125,
4: 0.30025493000000125,
5: 0.10481768000000002,
6: 0.6079609700000006,
5: 0.6079609700000006,
6: 0.10481768000000002,
7: 0.48225405000000166,
8: 0.3109490500000014,
9: 0.24097977999999998},
'model_3': {0: 1.8424256349999997,
1: 0.3030794033333339,
2: 1.8424256349999997,
1: 1.8424256349999997,
2: 0.3030794033333339,
3: 0.3030794033333339,
4: 0.3030794033333339,
5: 0.025569629999999677,
6: 1.3011469200000008,
5: 1.3011469200000008,
6: 0.025569629999999677,
7: 0.6282909300000021,
8: 0.749302710000002,
9: 0.04352294999999984},
'model_4': {0: 1.3207320749999987,
1: 0.6042852166666677,
2: 1.3207320749999987,
1: 1.3207320749999987,
2: 0.6042852166666677,
3: 0.6042852166666677,
4: 0.6042852166666677,
5: 0.1415143900000011,
6: 1.2042582899999985,
5: 1.2042582899999985,
6: 0.1415143900000011,
7: 0.8281038200000026,
8: 0.5444545000000005,
9: 0.06842009000000004},
'freq_feat': {0: 4, 1: 4, 2: 5, 3: 5, 4: 6, 5: 6, 6: 7, 7: 7, 8: 8, 9: 9},
'n_obs': {0: 2, 1: 3, 2: 2, 3: 3, 4: 3, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}}

'freq_feat': {0: 4, 1: 4, 2: 5, 3: 5, 4: 5, 5: 6, 6: 6, 7: 7, 8: 8, 9: 9},
'n_obs': {0: 2, 1: 2, 2: 3, 3: 3, 4: 3, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}}
actual = calc_errors.calc_errors_per_feat(
pred=example_data.drop('freq_feat', axis=1),
freq_feat=example_data['freq_feat']).reset_index().to_dict()
Expand All @@ -152,55 +148,54 @@ def test_calc_errors_per_bin(example_data):
1: 'feat_0',
2: 'feat_1',
3: 'feat_2',
4: 'feat_3',
4: 'feat_5',
5: 'feat_4',
6: 'feat_5',
6: 'feat_3',
7: 'feat_1',
8: 'feat_1',
9: 'feat_6'},
'model_1': {0: 1.7588900899999977,
1: 0.408312930000001,
2: 0.03497017999999841,
3: 1.0785032900000004,
4: 0.5197284500000023,
4: 0.35989225000000147,
5: 0.25562937999999846,
6: 0.35989225000000147,
6: 0.5197284500000023,
7: 0.31798253999999915,
8: 0.7990367599999999,
9: 0.3581477100000008},
'model_2': {0: 0.9619296899999981,
1: 0.34984830000000144,
2: 0.04799503999999999,
3: 0.6079609700000006,
4: 0.48225405000000166,
4: 0.3109490500000014,
5: 0.24097977999999998,
6: 0.3109490500000014,
6: 0.48225405000000166,
7: 0.055784630000001556,
8: 0.7969851200000022,
9: 0.10481768000000002},
'model_3': {0: 2.9334374200000006,
1: 0.7514138499999987,
2: 0.023260270000001526,
3: 1.3011469200000008,
4: 0.6282909300000021,
4: 0.749302710000002,
5: 0.04352294999999984,
6: 0.749302710000002,
6: 0.6282909300000021,
7: 0.18840471000000036,
8: 0.6975732299999997,
9: 0.025569629999999677},
'model_4': {0: 2.1805211699999987,
1: 0.46094297999999867,
2: 0.1140570700000012,
3: 1.2042582899999985,
4: 0.8281038200000026,
4: 0.5444545000000005,
5: 0.06842009000000004,
6: 0.5444545000000005,
6: 0.8281038200000026,
7: 0.7145071600000001,
8: 0.9842914200000017,
9: 0.1415143900000011},
'bin': {0: 25, 1: 30, 2: 31, 3: 26, 4: 29, 5: 29, 6: 26, 7: 28, 8: 28, 9: 28},
'bin': {0: 25, 1: 30, 2: 31, 3: 26, 4: 26, 5: 29, 6: 29, 7: 28, 8: 28, 9: 28},
'n_obs': {0: 1, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2, 6: 2, 7: 3, 8: 3, 9: 3}}

actual = calc_errors.calc_errors_per_bin(
example_data.drop('freq_feat', axis=1)).reset_index().to_dict()
example_data.drop('freq_feat', axis=1)).sort_values(["n_obs", "bin"]).reset_index().to_dict()
assert actual == expected

0 comments on commit 5f6bee8

Please sign in to comment.