-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdelong.py
72 lines (58 loc) · 2.63 KB
/
delong.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Helper function based on delong package by yandexdataschool:
https://github.com/yandexdataschool/roc_comparison
"""
import os
import logging
import pandas as pd
from roc_comparison import compare_auc_delong_xu
# Module-level logger named after this module, so that configuring it does not
# touch the root logger; records still propagate to the root logger's handlers.
logger = logging.getLogger(__name__)
def calc_p_value_delong_xu(model_1: str, model_2: str, folder_dumps: str,
                           verbose: bool = False,
                           index_col: str = 'Sample ID') -> float:
    """Calculate the p-value based on previous dumps of model scores.

    Both dumps are read as CSV files and must provide the columns
    ``y_test`` (ground truth) and ``y_test_pred`` (model score). The test
    is computed on the samples which have a non-missing ``y_test`` in
    both dumps.

    Parameters
    ----------
    model_1 : str
        File name of the first model dump contained in `folder_dumps`.
    model_2 : str
        File name of the second model dump contained in `folder_dumps`.
    folder_dumps : str
        Path to the model dumps with `index_col` saved. The index is used
        to find the common subset of samples on which the test can be based.
    verbose : bool, optional
        Emit logging.INFO statements, by default False
    index_col : str, optional
        Index column to be used when loading data into a
        pandas.DataFrame, by default 'Sample ID'

    Returns
    -------
    float
        p-value for DeLong AUC-ROC test.

    Raises
    ------
    ValueError
        If the ground truth labels of the two dumps disagree on the
        shared samples.
    """
    # Human readable names: strip the '.csv' part, underscores become spaces.
    model_1_name = " ".join(model_1.split('.csv')[0].split('_'))
    model_2_name = " ".join(model_2.split('.csv')[0].split('_'))
    if verbose:
        logger.info("Compare {} to {}\n".format(model_1_name, model_2_name))

    scores_1 = pd.read_csv(os.path.join(folder_dumps, model_1),
                           index_col=index_col)
    scores_2 = pd.read_csv(os.path.join(folder_dumps, model_2),
                           index_col=index_col)

    # Only samples with a ground truth label in both dumps can be compared.
    ids_1 = scores_1.y_test.dropna().index
    ids_2 = scores_2.y_test.dropna().index
    in_both = ids_2.intersection(ids_1)
    model_1_omitted_ids = ids_1.difference(in_both)
    model_2_omitted_ids = ids_2.difference(in_both)

    if verbose:
        # Sample IDs are not necessarily strings, so convert before joining.
        logger.info("Omitting {} from test set of {}: {}\n".format(
            len(model_1_omitted_ids), model_1_name,
            ', '.join(map(str, model_1_omitted_ids))))
        logger.info("Omitting {} from test set of {}: {}\n".format(
            len(model_2_omitted_ids), model_2_name,
            ', '.join(map(str, model_2_omitted_ids))))
        logger.info("Comparison based on {} in total, which are: {}".format(
            len(in_both), ", ".join(map(str, in_both))))

    ground_truth = scores_1.loc[in_both].y_test.dropna()
    ground_truth_2 = scores_2.loc[in_both].y_test.dropna()
    # The test is only meaningful if both models were scored against the
    # same labels; raise explicitly (an assert would vanish under -O).
    if not (ground_truth == ground_truth_2).all():
        raise ValueError(
            "Ground truth (y_test) differs between {} and {} on the shared "
            "samples.".format(model_1_name, model_2_name))

    log10_pvalue = compare_auc_delong_xu.delong_roc_test(
        ground_truth=ground_truth,
        predictions_one=scores_2.y_test_pred.loc[in_both],
        predictions_two=scores_1.y_test_pred.loc[in_both]
    )
    # The result is indexed as [0][0] and holds log10(p); undo the log.
    return 10**log10_pvalue[0][0]