# results.py (forked from harsha-simhadri/big-ann-benchmarks)

from __future__ import absolute_import
import h5py
import json
import os
import re
import traceback


def get_result_filename(dataset=None, count=None, definition=None,
                        query_arguments=None, neurips23track=None, runbook_path=None):
    """Build the path under results/ where a run's HDF5 file is stored."""
    d = ['results']
    if neurips23track and neurips23track != 'none':
        d.append('neurips23')
        d.append(neurips23track)
        if neurips23track == 'streaming':
            if runbook_path is None:
                raise RuntimeError('Need runbook_path to store results')
            else:
                d.append(os.path.split(runbook_path)[1])
    if dataset:
        d.append(dataset)
    if count:
        d.append(str(count))
    if definition:
        d.append(definition.algorithm)
        build_args = definition.arguments
        try:
            for args in build_args:
                # If an 'indexkey' is present, it alone identifies the index build.
                if isinstance(args, dict) and 'indexkey' in args:
                    build_args = [args['indexkey']]
        except Exception:
            # definition.arguments was not iterable; use it as-is.
            pass
        data = build_args + (query_arguments or [])
        # Flatten all arguments into a filesystem-safe name, capped at 150 chars.
        data = re.sub(r'\W+', '_', json.dumps(data, sort_keys=True)).strip('_')
        if len(data) > 150:
            data = data[-149:]
        d.append(data)
    return os.path.join(*d)
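
# Illustrative example (not part of the original file; names are hypothetical):
# with a definition whose .algorithm is 'faiss' and whose .arguments contain
# {'indexkey': 'IVF1024,PQ64'}, a call such as
#
#     get_result_filename('sift-1B', 10, definition, [{'nprobe': 64}])
#
# would return a path of the form
#
#     results/sift-1B/10/faiss/IVF1024_PQ64_nprobe_64
#
# i.e. results/<dataset>/<count>/<algorithm>/<sanitized build+query args>.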


def add_results_to_h5py(f, search_type, results, count, suffix=''):
    """Write one set of query results into the open HDF5 file f."""
    if search_type in ("knn", "knn_filtered"):
        f.create_dataset('neighbors' + suffix, (len(results), count), 'i', data=results)
    elif search_type == "range":
        # Range results arrive as a CSR-style (lims, D, I) triple; see the note below.
        lims, D, I = results
        f.create_dataset('neighbors' + suffix, data=I)
        f.create_dataset('lims' + suffix, data=lims)
        f.create_dataset('distances' + suffix, data=D)
    else:
        raise NotImplementedError("Unsupported search type: %s" % search_type)
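
# Note on the 'range' layout (explanatory, assuming the faiss-style CSR
# convention used by range search in this benchmark): for nq queries,
# lims has length nq + 1, and the neighbors and distances of query i are
#
#     I[lims[i]:lims[i + 1]] and D[lims[i]:lims[i + 1]]
#
# so 'lims' must be stored alongside 'neighbors' and 'distances' to
# recover the per-query result boundaries.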


def store_results(dataset, count, definition, query_arguments,
                  attrs, results, search_type, neurips23track=None, runbook_path=None):
    """Store run attributes and results in an HDF5 file under results/."""
    fn = get_result_filename(
        dataset, count, definition, query_arguments, neurips23track, runbook_path) + '.hdf5'
    head, tail = os.path.split(fn)
    if not os.path.isdir(head):
        os.makedirs(head)
    with h5py.File(name=fn, mode='w', libver='latest') as f:
        for k, v in attrs.items():
            f.attrs[k] = v
        if neurips23track == 'streaming':
            # Streaming runs store one result set per runbook search step.
            for i, step_results in enumerate(results):
                step = attrs['step_' + str(i)]
                add_results_to_h5py(f, search_type, step_results, count, '_step' + str(step))
        else:
            add_results_to_h5py(f, search_type, results, count)
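
# Minimal usage sketch (hypothetical values, not from the original file):
# after running 10-NN queries, a runner might persist its results like
#
#     store_results('sift-1B', 10, definition, [{'nprobe': 64}],
#                   {'algo': 'faiss', 'best_search_time': 0.5},
#                   results, 'knn')
#
# The stored 'algo' attribute is what load_all_results() and
# get_unique_algorithms() below read back out.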


def load_all_results(dataset=None, count=None, neurips23track=None, runbook_path=None):
    """
    A generator for all result files, yielding (attributes, h5py file) pairs.
    """
    for root, _, files in os.walk(get_result_filename(dataset, count,
                                                      neurips23track=neurips23track,
                                                      runbook_path=runbook_path)):
        for fn in files:
            if os.path.splitext(fn)[-1] != '.hdf5':
                continue
            try:
                f = h5py.File(name=os.path.join(root, fn), mode='r+', libver='latest')
                properties = dict(f.attrs)
                yield properties, f
                f.close()
            except Exception:
                print('Was unable to read', fn)
                traceback.print_exc()


def get_unique_algorithms():
    """Return the set of algorithm names found across all stored results."""
    return set(properties['algo'] for properties, _ in load_all_results())
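
# Example consumer (a sketch, not in the original file): iterate over all
# stored knn results for one dataset and inspect their shapes.
#
#     for properties, f in load_all_results(dataset='sift-1B', count=10):
#         neighbors = f['neighbors'][()]   # shape: (num_queries, count)
#         print(properties['algo'], neighbors.shape)
#
# Note that load_all_results closes each file once the consumer advances
# the generator, so copy out any arrays you need before moving on.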