forked from harsha-simhadri/big-ann-benchmarks
-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_export.py
48 lines (43 loc) · 1.39 KB
/
data_export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import sys
import os
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import argparse
import bz2
from benchmark.datasets import DATASETS
from benchmark.plotting.utils import compute_metrics_all_runs
from benchmark.results import load_all_results, get_unique_algorithms
if __name__ == "__main__":
    # Script entry point: collect the per-run metrics of every dataset
    # registered in DATASETS and write them to a single CSV file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--output',
        help='Path to the output csv file',
        required=True)
    parser.add_argument(
        '--recompute',
        action='store_true',
        # Previously this repeated the --output help text verbatim; the flag
        # is a boolean that is forwarded to compute_metrics_all_runs below.
        help='Recompute the metrics of all runs')
    args = parser.parse_args()

    dfs = []
    for dataset_name, dataset_factory in DATASETS.items():
        print("Looking at dataset", dataset_name)
        dataset = dataset_factory()
        results = load_all_results(dataset_name)
        results = compute_metrics_all_runs(dataset, results, args.recompute)

        cleaned = []
        for result in results:
            # Fold the 'k-nn' and 'ap' entries into one shared 'recall/ap'
            # column. 'ap' is handled last, so it takes precedence when a
            # result carries both keys (same order as the original code).
            # NOTE(review): assumes each result is a mutable dict -- confirm
            # against compute_metrics_all_runs.
            for metric_key in ('k-nn', 'ap'):
                if metric_key in result:
                    result['recall/ap'] = result.pop(metric_key)
            cleaned.append(result)
        dfs.append(pd.DataFrame(cleaned))

    # pd.concat raises on an empty list, so only export when at least one
    # dataset was processed.
    if dfs:
        pd.concat(dfs).to_csv(args.output, index=False)