"""Writes a report with the models' predictions for a set of new images.
Notes:
1. 'img_dir' points to the directory holding the folder that holds the image
files for prediction. That folder should be named 'img/'.
"""
import numpy as np
import pandas as pd
import argparse
import os
import tensorflow as tf
from hamlet import models
from hamlet.tools import metrics as tm
from hamlet.tools import generic as tg
if __name__ == '__main__':
parser = argparse.ArgumentParser()
    parser.add_argument('--image_dir',
                        type=str,
                        help='Path to the directory containing the img/ \
                        folder that holds the images for prediction.')
    parser.add_argument('--output_dir',
                        type=str,
                        default=None,
                        help='Path where the script should dump the output \
                        files. Writes to --image_dir by default.')
    parser.add_argument('--write_to',
                        type=str,
                        default=None,
                        help='Existing CSV file to which the predictions \
                        should be written. Must also contain the image ID \
                        column named by --id_column.')
parser.add_argument('--id_column',
type=str,
default='id',
help='Name for the column holding the image IDs.')
parser.add_argument('--ab_mod_dir',
type=str,
default='output/abnormal/checkpoints/training/',
help='Path to the folder holding the trained \
abnormal/normal model.')
parser.add_argument('--abtb_mod_dir',
type=str,
default='output/abnormal_tb/checkpoints/training/',
help='Path to the folder holding the trained \
abnormal-TB model.')
parser.add_argument('--find_mod_dir',
type=str,
default='output/findings/checkpoints/training/',
help='Path to the folder holding the trained \
multilabel classification model.')
parser.add_argument('--model_flavor',
type=str,
default='EfficientNetV2M',
help='What pretrained model to use as the feature \
extractor.')
    parser.add_argument('--no_augmentation',
                        action='store_true',
                        help='Specifies that the models should be built \
                        without the image augmentation layer.')
parser.add_argument('--batch_size',
type=int,
default=12,
help='Minibatch size for inference.')
parser.add_argument('--image_dim',
type=int,
default=600,
help='Either dimension of the image to be passed \
to the model.')
    parser.add_argument('--single_GPU',
                        action='store_true',
                        help='Turns off distributed (multi-GPU) inference.')
parser.add_argument('--prefix',
type=str,
default='',
help='Prefix for the predictions file.')
parser.set_defaults(no_augmentation=False,
single_GPU=False)
args = parser.parse_args()
# Setting things up
AUGMENT = not args.no_augmentation
AB_MOD_DIR = args.ab_mod_dir
ABTB_MOD_DIR = args.abtb_mod_dir
FIND_MOD_DIR = args.find_mod_dir
MODEL_FLAVOR = args.model_flavor
IMG_DIR = args.image_dir
IMG_DIM = args.image_dim
BATCH_SIZE = args.batch_size
OUT_DIR = IMG_DIR
DISTRIBUTED = not args.single_GPU
PREFIX = args.prefix
WRITE_TO = args.write_to
ID_COL = args.id_column
if args.output_dir is not None:
OUT_DIR = args.output_dir
    # Setting the distribution strategy
if DISTRIBUTED:
print('Using multiple GPUs.\n')
cdo = tf.distribute.HierarchicalCopyAllReduce()
strategy = tf.distribute.MirroredStrategy(cross_device_ops=cdo)
else:
strategy = tf.distribute.get_strategy()
# Setting the column labels for the multilabel task
findings = [
'infiltrate', 'reticular', 'cavity',
'nodule', 'pleural_effusion', 'hilar_adenopathy',
'linear_opacity', 'discrete_nodule', 'volume_loss',
'pleural_reaction', 'other', 'miliary'
]
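    # NB: this order has to match the label order the findings model was
    # trained with, since columns are matched to model outputs by position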
# Checking the existing CSV file to make sure it contains the specified
# image ID column
if WRITE_TO:
        current_data = pd.read_csv(os.path.join(OUT_DIR, WRITE_TO))
no_id = ID_COL + ' must be a valid column in the WRITE_TO CSV file.'
assert ID_COL in current_data.columns.values, no_id
    # Loading the data; the file list is sorted so the IDs line up with the
    # alphabetical path order image_dataset_from_directory uses below
    test_files = sorted(os.listdir(os.path.join(IMG_DIR, 'img')))
    test_ids = [os.path.splitext(f)[0] for f in test_files]
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
IMG_DIR,
labels=None,
shuffle=False,
image_size=(IMG_DIM, IMG_DIM),
batch_size=BATCH_SIZE
)
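    # One row per image ID; each model's probabilities are appended as new
    # columns below, relying on this order matching the dataset's order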
preds_df = pd.DataFrame(test_ids, columns=[ID_COL])
    # Loading the trained models and generating predictions
with strategy.scope():
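        # Each block below builds a model, restores its checkpoint weights,
        # scores the dataset, and adds the resulting probabilities to
        # preds_df; a block is skipped if its model directory argument is
        # empty.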
if FIND_MOD_DIR:
multi_mod = models.EfficientNet(num_classes=len(findings),
img_height=IMG_DIM,
img_width=IMG_DIM,
augmentation=AUGMENT,
model_flavor=MODEL_FLAVOR)
multi_mod.load_weights(FIND_MOD_DIR)
multi_preds = multi_mod.predict(test_ds, verbose=1)
preds_df[[f + '_prob' for f in findings]] = multi_preds
if AB_MOD_DIR:
ab_mod = models.EfficientNet(num_classes=1,
img_height=IMG_DIM,
img_width=IMG_DIM,
augmentation=AUGMENT,
model_flavor=MODEL_FLAVOR)
ab_mod.load_weights(AB_MOD_DIR)
ab_probs = ab_mod.predict(test_ds, verbose=1).flatten()
preds_df['abnormal_prob'] = ab_probs
if ABTB_MOD_DIR:
abtb_mod = models.EfficientNet(num_classes=1,
img_height=IMG_DIM,
img_width=IMG_DIM,
augmentation=AUGMENT,
model_flavor=MODEL_FLAVOR)
abtb_mod.load_weights(ABTB_MOD_DIR)
abtb_probs = abtb_mod.predict(test_ds, verbose=1).flatten()
preds_df['abnormal_tb_prob'] = abtb_probs
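    # Writing the results: merge into the existing CSV if one was given,
    # otherwise write a standalone predictions file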
if WRITE_TO:
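        # Best guess from usage: tg.is_file() reports whether the IDs carry
        # a file extension and, if so, returns it so it can be stripped to
        # match the extension-less IDs in preds_df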
is_file = tg.is_file(current_data[ID_COL][0])
if is_file[0]:
ids = current_data[ID_COL].str.replace(is_file[1], '')
current_data[ID_COL] = ids
current_data.sort_values(ID_COL, inplace=True)
preds_df.sort_values(ID_COL, inplace=True)
all_data = pd.merge(current_data, preds_df, on=ID_COL)
        all_data.to_csv(os.path.join(OUT_DIR, WRITE_TO), index=False)
else:
        preds_df.to_csv(os.path.join(OUT_DIR, PREFIX + 'predictions.csv'),
                        index=False)