-
Notifications
You must be signed in to change notification settings - Fork 111
/
model.py
127 lines (95 loc) · 4.52 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from __future__ import print_function
from hbconfig import Config
import tensorflow as tf
import nltk
import transformer
class Model:
    """Builds a `tf.estimator.EstimatorSpec` around `transformer.Graph`.

    An instance is stateful only for the duration of one `model_fn` call:
    the intermediate tensors (inputs, loss, train op, metrics, predictions)
    are stored as attributes so the private `_build_*` steps can share them.
    """

    def __init__(self):
        pass

    def model_fn(self, mode, features, labels, params):
        """Assemble the graph for `mode` and return its EstimatorSpec.

        Args:
            mode: one of the `tf.estimator.ModeKeys` values.
            features: mapping that must contain the key "enc_inputs".
            labels: target token ids; stored as `self.targets`.
                NOTE(review): presumably None in PREDICT mode, where the
                targets are never read -- confirm against the input_fn.
            params: stored on the instance as `self.params`.

        Returns:
            A `tf.estimator.EstimatorSpec` whose predictions are exposed
            under the key "prediction".
        """
        self.dtype = tf.float32

        self.mode = mode
        self.params = params

        self.loss, self.train_op, self.metrics, self.predictions = None, None, None, None
        self._init_placeholder(features, labels)
        self.build_graph()

        # train mode: required loss and train_op
        # eval mode: required loss
        # predict mode: required predictions
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=self.loss,
            train_op=self.train_op,
            eval_metric_ops=self.metrics,
            predictions={"prediction": self.predictions})

    def _init_placeholder(self, features, labels):
        """Set encoder inputs, targets, batch size and decoder inputs.

        In TRAIN mode the decoder input is the target sequence shifted right
        by one position (START token prepended, last position dropped).
        In every other mode it is a START token followed by PAD tokens.
        """
        self.encoder_inputs = features["enc_inputs"]
        self.targets = labels

        self.batch_size = tf.shape(self.encoder_inputs)[0]
        start_tokens = tf.fill([self.batch_size, 1], Config.data.START_ID)

        if self.mode == tf.estimator.ModeKeys.TRAIN:
            # Drop the last target position so that, after prepending START,
            # the decoder input keeps length max_seq_length.
            shifted_targets = tf.slice(
                self.targets, [0, 0],
                [self.batch_size, Config.data.max_seq_length - 1])
            self.decoder_inputs = tf.concat([start_tokens, shifted_targets], axis=1)
            # Named identity op so the first decoder input can be looked up by name.
            tf.identity(self.decoder_inputs[0], 'train/dec_0')
        else:
            # Use the configured PAD id (the same constant the loss and the
            # metric use) instead of a hard-coded zero.
            pad_tokens = tf.fill(
                [self.batch_size, Config.data.max_seq_length - 1],
                Config.data.PAD_ID)
            self.decoder_inputs = tf.concat([start_tokens, pad_tokens], axis=1)
            tf.identity(self.decoder_inputs[0], 'test/dec_0')

    def build_graph(self):
        """Build the transformer graph; add loss, optimizer and metrics
        for every mode except PREDICT."""
        graph = transformer.Graph(self.mode)
        output, predictions = graph.build(encoder_inputs=self.encoder_inputs,
                                          decoder_inputs=self.decoder_inputs)

        self.predictions = predictions
        if self.mode != tf.estimator.ModeKeys.PREDICT:
            self._build_loss(output)
            self._build_optimizer()
            self._build_metric()

    def _build_loss(self, logits):
        """Set `self.loss` to the sequence loss of `logits` against the targets.

        The loss weights are a sequence mask whose per-row length is the
        number of non-PAD target tokens.
        NOTE(review): this masks the right positions only if PAD tokens
        appear solely at the end of each target row -- confirm.
        """
        with tf.variable_scope('loss'):
            # tf.cast replaces the deprecated tf.to_int32.
            non_pad = tf.cast(
                tf.not_equal(self.targets, Config.data.PAD_ID), tf.int32)
            target_lengths = tf.reduce_sum(non_pad, 1)

            weight_masks = tf.sequence_mask(
                lengths=target_lengths,
                maxlen=Config.data.max_seq_length,
                dtype=self.dtype, name='masks')

            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=logits,
                targets=self.targets,
                weights=weight_masks,
                name="sequence-loss")

    def _build_optimizer(self):
        """Set `self.train_op` using the optimizer named in `Config.train`
        (falls back to 'Adam' when none is configured)."""
        self.train_op = tf.contrib.layers.optimize_loss(
            self.loss, tf.train.get_global_step(),
            optimizer=Config.train.get('optimizer', 'Adam'),
            learning_rate=Config.train.learning_rate,
            summaries=['loss', 'gradients', 'learning_rate'],
            name="train_op")

    @staticmethod
    def _truncate_after_eos(token_ids, eos_id):
        """Return `token_ids` cut right after the first `eos_id` (inclusive).

        The list is returned unchanged when `eos_id` does not occur in it.
        """
        if eos_id in token_ids:
            return token_ids[:token_ids.index(eos_id) + 1]
        return token_ids

    @staticmethod
    def _ids_to_tokens(token_ids, vocab, skip_id=None):
        """Map ids to tokens through `vocab`; ids missing from it become "".

        Ids equal to `skip_id` are dropped; with `skip_id=None` nothing is
        dropped.
        """
        return [vocab.get(token_id, "")
                for token_id in token_ids
                if skip_id is None or token_id != skip_id]

    def _build_metric(self):
        """Set `self.metrics` to a dict holding a streaming "bleu" metric."""

        def bleu_score(labels, predictions,
                       weights=None, metrics_collections=None,
                       updates_collections=None, name=None):
            """Streaming mean of the per-batch corpus BLEU, scaled by 100.

            The optional arguments are forwarded to `tf.metrics.mean`.
            """

            def _nltk_bleu_score(labels, predictions):
                # Runs as plain Python inside tf.py_func; both arguments
                # arrive as arrays and are converted with .tolist().
                eos_id = Config.data.EOS_ID
                rev_target_vocab = Config.data.rev_target_vocab

                # Keep each prediction only up to and including its first EOS.
                trimmed = [self._truncate_after_eos(prediction, eos_id)
                           for prediction in predictions.tolist()]

                # corpus_bleu expects a list of reference lists per
                # hypothesis; each label is the single reference.
                references = [
                    [self._ids_to_tokens(label, rev_target_vocab,
                                         skip_id=Config.data.PAD_ID)]
                    for label in labels.tolist()]
                hypotheses = [self._ids_to_tokens(prediction, rev_target_vocab)
                              for prediction in trimmed]

                # Guard the sample print so an empty batch cannot raise IndexError.
                if Config.train.print_verbose and references and hypotheses:
                    print("label: ", references[0][0])
                    print("prediction: ", hypotheses[0])

                return float(nltk.translate.bleu_score.corpus_bleu(references, hypotheses))

            score = tf.py_func(_nltk_bleu_score, (labels, predictions), tf.float64)
            return tf.metrics.mean(
                score * 100.0,
                weights=weights,
                metrics_collections=metrics_collections,
                updates_collections=updates_collections,
                name=name)

        self.metrics = {
            "bleu": bleu_score(self.targets, self.predictions)
        }