
seq2seq_util.py
## @package seq2seq_util
# Module caffe2.python.examples.seq2seq_util
""" A bunch of util functions to build Seq2Seq models with Caffe2."""


import collections
from future.utils import viewitems

import caffe2.proto.caffe2_pb2 as caffe2_pb2
from caffe2.python import attention, core, rnn_cell, brew


PAD_ID = 0
PAD = '<PAD>'
GO_ID = 1
GO = '<GO>'
EOS_ID = 2
EOS = '<EOS>'
UNK_ID = 3
UNK = '<UNK>'


def gen_vocab(corpus, unk_threshold):
    vocab = collections.defaultdict(lambda: len(vocab))
    freqs = collections.defaultdict(lambda: 0)
    # Adding padding tokens to the vocabulary to maintain consistency with IDs
    vocab[PAD]
    vocab[GO]
    vocab[EOS]
    vocab[UNK]

    with open(corpus) as f:
        for sentence in f:
            tokens = sentence.strip().split()
            for token in tokens:
                freqs[token] += 1
    for token, freq in viewitems(freqs):
        if freq > unk_threshold:
            vocab[token]

    return vocab


def get_numberized_sentence(sentence, vocab):
    numerized_sentence = []
    for token in sentence.strip().split():
        if token in vocab:
            numerized_sentence.append(vocab[token])
        else:
            numerized_sentence.append(vocab[UNK])
    return numerized_sentence
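

# Illustrative usage sketch (editorial addition, not part of the original
# module): numberizing a sentence against a tiny hand-built vocabulary.
# The tokens 'hello'/'world' and their ids are hypothetical.
def _example_numberize_sentence():
    vocab = {PAD: PAD_ID, GO: GO_ID, EOS: EOS_ID, UNK: UNK_ID, 'hello': 4, 'world': 5}
    # 'brave' is out of vocabulary, so it maps to UNK_ID (3).
    return get_numberized_sentence('hello brave world', vocab)  # [4, 3, 5]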


def rnn_unidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    """ Unidirectional LSTM encoder."""
    with core.NameScope(scope):
        initial_cell_state = model.param_init_net.ConstantFill(
            [],
            'initial_cell_state',
            shape=[num_units],
            value=0.0,
        )
        initial_hidden_state = model.param_init_net.ConstantFill(
            [],
            'initial_hidden_state',
            shape=[num_units],
            value=0.0,
        )

    cell = rnn_cell.LSTMCell(
        input_size=input_size,
        hidden_size=num_units,
        forget_bias=0.0,
        memory_optimization=False,
        name=(scope + '/' if scope else '') + 'lstm',
        forward_only=forward_only,
    )

    dropout_ratio = (
        None if dropout_keep_prob is None else (1.0 - dropout_keep_prob)
    )
    if dropout_ratio is not None:
        cell = rnn_cell.DropoutCell(
            internal_cell=cell,
            dropout_ratio=dropout_ratio,
            name=(scope + '/' if scope else '') + 'dropout',
            forward_only=forward_only,
            is_test=False,
        )

    outputs_with_grads = []
    if return_sequence_output:
        outputs_with_grads.append(0)
    if return_final_state:
        outputs_with_grads.extend([1, 3])

    outputs, (_, final_hidden_state, _, final_cell_state) = (
        cell.apply_over_sequence(
            model=model,
            inputs=inputs,
            seq_lengths=input_lengths,
            initial_states=(initial_hidden_state, initial_cell_state),
            outputs_with_grads=outputs_with_grads,
        )
    )
    return outputs, final_hidden_state, final_cell_state
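
# Note (editorial): cell.apply_over_sequence() returns the per-step outputs plus
# a state tuple whose indices 1 and 3 hold the final hidden and cell states,
# which is why outputs_with_grads selects 0 for the sequence output and [1, 3]
# when the final state is needed.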


def rnn_bidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    outputs_fw, final_hidden_fw, final_cell_fw = rnn_unidirectional_layer(
        model,
        inputs,
        input_lengths,
        input_size,
        num_units,
        dropout_keep_prob,
        forward_only,
        return_sequence_output,
        return_final_state,
        scope=(scope + '/' if scope else '') + 'fw',
    )
    with core.NameScope(scope):
        reversed_inputs = model.net.ReversePackedSegs(
            [inputs, input_lengths],
            ['reversed_inputs'],
        )
    outputs_bw, final_hidden_bw, final_cell_bw = rnn_unidirectional_layer(
        model,
        reversed_inputs,
        input_lengths,
        input_size,
        num_units,
        dropout_keep_prob,
        forward_only,
        return_sequence_output,
        return_final_state,
        scope=(scope + '/' if scope else '') + 'bw',
    )
    with core.NameScope(scope):
        outputs_bw = model.net.ReversePackedSegs(
            [outputs_bw, input_lengths],
            ['outputs_bw'],
        )

    # Concatenate forward and backward results
    if return_sequence_output:
        with core.NameScope(scope):
            outputs, _ = model.net.Concat(
                [outputs_fw, outputs_bw],
                ['outputs', 'outputs_dim'],
                axis=2,
            )
    else:
        outputs = None

    if return_final_state:
        with core.NameScope(scope):
            final_hidden_state, _ = model.net.Concat(
                [final_hidden_fw, final_hidden_bw],
                ['final_hidden_state', 'final_hidden_state_dim'],
                axis=2,
            )
            final_cell_state, _ = model.net.Concat(
                [final_cell_fw, final_cell_bw],
                ['final_cell_state', 'final_cell_state_dim'],
                axis=2,
            )
    else:
        final_hidden_state = None
        final_cell_state = None

    return outputs, final_hidden_state, final_cell_state
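
# Note (editorial): the backward pass runs over inputs reversed with
# ReversePackedSegs, and its outputs are reversed back before being concatenated
# with the forward results, so a bidirectional layer produces outputs of size
# 2 * num_units.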


def build_embeddings(
    model,
    vocab_size,
    embedding_size,
    name,
    freeze_embeddings,
):
    embeddings = model.param_init_net.GaussianFill(
        [],
        name,
        shape=[vocab_size, embedding_size],
        std=0.1,
    )
    if not freeze_embeddings:
        model.params.append(embeddings)
    return embeddings
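

# Illustrative sketch (editorial addition): building an embedding table on a
# caffe2 ModelHelper. The model name and blob name below are hypothetical.
def _example_build_embeddings():
    from caffe2.python import model_helper
    model = model_helper.ModelHelper(name='embedding_example')
    # A 100-token vocabulary mapped to 16-dimensional vectors; the table is
    # registered as a trainable parameter because freeze_embeddings is False.
    return build_embeddings(
        model=model,
        vocab_size=100,
        embedding_size=16,
        name='src_embeddings',
        freeze_embeddings=False,
    )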


def get_layer_scope(scope, layer_type, i):
    prefix = (scope + '/' if scope else '') + layer_type
    return '{}/layer{}'.format(prefix, i)
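
# For example (hypothetical scope name), get_layer_scope('encoder_model', 'encoder', 1)
# returns 'encoder_model/encoder/layer1', and get_layer_scope(None, 'encoder', 1)
# returns 'encoder/layer1'.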


def build_embedding_encoder(
    model,
    encoder_params,
    num_decoder_layers,
    inputs,
    input_lengths,
    vocab_size,
    embeddings,
    embedding_size,
    use_attention,
    num_gpus=0,
    forward_only=False,
    scope=None,
):
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_encoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_encoder_inputs'],
            )
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_encoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_encoder_inputs_cpu'],
                )
            embedded_encoder_inputs = model.CopyCPUToGPU(
                embedded_encoder_inputs_cpu,
                'embedded_encoder_inputs',
            )

    layer_inputs = embedded_encoder_inputs
    layer_input_size = embedding_size
    encoder_units_per_layer = []
    final_encoder_hidden_states = []
    final_encoder_cell_states = []

    num_encoder_layers = len(encoder_params['encoder_layer_configs'])
    use_bidirectional_encoder = encoder_params.get(
        'use_bidirectional_encoder',
        False,
    )

    for i, layer_config in enumerate(encoder_params['encoder_layer_configs']):

        if use_bidirectional_encoder and i == 0:
            layer_func = rnn_bidirectional_layer
            output_dims = 2 * layer_config['num_units']
        else:
            layer_func = rnn_unidirectional_layer
            output_dims = layer_config['num_units']
        encoder_units_per_layer.append(output_dims)

        is_final_layer = (i == num_encoder_layers - 1)

        dropout_keep_prob = layer_config.get(
            'dropout_keep_prob',
            None,
        )

        return_final_state = i >= (num_encoder_layers - num_decoder_layers)
        (
            layer_outputs,
            final_layer_hidden_state,
            final_layer_cell_state,
        ) = layer_func(
            model=model,
            inputs=layer_inputs,
            input_lengths=input_lengths,
            input_size=layer_input_size,
            num_units=layer_config['num_units'],
            dropout_keep_prob=dropout_keep_prob,
            forward_only=forward_only,
            return_sequence_output=(not is_final_layer) or use_attention,
            return_final_state=return_final_state,
            scope=get_layer_scope(scope, 'encoder', i),
        )

        if not is_final_layer:
            layer_inputs = layer_outputs
            layer_input_size = output_dims
        final_encoder_hidden_states.append(final_layer_hidden_state)
        final_encoder_cell_states.append(final_layer_cell_state)

    encoder_outputs = layer_outputs
    weighted_encoder_outputs = None

    return (
        encoder_outputs,
        weighted_encoder_outputs,
        final_encoder_hidden_states,
        final_encoder_cell_states,
        encoder_units_per_layer,
    )
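
# Illustrative sketch (editorial addition) of the encoder_params structure this
# function consumes; the layer sizes and dropout value are hypothetical.
_EXAMPLE_ENCODER_PARAMS = {
    'use_bidirectional_encoder': True,
    'encoder_layer_configs': [
        {'num_units': 256, 'dropout_keep_prob': 0.8},
        {'num_units': 512},
    ],
}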


class LSTMWithAttentionDecoder(object):

    def scope(self, name):
        return self.name + '/' + name if self.name is not None else name

    def _get_attention_type(self, attention_type_as_string):
        if attention_type_as_string == 'regular':
            return attention.AttentionType.Regular
        elif attention_type_as_string == 'recurrent':
            return attention.AttentionType.Recurrent
        else:
            assert False, 'Unknown type ' + attention_type_as_string

    def __init__(
        self,
        encoder_outputs,
        encoder_output_dim,
        encoder_lengths,
        vocab_size,
        attention_type,
        embedding_size,
        decoder_num_units,
        decoder_cells,
        residual_output_layers=None,
        name=None,
        weighted_encoder_outputs=None,
    ):
        self.name = name
        self.num_layers = len(decoder_cells)
        if attention_type == 'none':
            self.cell = rnn_cell.MultiRNNCell(
                decoder_cells,
                name=self.scope('decoder'),
                residual_output_layers=residual_output_layers,
            )
            self.use_attention = False
            self.decoder_output_dim = decoder_num_units
            self.output_indices = self.cell.output_indices
        else:
            decoder_cell = rnn_cell.MultiRNNCell(
                decoder_cells,
                name=self.scope('decoder'),
                residual_output_layers=residual_output_layers,
            )
            self.cell = rnn_cell.AttentionCell(
                encoder_output_dim=encoder_output_dim,
                encoder_outputs=encoder_outputs,
                encoder_lengths=encoder_lengths,
                decoder_cell=decoder_cell,
                decoder_state_dim=decoder_num_units,
                name=self.scope('attention_decoder'),
                attention_type=self._get_attention_type(attention_type),
                weighted_encoder_outputs=weighted_encoder_outputs,
                attention_memory_optimization=True,
            )
            self.use_attention = True
            self.decoder_output_dim = decoder_num_units + encoder_output_dim

            self.output_indices = decoder_cell.output_indices
            self.output_indices.append(2 * self.num_layers)

    def get_state_names(self):
        return self.cell.get_state_names()

    def get_outputs_with_grads(self):
        # sequence (all) output locations are at twice their state index
        return [2 * i for i in self.output_indices]

    def get_output_dim(self):
        return self.decoder_output_dim

    def get_attention_weights(self):
        assert self.use_attention
        # [batch_size, encoder_length, 1]
        return self.cell.get_attention_weights()

    def apply(
        self,
        model,
        input_t,
        seq_lengths,
        states,
        timestep,
    ):
        return self.cell.apply(
            model=model,
            input_t=input_t,
            seq_lengths=seq_lengths,
            states=states,
            timestep=timestep,
        )

    def apply_over_sequence(
        self,
        model,
        inputs,
        seq_lengths,
        initial_states,
    ):
        return self.cell.apply_over_sequence(
            model=model,
            inputs=inputs,
            seq_lengths=seq_lengths,
            initial_states=initial_states,
            outputs_with_grads=self.get_outputs_with_grads(),
        )


def build_initial_rnn_decoder_states(
    model,
    encoder_units_per_layer,
    decoder_units_per_layer,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    use_attention,
):
    num_encoder_layers = len(encoder_units_per_layer)
    num_decoder_layers = len(decoder_units_per_layer)
    if num_encoder_layers > num_decoder_layers:
        offset = num_encoder_layers - num_decoder_layers
    else:
        offset = 0

    initial_states = []
    for i, decoder_num_units in enumerate(decoder_units_per_layer):

        if (
            final_encoder_hidden_states and
            len(final_encoder_hidden_states) > (i + offset)
        ):
            final_encoder_hidden_state = final_encoder_hidden_states[i + offset]
        else:
            final_encoder_hidden_state = None

        if final_encoder_hidden_state is None:
            decoder_initial_hidden_state = model.param_init_net.ConstantFill(
                [],
                'decoder_initial_hidden_state_{}'.format(i),
                shape=[decoder_num_units],
                value=0.0,
            )
            model.params.append(decoder_initial_hidden_state)
        elif decoder_num_units != encoder_units_per_layer[i + offset]:
            decoder_initial_hidden_state = brew.fc(
                model,
                final_encoder_hidden_state,
                'decoder_initial_hidden_state_{}'.format(i),
                encoder_units_per_layer[i + offset],
                decoder_num_units,
                axis=2,
            )
        else:
            decoder_initial_hidden_state = final_encoder_hidden_state
        initial_states.append(decoder_initial_hidden_state)

        if (
            final_encoder_cell_states and
            len(final_encoder_cell_states) > (i + offset)
        ):
            final_encoder_cell_state = final_encoder_cell_states[i + offset]
        else:
            final_encoder_cell_state = None

        if final_encoder_cell_state is None:
            decoder_initial_cell_state = model.param_init_net.ConstantFill(
                [],
                'decoder_initial_cell_state_{}'.format(i),
                shape=[decoder_num_units],
                value=0.0,
            )
            model.params.append(decoder_initial_cell_state)
        elif decoder_num_units != encoder_units_per_layer[i + offset]:
            decoder_initial_cell_state = brew.fc(
                model,
                final_encoder_cell_state,
                'decoder_initial_cell_state_{}'.format(i),
                encoder_units_per_layer[i + offset],
                decoder_num_units,
                axis=2,
            )
        else:
            decoder_initial_cell_state = final_encoder_cell_state
        initial_states.append(decoder_initial_cell_state)

    if use_attention:
        initial_attention_weighted_encoder_context = (
            model.param_init_net.ConstantFill(
                [],
                'initial_attention_weighted_encoder_context',
                shape=[encoder_units_per_layer[-1]],
                value=0.0,
            )
        )
        model.params.append(initial_attention_weighted_encoder_context)
        initial_states.append(initial_attention_weighted_encoder_context)

    return initial_states
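
# Note (editorial): when the encoder is deeper than the decoder, the topmost
# encoder layers seed the decoder states; e.g. with 4 encoder layers and 2
# decoder layers, offset is 2 and encoder layers 2 and 3 (0-based) are used.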


def build_embedding_decoder(
    model,
    decoder_layer_configs,
    inputs,
    input_lengths,
    encoder_lengths,
    encoder_outputs,
    weighted_encoder_outputs,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    encoder_units_per_layer,
    vocab_size,
    embeddings,
    embedding_size,
    attention_type,
    forward_only,
    num_gpus=0,
    scope=None,
):
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_decoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_decoder_inputs'],
            )
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_decoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_decoder_inputs_cpu'],
                )
            embedded_decoder_inputs = model.CopyCPUToGPU(
                embedded_decoder_inputs_cpu,
                'embedded_decoder_inputs',
            )

    decoder_cells = []
    decoder_units_per_layer = []
    for i, layer_config in enumerate(decoder_layer_configs):
        num_units = layer_config['num_units']
        decoder_units_per_layer.append(num_units)

        if i == 0:
            input_size = embedding_size
        else:
            input_size = decoder_cells[-1].get_output_dim()

        cell = rnn_cell.LSTMCell(
            forward_only=forward_only,
            input_size=input_size,
            hidden_size=num_units,
            forget_bias=0.0,
            memory_optimization=False,
        )

        dropout_keep_prob = layer_config.get('dropout_keep_prob', None)
        if dropout_keep_prob is not None:
            # layer_config is a dict, so use the value fetched above
            dropout_ratio = 1.0 - dropout_keep_prob
            cell = rnn_cell.DropoutCell(
                internal_cell=cell,
                dropout_ratio=dropout_ratio,
                forward_only=forward_only,
                is_test=False,
                name=get_layer_scope(scope, 'decoder_dropout', i),
            )

        decoder_cells.append(cell)

    states = build_initial_rnn_decoder_states(
        model=model,
        encoder_units_per_layer=encoder_units_per_layer,
        decoder_units_per_layer=decoder_units_per_layer,
        final_encoder_hidden_states=final_encoder_hidden_states,
        final_encoder_cell_states=final_encoder_cell_states,
        use_attention=(attention_type != 'none'),
    )
    attention_decoder = LSTMWithAttentionDecoder(
        encoder_outputs=encoder_outputs,
        encoder_output_dim=encoder_units_per_layer[-1],
        encoder_lengths=encoder_lengths,
        vocab_size=vocab_size,
        attention_type=attention_type,
        embedding_size=embedding_size,
        decoder_num_units=decoder_units_per_layer[-1],
        decoder_cells=decoder_cells,
        weighted_encoder_outputs=weighted_encoder_outputs,
        name=scope,
    )
    decoder_outputs, _ = attention_decoder.apply_over_sequence(
        model=model,
        inputs=embedded_decoder_inputs,
        seq_lengths=input_lengths,
        initial_states=states,
    )

    # We do softmax over the whole sequence:
    # (max_length in the batch * batch_size) x decoder embedding size.
    # -1 is used because max_length is not known at graph-construction time.
    decoder_outputs_flattened, _ = model.net.Reshape(
        [decoder_outputs],
        [
            'decoder_outputs_flattened',
            'decoder_outputs_and_contexts_combination_old_shape',
        ],
        shape=[-1, attention_decoder.get_output_dim()],
    )

    decoder_outputs = decoder_outputs_flattened
    decoder_output_dim = attention_decoder.get_output_dim()

    return (decoder_outputs, decoder_output_dim)
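
# Illustrative sketch (editorial addition) of the decoder_layer_configs structure;
# the layer sizes and dropout value are hypothetical.
_EXAMPLE_DECODER_LAYER_CONFIGS = [
    {'num_units': 512, 'dropout_keep_prob': 0.8},
    {'num_units': 512},
]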


def output_projection(
    model,
    decoder_outputs,
    decoder_output_size,
    target_vocab_size,
    decoder_softmax_size,
):
    if decoder_softmax_size is not None:
        decoder_outputs = brew.fc(
            model,
            decoder_outputs,
            'decoder_outputs_scaled',
            dim_in=decoder_output_size,
            dim_out=decoder_softmax_size,
        )
        decoder_output_size = decoder_softmax_size

    output_projection_w = model.param_init_net.XavierFill(
        [],
        'output_projection_w',
        shape=[target_vocab_size, decoder_output_size],
    )

    output_projection_b = model.param_init_net.XavierFill(
        [],
        'output_projection_b',
        shape=[target_vocab_size],
    )
    model.params.extend([
        output_projection_w,
        output_projection_b,
    ])
    output_logits = model.net.FC(
        [
            decoder_outputs,
            output_projection_w,
            output_projection_b,
        ],
        ['output_logits'],
    )
    return output_logits
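
# Note (editorial, assumption): callers typically feed the returned
# 'output_logits' blob, together with flattened target ids, into a
# softmax/cross-entropy loss; that wiring lives in the training code, not here.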