pytorch 1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.

ctc_beam_search_decoder_op.cc
#include "caffe2/operators/ctc_beam_search_decoder_op.h"

namespace caffe2 {

namespace {
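// Returns a pointer to the activations for time step t and batch item n of a
// row-major [max_activation_length, batch_size, alphabet_size] tensor; the
// slice for (t, n) starts at linear offset (t * batch_size + n) * alphabet_size.
// For example, with dims = {100, 8, 29}, t = 2, n = 3 the offset is
// (2 * 8 + 3) * 29 = 551.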
const float* getTensorDataPtr(const Tensor& tensor, int t, int n) {
  const auto dims = tensor.sizes();
  CAFFE_ENFORCE_EQ(dims.size(), 3);
  int offset = (t * dims[1] + n) * dims[2];
  CAFFE_ENFORCE_LT(offset, tensor.numel());
  return tensor.template data<float>() + offset;
}

} // namespace

template <>
bool CTCBeamSearchDecoderOp<CPUContext>::RunOnDevice() {
  // shape: max_activation_length x batch_size x alphabet_size
  auto& inputs = Input(INPUTS);
  // shape: batch_size

  // shape: sum over all decoded_length
  const auto inputs_dims = inputs.sizes();
  int32_t max_activation_length = inputs_dims[0];
  int32_t batch_size = inputs_dims[1];
  int32_t alphabet_size = inputs_dims[2];
  // [batch_size]
  const int* seq_len_data =
      (InputSize() == 2) ? Input(SEQ_LEN).data<int>() : nullptr;
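  // SEQ_LEN is optional; when it is absent, every batch item is decoded over
  // the full max_activation_length time steps.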

  vector<int32_t> values_cache;
  const int total_candidates = batch_size * num_candidates_;
  auto* output_len =
      Output(OUTPUT_LEN, vector<int64_t>{total_candidates}, at::dtype<int>());
  int* output_len_data = output_len->mutable_data<int>();
  memset(output_len_data, 0, total_candidates * sizeof(int));
  auto* output_prob = Output(
      OUTPUT_PROB, vector<int64_t>{total_candidates}, at::dtype<float>());
  float* output_prob_data = output_prob->mutable_data<float>();
  memset(output_prob_data, 0, total_candidates * sizeof(float));

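  // For each batch item, run prefix beam search (cf. Hannun et al., 2014):
  // track, per time step, the probability of every candidate prefix split by
  // whether it currently ends in a blank, extend surviving prefixes with each
  // (pruned) character, and keep only the beam_width_ best prefixes.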
  for (int32_t i = 0; i < batch_size; ++i) {
    const int32_t activation_length =
        (seq_len_data) ? seq_len_data[i] : max_activation_length;
    std::multimap<float, vector<int32_t>, std::greater<float>> A_next_inv;
    // For a given time step, Pb maps prefixes to the probability of all
    // candidate sequences that end in a blank and Pnb maps prefixes to the
    // probability of all candidate sequences that don't end in a blank.
    vector<std::map<vector<int32_t>, float>> Pb(
        activation_length + 1, std::map<vector<int32_t>, float>());
    vector<std::map<vector<int32_t>, float>> Pnb(
        activation_length + 1, std::map<vector<int32_t>, float>());
    set<vector<int32_t>> A_prev;
    Pb[0][vector<int32_t>()] = 1;
    Pnb[0][vector<int32_t>()] = 0;
    A_prev.insert(vector<int32_t>());

    for (int t = 0; t < activation_length; t++) {
      const float* ctc = getTensorDataPtr(inputs, t, i);

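      // Keep only characters whose probability at step t exceeds
      // prune_threshold_, so unlikely extensions are skipped.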
      vector<int32_t> pruned_alpha;
      for (int32_t c = 0; c < alphabet_size; c++) {
        if (ctc[c] > prune_threshold_) {
          pruned_alpha.push_back(c);
        }
      }

      // If the pruned alphabet is empty, don't use pruning.
      if (pruned_alpha.size() == 0) {
        pruned_alpha = vector<int32_t>(alphabet_size);
        std::iota(pruned_alpha.begin(), pruned_alpha.end(), 0);
      }

      for (auto const& l : A_prev) {
        // We skip the code handling the end character from the article since
        // our system does not support an end character.

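        // Three cases per character c (probabilities, not log-probs):
        //  - c is blank: the prefix stays l, so ctc[0] * (Pb[t][l] + Pnb[t][l])
        //    accrues to Pb[t + 1][l].
        //  - c repeats l.back(): extending to l+c is only valid after a blank
        //    (ctc[c] * Pb[t][l]); staying on l collapses the repeat
        //    (ctc[c] * Pnb[t][l]).
        //  - otherwise: l+c accrues ctc[c] * (Pb[t][l] + Pnb[t][l]).
        // If l+c is not itself in the beam, also fold in the probability mass
        // it already accumulated at step t.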
        for (auto const c : pruned_alpha) {
          // Assumption: blank character always mapped to index 0
          if (c == 0) {
            Pb[t + 1][l] += ctc[c] * (Pb[t][l] + Pnb[t][l]);
          } else {
            vector<int32_t> l_plus = vector<int32_t>(l);
            l_plus.push_back(c);
            if (l.size() > 0 && c == l.back()) {
              Pnb[t + 1][l_plus] += ctc[c] * Pb[t][l];
              Pnb[t + 1][l] += ctc[c] * Pnb[t][l];
            } else {
              Pnb[t + 1][l_plus] += ctc[c] * (Pb[t][l] + Pnb[t][l]);
            }

            if (A_prev.find(l_plus) == A_prev.end()) {
              Pb[t + 1][l_plus] += ctc[0] * (Pb[t][l_plus] + Pnb[t][l_plus]);
              Pnb[t + 1][l_plus] += ctc[c] * Pnb[t][l_plus];
            }
          }
        }
      }

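      // Merge blank and non-blank mass per prefix, invert into a multimap
      // sorted by descending probability, and carry only the top beam_width_
      // prefixes into the next time step.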
      std::map<vector<int32_t>, float> A_next(Pb[t + 1]);
      for (const auto& it : Pnb[t + 1]) {
        A_next[it.first] += it.second;
      }
      A_next_inv.clear();
      for (const auto& it : A_next) {
        A_next_inv.insert({it.second, it.first});
      }

      A_prev.clear();
      auto it = A_next_inv.begin();
      for (int j = 0; j < beam_width_; j++) {
        if (it == A_next_inv.end()) {
          break;
        }
        A_prev.insert(it->second);
        it++;
      }
    }

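    // After the last time step, emit the top num_candidates_ prefixes for
    // this batch item: their lengths, their total probability (Pb + Pnb at
    // the final step), and their flattened token values.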
    auto it = A_next_inv.begin();
    for (int index = 0; index < num_candidates_; index++, it++) {
      if (it == A_next_inv.end()) {
        break;
      }
      auto& candidate = it->second;
      output_len_data[i * num_candidates_ + index] = candidate.size();
      output_prob_data[i * num_candidates_ + index] =
          Pb.back()[candidate] + Pnb.back()[candidate];
      values_cache.insert(
          values_cache.end(), candidate.begin(), candidate.end());
    }
  }

  int32_t values_cache_size = values_cache.size();
  auto* values =
      Output(VALUES, vector<int64_t>{values_cache_size}, at::dtype<int>());
  int* values_data = values->mutable_data<int>();
  for (int i = 0; i < values_cache.size(); ++i) {
    values_data[i] = values_cache.at(i);
  }
  values_cache.clear();

  return true;
}

REGISTER_CPU_OPERATOR(CTCBeamSearchDecoder, CTCBeamSearchDecoderOp<CPUContext>);
OPERATOR_SCHEMA(CTCBeamSearchDecoder)
    .NumInputs(1, 2)
    .NumOutputs(2, 3)
    .SetDoc(
        "Prefix beam search decoder for connectionist temporal classification.")
    .Arg(
        "beam_width",
        "Maximum number of candidates to carry over to next activation step.")
    .Arg(
        "prune_threshold",
        "Probability threshold below which outputs are ignored.")
    .Input(
        0,
        "INPUTS",
        "3D float Tensor sized [max_activation_length, batch_size, alphabet_size] "
        "of network logits (before softmax application).")
    .Input(
        1,
        "SEQ_LEN",
        "(optional) 1D int vector containing sequence lengths, "
        "having size [batch_size] "
        "seq_len will be set to max_time if not provided.")
    .Output(
        0,
        "OUTPUT_LEN",
        "Output_len matrix size (batch_size * num_candidates). "
        "Each index stores lengths of candidates for its corresponding batch item.")
    .Output(
        1,
        "VALUES",
        "Values vector, size (total_decoded_outputs). "
        "The flattened vector of final output sequences, in batch order.")
    .Output(
        2,
        "OUTPUT_PROB",
        "Probability vector, size (total_decoded_outputs). "
        "Each index stores final output probability of its corresponding batch item.")
    .InheritOnnxSchema();
SHOULD_NOT_DO_GRADIENT(CTCBeamSearchDecoder);

} // namespace caffe2
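
The recursion above is easiest to follow outside the operator machinery. Below is a minimal, self-contained C++ sketch of the same prefix beam search; the function name decode_prefix_beam and the toy probability table are illustrative assumptions, not part of Caffe2. It returns a single best candidate and omits the probability-threshold pruning, but otherwise mirrors the blank/non-blank bookkeeping and beam truncation of RunOnDevice.

#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <vector>

using Prefix = std::vector<int32_t>;

// Decode the most probable label sequence from a [time][alphabet] table of
// per-step symbol probabilities. Index 0 is the blank symbol, matching the
// operator's assumption.
Prefix decode_prefix_beam(
    const std::vector<std::vector<float>>& probs,
    int beam_width) {
  std::map<Prefix, float> Pb{{Prefix{}, 1.0f}}; // prefixes ending in blank
  std::map<Prefix, float> Pnb;                  // prefixes ending in non-blank
  std::set<Prefix> A_prev{Prefix{}};

  for (const auto& ctc : probs) {
    std::map<Prefix, float> Pb_next, Pnb_next;
    for (const auto& l : A_prev) {
      for (int32_t c = 0; c < static_cast<int32_t>(ctc.size()); ++c) {
        if (c == 0) {
          // Blank: the prefix is unchanged.
          Pb_next[l] += ctc[0] * (Pb[l] + Pnb[l]);
        } else {
          Prefix l_plus = l;
          l_plus.push_back(c);
          if (!l.empty() && c == l.back()) {
            Pnb_next[l_plus] += ctc[c] * Pb[l]; // repeat valid only after blank
            Pnb_next[l] += ctc[c] * Pnb[l];     // otherwise the repeat collapses
          } else {
            Pnb_next[l_plus] += ctc[c] * (Pb[l] + Pnb[l]);
          }
          // Same refinement as the operator: if l_plus fell outside the beam,
          // fold in the mass it already carried at this step.
          if (A_prev.find(l_plus) == A_prev.end()) {
            Pb_next[l_plus] += ctc[0] * (Pb[l_plus] + Pnb[l_plus]);
            Pnb_next[l_plus] += ctc[c] * Pnb[l_plus];
          }
        }
      }
    }
    // Rank prefixes by total (blank + non-blank) mass; keep the top beam_width.
    std::map<Prefix, float> A_next(Pb_next);
    for (const auto& kv : Pnb_next) A_next[kv.first] += kv.second;
    std::multimap<float, Prefix, std::greater<float>> ranked;
    for (const auto& kv : A_next) ranked.insert({kv.second, kv.first});
    A_prev.clear();
    for (const auto& kv : ranked) {
      if (static_cast<int>(A_prev.size()) == beam_width) break;
      A_prev.insert(kv.second);
    }
    Pb = std::move(Pb_next);
    Pnb = std::move(Pnb_next);
  }

  // Return the best surviving prefix after the final step.
  Prefix best;
  float best_p = -1.0f;
  for (const auto& l : A_prev) {
    const float p = Pb[l] + Pnb[l];
    if (p > best_p) { best_p = p; best = l; }
  }
  return best;
}

int main() {
  // Toy input: 3 time steps over a 3-symbol alphabet {blank, 1, 2}, each row
  // already softmax-normalized as the operator expects.
  const std::vector<std::vector<float>> probs = {
      {0.1f, 0.7f, 0.2f},
      {0.3f, 0.6f, 0.1f},
      {0.2f, 0.2f, 0.6f},
  };
  for (int32_t c : decode_prefix_beam(probs, /*beam_width=*/4)) {
    std::cout << c << ' '; // prints the decoded label indices
  }
  std::cout << '\n';
  return 0;
}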