pytorch  1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.
Fossies Dox: pytorch-1.8.2.tar.gz ("unofficial" and still experimental doxygen-generated source code documentation)

boolean_mask_ops.cc
Go to the documentation of this file.
4
5namespace caffe2 {
6namespace {
7
// BooleanMaskLengthsOp: given 1-D segment lengths and a flat boolean mask
// whose total size equals the sum of those lengths, emits the per-segment
// count of kept (true) elements, i.e. the segment lengths that would result
// after BooleanMask is applied to the underlying data.
// NOTE(review): doxygen extraction dropped original line 11 here (likely a
// USE_OPERATOR-style macro) -- verify against the upstream file.
8template <class Context>
9class BooleanMaskLengthsOp final : public Operator<Context> {
10 public:
12 template <class... Args>
13 explicit BooleanMaskLengthsOp(Args&&... args)
14 : Operator<Context>(std::forward<Args>(args)...) {}
15
// Dispatch on the integral type of the lengths input (int32 or int64).
16 bool RunOnDevice() override {
17 return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
18 }
19
20 template <typename T>
21 bool DoRunWithType() {
22 auto& lengths = Input(0);
23 auto& mask = Input(1);
24
// Both inputs must be 1-D, and the mask must cover exactly the elements
// described by the lengths: mask.numel() == sum(lengths).
25 CAFFE_ENFORCE(lengths.dim() == 1);
26 CAFFE_ENFORCE(mask.dim() == 1);
27 const auto* lengthsPtr = lengths.template data<T>();
28 const auto* maskPtr = mask.template data<bool>();
29 auto totalLength =
30 std::accumulate(lengthsPtr, lengthsPtr + lengths.numel(), 0);
31 CAFFE_ENFORCE(mask.numel() == totalLength);
// One output entry per input segment, same dtype as the lengths input.
32 auto* lengthsOut = Output(0, lengths.sizes(), at::dtype<T>());
33 auto* lengthsOutPtr = lengthsOut->template mutable_data<T>();
// Single pass over the mask: p is the running flat position; for each
// segment, count how many of its positions survive the mask.
34 int p = 0;
35 for (int i = 0; i < lengths.numel(); ++i) {
36 T lengthOut = 0;
37 for (int j = 0; j < lengthsPtr[i]; ++j) {
38 if (maskPtr[p++]) {
39 ++lengthOut;
40 }
41 }
42 lengthsOutPtr[i] = lengthOut;
43 }
44 return true;
45 }
46};
47} // namespace
48
// BooleanMaskOp<CPUContext>::RunOnDevice -- compacts the rows of `data`
// selected by the 1-D boolean `mask` into Output(0); when a second output is
// requested, also writes the indices of the kept rows to Output(1).
// NOTE(review): doxygen extraction dropped original line 50 here (the
// function signature) -- verify against the upstream file.
49template <>
51 auto& data = Input(0);
52 auto& mask = Input(1);
53 auto* dataOut = Output(0);
54 CAFFE_ENFORCE(data.dim() >= 1);
55 CAFFE_ENFORCE_EQ(mask.dim(), 1);
56 CAFFE_ENFORCE(data.size(0) == mask.size(0));
57
// First pass: count surviving rows so the output can be sized up front.
58 const auto* maskPtr = mask.template data<bool>();
59 int numOutputs = 0;
60 int outerSize = mask.numel();
61 for (int i = 0; i < outerSize; ++i) {
62 if (maskPtr[i]) {
63 ++numOutputs;
64 }
65 }
// Output keeps the trailing dims of `data`; only the leading dim shrinks.
66 std::vector<int64_t> outShape;
67 outShape.push_back(numOutputs);
68 outShape.insert(outShape.end(), data.sizes().begin() + 1, data.sizes().end());
69 dataOut->Resize(outShape);
70 auto* outPtr = (char*)dataOut->raw_mutable_data(data.dtype());
71
// Optional second output: indices of the true mask entries.
72 int64_t* out_vec = nullptr;
73 if (OutputSize() == 2) {
74 auto* indicesOut = Output(1, {numOutputs}, at::dtype<int64_t>());
75 out_vec = indicesOut->template mutable_data<int64_t>();
76 }
77
78 if (numOutputs == 0) {
79 return true;
80 }
// Per-row size in elements and in bytes (product of the non-leading dims).
81 const auto innerSize = data.size_from_dim(1);
82 const auto innerSizeBytes = innerSize * data.dtype().itemsize();
83
// Second pass: copy maximal runs of consecutive kept rows in one shot.
// lastStart == -1 means we are not currently inside a kept run.
84 int64_t lastStart = -1;
85 const auto* inPtr = (char*)data.raw_data();
86 int64_t outStart = 0;
87
88 for (int64_t i = 0;; ++i) {
89 // mask was true and either a) became false, or b) sequence finished
90 if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
91 const auto* src = inPtr + lastStart * innerSizeBytes;
92 auto* dst = outPtr + outStart * innerSizeBytes;
93 int numItems = i - lastStart;
// NOTE(review): extraction dropped original line 94 here (likely the
// `context_.CopyItemsSameDevice(` call opener) -- verify upstream.
95 data.dtype(), numItems * innerSize, src, dst);
96 outStart += numItems;
97 lastStart = -1;
98 }
99 if (i >= outerSize) {
100 break;
101 }
102 // mask was false and became true
103 if (lastStart == -1 && maskPtr[i]) {
104 lastStart = i;
105 }
106 if (maskPtr[i] && OutputSize() == 2) {
107 *(out_vec++) = i;
108 }
109 }
110 return true;
111}
112
// BooleanMaskOpGradient<CPUContext>::DoRunWithType -- scatters the upstream
// gradient dY back into the pre-mask shape: positions where the mask was
// true receive the next dY value in order, masked-out positions receive 0.
// NOTE(review): doxygen extraction dropped original line 115 here (the
// function signature) -- verify against the upstream file.
113template <>
114template <class T>
116 const auto& mask = Input(0);
117 const auto& dY = Input(1);
118 auto* dX = Output(0);
119
120 const int data_length_before_mask = mask.size(0);
121
122 dX->Resize(data_length_before_mask);
123
124 // TODO: we should support any type, not just float
125 T* dXdata = dX->template mutable_data<T>();
126 const T* dYdata = dY.template data<T>();
127 const bool* mask_data = mask.template data<bool>();
128
// ind walks dY; it advances only at unmasked (true) positions.
129 int ind = 0;
130
131 for (int i = 0; i < data_length_before_mask; i++) {
132 dXdata[i] = mask_data[i] ? dYdata[ind++] : 0;
133 }
134
135 return true;
136}
137
// CPU registrations for the boolean-mask operators.
// NOTE(review): extraction dropped original lines 139 and 141 around the
// gradient registration; the full statement (per the reference index later
// in this dump) is REGISTER_CPU_GRADIENT_OPERATOR(BooleanMaskGradient,
// BooleanMaskOpGradient<CPUContext>); -- verify against the upstream file.
140 BooleanMaskGradient,
142REGISTER_CPU_OPERATOR(BooleanMaskLengths, BooleanMaskLengthsOp<CPUContext>);
143
// Schema for BooleanMask: 2 inputs (data, mask), 1-2 outputs
// (masked_data and optionally masked_indices).
// NOTE(review): extraction dropped a few lines inside the embedded doc
// string (original 153/159/168/190), so the Python example below may be
// missing a line (e.g. the closing paren of core.CreateOperator).
144OPERATOR_SCHEMA(BooleanMask)
145 .NumInputs(2)
146 .NumOutputs(1, 2)
147 .SetDoc(R"DOC(
148Given a 1D `data` tensor and a boolean `mask` tensor of the same shape, returns a `masked_data` tensor containing only the elements corresponding to positions where the `mask` is True, and a `masked_indices` tensor containing the indices of the True elements.
149
150
151Github Links:
152- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/boolean_mask_ops.cc
154<details>
155
156<summary> <b>Example</b> </summary>
157
158**Code**
160```
161
162workspace.ResetWorkspace()
163
164op = core.CreateOperator(
165 "BooleanMask",
166 ["data", "mask"],
167 ["masked_data", "masked_indices"]
169
170workspace.FeedBlob("data", np.array([1,2,3,4,5,6]))
171workspace.FeedBlob("mask", np.array([True,False,False,True,True,False]))
172print("data:", workspace.FetchBlob("data"))
173print("mask:", workspace.FetchBlob("mask"))
174workspace.RunOperatorOnce(op)
175print("masked_data:", workspace.FetchBlob("masked_data"))
176print("masked_indices:", workspace.FetchBlob("masked_indices"))
177
178```
179
180**Result**
181
182```
183
184data: [1 2 3 4 5 6]
185mask: [ True False False True True False]
186masked_data: [1 4 5]
187masked_indices: [0 3 4]
188
189```
191</details>
192
193)DOC")
194 .Input(0, "data", "(*Tensor*): 1D input tensor")
195 .Input(
196 1,
197 "mask",
198 "(*Tensor`<bool>`*): tensor of bools which determines the input elements that will be left in the `masked_data` output tensor; same shape as `data`")
199 .Output(
200 0,
201 "masked_data",
202 "(*Tensor*): 1D tensor of same type as `data` input that contains the masked input tensor")
203 .Output(
204 1,
205 "masked_indices",
206 "(*Tensor`<int>`*): 1D tensor of indices of the True elements in the `mask` tensor");
207
// Schema for BooleanMaskLengths (2 inputs, 1 output) plus the gradient
// schema for BooleanMaskGradient.
// NOTE(review): extraction dropped original line 246 (a blank line) inside
// the embedded doc string.
208OPERATOR_SCHEMA(BooleanMaskLengths)
209 .NumInputs(2)
210 .NumOutputs(1)
211 .SetDoc(R"DOC(
212Given a tensor of int32 `lengths` tensor representing segment lengths and a `mask` (boolean) tensor, return the segment lengths of the corresponding segmented tensor after **BooleanMask** is applied.
213
214If `lengths` tensor is $[a_1, a_2, ..., a_n]$, then length of `mask` tensor must be $a_1 + a_2 + ... + a_n$.
215
216
217Github Links:
218- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/boolean_mask_ops.cc
219
220<details>
221
222<summary> <b>Example</b> </summary>
223
224**Code**
225
226```
227
228workspace.ResetWorkspace()
229
230op = core.CreateOperator(
231 "BooleanMaskLengths",
232 ["lengths", "mask"],
233 ["masked_lengths"]
234)
235
236workspace.FeedBlob("lengths", np.array([1,3,2], dtype=np.int32))
237workspace.FeedBlob("mask", np.array([False,True,True,False,True,True]))
238print("lengths:", workspace.FetchBlob("lengths"))
239print("mask:", workspace.FetchBlob("mask"))
240workspace.RunOperatorOnce(op)
241print("masked_lengths:", workspace.FetchBlob("masked_lengths"))
242
243```
244
245**Result**
247```
248
249lengths: [1 3 2]
250mask: [False True True False True True]
251masked_lengths: [0 2 2]
252
253```
254
255</details>
256
257)DOC")
258 .Input(
259 0,
260 "lengths",
261 "(*Tensor`<int>`*): input tensor containing segment lengths")
262 .Input(1, "mask", "(*Tensor`<bool>`*): A 1D bool tensor of values to keep.")
263 .Output(
264 0,
265 "masked_lengths",
266 "(*Tensor`<int>`*): 1D tensor of same type as inputs that contains the sequence");
267
268GRADIENT_OPERATOR_SCHEMA(BooleanMaskGradient).NumInputs(2).NumOutputs(1);
269
// Gradient maker: BooleanMask's backward pass reuses the forward mask (I(1))
// to scatter the output gradient GO(0) back into the data slot (GI(0)) via
// the BooleanMaskGradient operator.
// NOTE(review): extraction dropped original line 272 here (likely a
// `using GradientMakerBase::GradientMakerBase;` declaration) -- verify
// against the upstream file.
270namespace {
271class GetBooleanMaskGradient : public GradientMakerBase {
273 vector<OperatorDef> GetGradientDefs() override {
274 return SingleGradientDef(
275 "BooleanMaskGradient",
276 "",
277 vector<string>{I(1), GO(0)},
278 vector<string>{GI(0)});
279 }
280};
281
282REGISTER_GRADIENT(BooleanMask, GetBooleanMaskGradient);
// BooleanMaskLengths is not differentiable.
283NO_GRADIENT(BooleanMaskLengths);
284
285} // namespace
286
// Fill value used by SequenceMaskOp: negative infinity, so that masked
// entries vanish under a subsequent softmax.
const float minf = -std::numeric_limits<float>::infinity();
// Template this on a functor object so we can generate different
// implementations at compile time and have a better chance of inlining
//
// Copies `in` to `out`, replacing every entry the functor flags with
// fill_val. fn(row, col, val) returns true for entries that must be masked.
template <typename Functor>
void MaskWithFunctor(
    int N,
    int M,
    int B,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  if (B >= 0) { // with batching
    // collapse tensor to 3-dim view [B, N, M] where:
    // B is product of dims up to and including batch
    // N is product of dims between batch and axis, exclusive
    // M is product of dimensions at/after axis
    // then mask each batch [i, :, :] (note that this is N x M matrix)
    for (int i = 0; i < B; ++i) {
      for (int j = 0; j < N; ++j) {
        for (int k = 0; k < M; ++k) {
          // when [i, :, :] is laid out in row major order
          // N * M * i + M * j + k is index of entry in N x M matrix
          // with coordinates (row = j, col = k)
          auto val = in[N * M * i + M * j + k];
          out[N * M * i + M * j + k] = (fn(j, k, val) ? fill_val : val);
        }
      }
    }
  } else { // without batching
    // TODO(T20952436): vector implementation
    // collapse tensor to 2-dim view [N, M], where
    // N is product of dimensions before axis
    // M is product of dimensions at/after axis
    // and mask N by M matrix
    for (int i = 0; i < N; ++i) {
      for (int j = 0; j < M; ++j) {
        auto val = in[M * i + j];
        out[M * i + j] = (fn(i, j, val) ? fill_val : val);
      }
    }
  }
}
// Repeat masking along continuous segments (right axes) of size D
//
// Treats `in` as an [N, M, D] view; fn(i, j, val) decides whether the whole
// (i, j) segment is replaced with fill_val, so one mask decision is shared
// by all D elements of that segment.
template <typename Functor>
void RepeatedMaskWithFunctor(
    int N,
    int M,
    int D,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j) {
      for (int k = 0; k < D; ++k) {
        auto val = in[M * D * i + D * j + k];
        out[M * D * i + D * j + k] = (fn(i, j, val) ? fill_val : val);
      }
    }
  }
}
351
352namespace {
353
354class SequenceFunctor {
355 public:
356 explicit SequenceFunctor(const int* sl, const size_t len)
357 : sl_(sl), len_(len) {}
358 bool operator()(int i, int j, float /* val*/) {
359 CAFFE_ENFORCE(i < len_, "Out of bound.");
360 return j >= sl_[i];
361 }
362
363 private:
364 const int* sl_;
365 const size_t len_;
366};
367
// Flags every position outside the window [c[i] - r, c[i] + r]:
// entry (i, j) is kept only when column j lies within radius r of the
// row's center c[i].
class WindowFunctor {
 public:
  explicit WindowFunctor(const int* c, int r) : c(c), r(r) {}
  bool operator()(int i, int j, float /* val*/) {
    const int lo = c[i] - r;
    const int hi = c[i] + r;
    return j < lo || j > hi;
  }

 private:
  const int* c;
  const int r;
};
379
// Flags the strict upper triangle: entry (i, j) is masked iff j > i.
class UpperFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i < j;
  }
};
386
// Flags the strict lower triangle: entry (i, j) is masked iff j < i.
class LowerFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i > j;
  }
};
393
// Flags the upper triangle including the diagonal:
// entry (i, j) is masked iff j >= i.
class UpperDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return !(j < i);
  }
};
400
// Flags the lower triangle including the diagonal:
// entry (i, j) is masked iff j <= i.
class LowerDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return !(i < j);
  }
};
407
408} // namespace
409
// SequenceMaskOp<CPUContext>::RunOnDevice -- dispatches on the input type
// (float only).
// NOTE(review): extraction dropped original line 411 here (the function
// signature) -- verify against the upstream file.
410template <>
412 return DispatchHelper<TensorTypes<float>>::call(this, Input(0));
413}
414
// SequenceMaskOp<CPUContext>::DoRunWithType -- collapses the input to a
// (batch x) rows x cols view and overwrites masked entries with fill_val
// (fill_val_ normally, 0 in gradient mode), where the mask shape is picked
// by `mode_` ('sequence', 'window', 'upper', 'lower', 'upperdiag',
// 'lowerdiag').
// NOTE(review): doxygen extraction dropped several lines from this function:
// the signature (original 417), a `CAFFE_ENFORCE(` opener (459), and the
// `RepeatedMaskWithFunctor(` / `MaskWithFunctor(` call openers
// (466, 476, 487, 496, 505, 514, 523) -- verify against the upstream file.
415template <>
416template <class T>
418 const Tensor* input = &Input(0);
419 const Tensor* sequence_lengths = nullptr;
420 const Tensor* window_centers = nullptr;
421
// Only 'sequence' and 'window' modes take a second input.
422 if (mode_ == "sequence") {
423 sequence_lengths = &Input(1);
424 } else if (mode_ == "window") {
425 window_centers = &Input(1);
426 }
427
428 auto* output = Output(0, input->sizes(), at::dtype<T>());
429
430 const auto canonical_axis = input->canonical_axis_index(axis_);
431
432 // canonical_batch is non-negative if batching, -1 otherwise
433 int canonical_batch = -1;
434 if ((HasArgument("batch"))) {
435 canonical_batch = input->canonical_axis_index(batch_);
436 }
437
438 // make sure batch < axis
439 if (canonical_batch >= 0) {
440 CAFFE_ENFORCE_LT(canonical_batch, canonical_axis);
441 }
442
443 // if no batch, then left is product of dims up to axis
444 // otherwise, left is product of dims between batch and axis
445 const int left =
446 (canonical_batch >= 0
447 ? input->size_between_dim(canonical_batch, canonical_axis)
448 : input->size_to_dim(canonical_axis));
449 const int right = input->size_from_dim(canonical_axis);
450
451 // product of dims from 1 to batch
452 const int batch_dim =
453 (canonical_batch >= 0
454 ? input->size_to_dim(canonical_batch) * input->size(canonical_batch)
455 : -1);
456
// In gradient mode masked positions are zeroed instead of filled.
457 T fill_val = convert::To<float, T>(grad_ ? 0.0f : fill_val_);
458 if (mode_ == "sequence") {
460 sequence_lengths, "Sequence length not provided for mode 'sequence'!");
461 if (HasArgument("repeat_from_axis")) {
462 const int canonical_repeat_from =
463 input->canonical_axis_index(repeat_from_);
464 const int repeated_dims = input->size_from_dim(canonical_repeat_from);
465 const int masked_dims = right / repeated_dims;
467 left,
468 masked_dims,
469 repeated_dims,
470 input->data<T>(),
471 SequenceFunctor(
472 sequence_lengths->data<int>(), sequence_lengths->numel()),
473 fill_val,
474 output->template mutable_data<T>());
475 } else {
477 left,
478 right,
479 batch_dim,
480 input->data<T>(),
481 SequenceFunctor(
482 sequence_lengths->data<int>(), sequence_lengths->numel()),
483 fill_val,
484 output->template mutable_data<T>());
485 }
486 } else if (mode_ == "window") {
488 left,
489 right,
490 batch_dim,
491 input->data<T>(),
492 WindowFunctor(window_centers->data<int>(), radius_),
493 fill_val,
494 output->template mutable_data<T>());
495 } else if (mode_ == "upper") {
497 left,
498 right,
499 batch_dim,
500 input->data<T>(),
501 UpperFunctor(),
502 fill_val,
503 output->template mutable_data<T>());
504 } else if (mode_ == "lower") {
506 left,
507 right,
508 batch_dim,
509 input->data<T>(),
510 LowerFunctor(),
511 fill_val,
512 output->template mutable_data<T>());
513 } else if (mode_ == "upperdiag") {
515 left,
516 right,
517 batch_dim,
518 input->data<T>(),
519 UpperDiagFunctor(),
520 fill_val,
521 output->template mutable_data<T>());
522 } else if (mode_ == "lowerdiag") {
524 left,
525 right,
526 batch_dim,
527 input->data<T>(),
528 LowerDiagFunctor(),
529 fill_val,
530 output->template mutable_data<T>());
531 } else {
532 CAFFE_ENFORCE(false, "Unsupported mode for SequenceMaskOp!");
533 return false;
534 }
535
536 return true;
537}
538
539REGISTER_CPU_OPERATOR(SequenceMask, SequenceMaskOp<CPUContext>);
540
541OPERATOR_SCHEMA(SequenceMask)
542 .NumInputs(1, 2)
543 .NumOutputs(1)
544 .SetDoc(R"DOC(
545Mask op designed for use in attention mechanisms for sequence modeling tasks.
546Supports batching: given batch_dim, collapses dims 0 through batch_dim into a
547single dimension, e.g. if tensor dims are [4,2,1,3,4] and batch_dim=2, first
548collapse tensor to [4*2*1,3,4], then mask each batch [i,:,:].
549
550
551Two current operating modes:
552
553
5541) Given a 2D input tensor and 1D tensor of sequence lengths, for each row i in
555the input tensor, set elements in that row to -inf if their column index
556j >= sequence_lengths[i]. This mode takes two inputs and argument mode =
557'sequence'
558
559
5602) Triangular mask. Given row index i and column index j, set elements to -inf
561given the following conditions:
562
563 mode='upper', x_ij = -inf if j < i
564 mode='lower', x_ij = -inf if j > i
565 mode='upperdiag', x_ij = -inf if j <= i
566 mode='lowerdiag', x_ij = -inf if j >= i
567
568This mode takes one input.
569
570
5713) Window Mask. Given a 2D input tensor and 1D tensor of window centers,
572for each row i in the input tensor, set elements in that row to -inf
573if their column index j outside [center - radius, center + radius].
574This mode takes two inputs and argument mode = 'sequence'.
575Argument 'radius' should be provided.
576)DOC")
577 .Input(0, "input", "Tensor to apply masking to")
578 .Input(1, "sequence_lengths", "1D Tensor of sequence lengths for mode #1")
579 .Output(0, "masked_tensor", "Input tensor with masking applied")
580 .Arg(
581 "mode",
582 "(string) Mode selection. Possible values: "
583 "'sequence', 'upper', 'lower', 'upperdiag', 'lowerdiag'")
584 .Arg(
585 "axis",
586 "(int) Beginning axis of row elements. All dimensions to the left "
587 "will be treated as row indices and those to the right (inclusive) "
588 "will be treated as column indices in the 2D mask")
589 .Arg("grad", "(bool) operate in gradient mode")
590 .Arg("radius", "(int) radius of windows in window mode")
591 .Arg("batch", "(int) batch dimension of tensor (optional)")
592 .Arg(
593 "repeat_from_axis",
594 "(int) used when mask should be repeated for "
595 "one or more data dimensions (beginning at this axis). "
596 "(currently only supported for sequence mode without batch argument)");
597
// Gradient maker for SequenceMask: the backward pass reruns SequenceMask
// itself with grad=true appended to the forward arguments (which zeroes the
// masked positions of the incoming gradient). Arguments are copied by hand,
// so CopyArguments() returns false.
// NOTE(review): extraction dropped original line 599 here (likely a
// `using GradientMakerBase::GradientMakerBase;` declaration) -- verify
// against the upstream file.
598class GetSequenceMaskGradient : public GradientMakerBase {
600 vector<OperatorDef> GetGradientDefs() override {
601 vector<Argument> args;
602 args.reserve(Def().arg().size());
603 for (const auto& x : Def().arg()) {
604 args.push_back(x);
605 }
606 args.push_back(MakeArgument<bool>("grad", true));
// Two-input modes ('sequence'/'window') must forward the second input.
607 if (def_.input_size() == 1) {
608 return SingleGradientDef(
609 "SequenceMask",
610 "",
611 vector<string>{GO(0)},
612 vector<string>{GI(0)},
613 args);
614 } else {
615 return SingleGradientDef(
616 "SequenceMask",
617 "",
618 vector<string>{GO(0), I(1)},
619 vector<string>{GI(0)},
620 args);
621 }
622 }
623
624 bool CopyArguments() const override {
625 return false;
626 }
627};
628
// Register the gradient maker for SequenceMask.
629REGISTER_GRADIENT(SequenceMask, GetSequenceMaskGradient);
630
631} // namespace caffe2
#define CAFFE_ENFORCE_LT(x, y,...)
Definition: Logging.h:259
OperatorDef def_
Args({2<< 5}) -> Args({2<< 8}) ->Args({2<< 12}) ->Args({2<< 14})
int32_t val
Definition: blob_test.cc:32
const size_t len_
const int * sl_
USE_OPERATOR_CONTEXT_FUNCTIONS
void CopyItemsSameDevice(const caffe2::TypeMeta meta, size_t n, const void *src, void *dst)
Definition: context_base.h:106
bool RunOnDevice() override
GradientMakerBase(const OperatorDef &def, const vector< GradientWrapper > &g_output)
bool RunOnDevice() override
int call(int id)
#define D(name, bit)
Definition: cpuid.h:72
CPUContext * context_
void forward(int64_t offset)
constexpr Symbol len(static_cast< unique_t >(_keys::aten_len))
Copyright (c) 2016-present, Facebook, Inc.
Definition: blob.h:13
const auto canonical_axis
REGISTER_CPU_OPERATOR(ATen, ATenOp< CPUContext >)
void RepeatedMaskWithFunctor(int N, int M, int D, const float *in, Functor fn, float fill_val, float *out)
d int long tensor contains the length in each of the output N dim Tensor where dim boolean false where packed_tensor is true otherwise Padding number in the packed segments Use true to pad infinity
OPERATOR_SCHEMA(ATen)
CAFFE_ENFORCE_EQ(in.size(), 1, "New shape must not be specified by the input blob and the " "argument `shape` at the same time.")
GRADIENT_OPERATOR_SCHEMA(FooGradient).NumInputs(1).NumOutputs(1)
NO_GRADIENT(SparseLengthsSumFused4BitRowwiseFakeFP16NNPI)
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults if the input is in Caffe format Defaults to Number of CPU decode transform threads Defaults to Name of the Type of The sizes of any outputs besides the data and shortest side desired for image resize Defaults to[-1, -1] or no random resize desired data
REGISTER_CPU_GRADIENT_OPERATOR(BooleanMaskGradient, BooleanMaskOpGradient< CPUContext >)
Tensor of rank r
float T
Definition: cc_bmm_bg_op.h:11
we first initialize the output tensor to all and then do accumulation Any further calls to the The input tensor that has to be accumulated to the output tensor If the output size is not the same as input size
SparseLengths8BitsRowwiseOp< CPUContext, 0, 1 >::LENGTHS uint8 tensor obtained with Vector with the same sum of elements as the first dimension of DATA Input(3, "scale_bias", "Matrix of floats, each row r_i of which stores a pair " "s_i, b_i -- scale and bias for i-th row") .Output(0
Maximum number of candidates to carry over to next activation step float Tensor sized[max_activation_length, batch_size, alphabet_size] of network optional int vector containing sequence lengths
const vector< TensorShape > & in
See RoIPoolF dY
true SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true >::WEIGHTS uint8 tensor obtained with Vector with the same sum of elements as the first dimension of DATA output
Unscaled log probabilities Optional blob to be used to weight the samples for the loss With spatial weighting is by x
const ArgumentHelper args(def)
we first initialize the output tensor to all and then do accumulation Any further calls to the input
int M
Definition: matmul_op.cc:22
See RoIPoolF Gradient of forward dX
required base learning rate default used only for inv policy type default sampling rate on iterations default True in alter policy int64_t
REGISTER_GRADIENT(CTC, GetCTCGradient)
The common world dst
SparseLengths8BitsRowwiseOp< CPUContext, 1, 1 >::LENGTHS uint8 tensor obtained with Integer vector containing indices of the first dimension of DATA for the slices that are being aggregated Matrix of each row r_i of which stores a pair b_i scale and bias for i th row Output(0, "output", "output")
The common world src
void MaskWithFunctor(int N, int M, int B, const float *in, Functor fn, float fill_val, float *out)
int N
Definition: im2col_op.cc:52
CAFFE_ENFORCE(dims.front() >=0, "Dimension ids must be non-negative.")
const float minf
string arg
Definition: setup.py:234
STL namespace.
static void accumulate(std::vector< Variable > &buffer, const size_t pos, Variable &&var)
uint8_t * data