pytorch 1.8.2
About: PyTorch provides tensor computation (like NumPy) with strong GPU acceleration and deep neural networks built on a tape-based autograd system. 1.8.2 is an LTS (Long Term Support) release.

torch/csrc/autograd/input_buffer.cpp
#include <torch/csrc/autograd/input_buffer.h>

#include <c10/core/DeviceGuard.h>
#include <c10/core/StreamGuard.h>
#include <c10/core/Event.h>
#include <c10/util/Optional.h>

#include <cstddef>
#include <utility>
#include <vector>

namespace torch { namespace autograd {

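// Sums `var` into buffer[pos]. When exactly one operand is sparse, the
// dense operand may absorb the sparse one in place, but only if the dense
// tensor is contiguous and its storage is provably unaliased
// (storage().use_count() == 1); every other combination falls back to an
// out-of-place sum.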
static void accumulate(std::vector<Variable>& buffer,
                       const size_t pos,
                       Variable&& var) {
  TORCH_INTERNAL_ASSERT(pos < buffer.size());
  auto& old_var = buffer[pos];
  // ATen doesn't route sparse additions correctly...
  // do dense + sparse in-place if possible
  if (old_var.is_sparse()) {
    // storage use_count is a big hammer, but for anything lighter there's
    // an adversarial example with unexpected in-place modification
    if (!var.is_sparse() && var.is_contiguous() && var.storage().use_count() == 1) {
      buffer[pos] = var.add_(old_var);
    } else {
      buffer[pos] = var + old_var;
    }
  } else {
    if (var.is_sparse() && !old_var.is_sparse() && old_var.is_contiguous() &&
        old_var.storage().use_count() == 1) {
      buffer[pos] = old_var.add_(var);
    } else {
      buffer[pos] = old_var + var;
    }
  }
}

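// Called by the engine once per incoming gradient for input `pos`; an
// undefined `var` means "no gradient" and leaves the slot untouched.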
void InputBuffer::add(size_t pos,
                      Variable&& var,
                      const c10::optional<c10::Stream>& opt_producer_stream,
                      const c10::optional<c10::Stream>& opt_consumer_stream) {
  TORCH_INTERNAL_ASSERT(pos < buffer.size());
  if (!var.defined()) {
    return;
  }

  // Switches to the accumulation device
  // The device (and stream) chosen for accumulation is:
  //  (1) var is not a CUDA variable. Accumulation happens on var's device.
  //  (2) var is a CUDA variable and it, the consumer, and the producer share the same device:
  //       (2a) Uses the consumer's stream as the accumulation stream
  //       (2b) Syncs the accumulation stream with the producer's stream (if different)
  //       (2c) Accumulates.
  //  (3) var is a CUDA variable and it shares a device with the consumer but not the producer:
  //       (3a) Uses the consumer's stream as the accumulation stream
  //       (3b) Syncs the accumulation stream with the consumer device's default stream
  //       (3c) Accumulates.
  //  (4) var is a CUDA variable and it shares a device with the producer but not the consumer:
  //       (4a) Uses the producer device's default stream as the accumulation stream
  //       (4b) Syncs the accumulation stream with the producer's stream
  //       (4c) Accumulates.
  //  (5) var is a CUDA variable and it does not share a device with the consumer or producer.
  //      Accumulation happens on the var device's default stream.

  TORCH_INTERNAL_ASSERT(device_of(var));
  c10::optional<c10::Stream> opt_accumulate_stream = c10::nullopt;
  if (device_of(var)->is_cuda()) {
    const auto on_producer =
        opt_producer_stream && device_of(var) == opt_producer_stream->device();
    const auto on_consumer =
        opt_consumer_stream && device_of(var) == opt_consumer_stream->device();
    if (on_producer && on_consumer) {
      // (2a)
      opt_accumulate_stream = opt_consumer_stream;
      if (opt_accumulate_stream != opt_producer_stream) {
        // (2b)
        auto event = c10::Event{c10::DeviceType::CUDA};
        event.record(*opt_producer_stream);
        opt_accumulate_stream->wait(event);
      }
    } else {
      c10::optional<c10::Stream> opt_sync_stream = c10::nullopt;
      const auto guard = c10::impl::VirtualGuardImpl{c10::DeviceType::CUDA};
      if (on_consumer && !on_producer) {
        // (3a)
        opt_accumulate_stream = opt_consumer_stream;
        opt_sync_stream = guard.getDefaultStream(opt_consumer_stream->device());
      } else if (on_producer && !on_consumer) {
        // (4a)
        opt_accumulate_stream = guard.getDefaultStream(opt_producer_stream->device());
        opt_sync_stream = opt_producer_stream;
      } else {
        // (5)
        opt_accumulate_stream = guard.getDefaultStream(*device_of(var));
      }
      if (opt_sync_stream && (opt_accumulate_stream != opt_sync_stream)) {
        // (3b), (4b)
        c10::OptionalDeviceGuard device_guard{opt_sync_stream->device()};
        auto event = c10::Event{c10::DeviceType::CUDA};
        event.record(*opt_sync_stream);
        opt_accumulate_stream->wait(event);
      }
    }
  }

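  // At this point opt_accumulate_stream is set iff var is a CUDA variable;
  // the guards below make the chosen stream (or, for the non-CUDA case,
  // var's device) current while the addition runs.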
  auto& old_var = buffer[pos];
  if (!old_var.defined()) {
    buffer[pos] = std::move(var);
  } else {
    if (opt_accumulate_stream) {
      c10::OptionalStreamGuard stream_guard{opt_accumulate_stream};
      accumulate(buffer, pos, std::move(var));
    } else {
      // (1) non-CUDA variable
      //     Accumulation happens on variable's device
      c10::OptionalDeviceGuard device_guard{device_of(var)};
      accumulate(buffer, pos, std::move(var));
    }
  }
}

auto InputBuffer::device() const -> at::Device {
  // Since we pick the first non-CPU tensor, this won't work with
  // mixed device-type operations (e.g., an op that is both CUDA
  // and XLA). This is *incredibly* unlikely, so we don't worry
  // about it.
  for (auto& var : buffer) {
    if (var.defined()) {
      auto device = var.device();
      if (device.type() != at::kCPU) {
        return device;
      }
    }
  }
  // Only report to the CPU thread if there really were no tensors
  // from other devices.
  return at::kCPU;
}

auto InputBuffer::variables(InputBuffer&& g) -> std::vector<Variable> {
  std::vector<Variable> result = std::move(g.buffer);
  return result;
}

}} // namespace torch::autograd
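
The in-place gate used by accumulate() can be exercised outside the engine. The following is a minimal sketch, assuming a linked libtorch build; the shapes, values, and the standalone main() are illustrative and not taken from the listing above.

#include <torch/torch.h>
#include <iostream>

int main() {
  // A dense gradient and a sparse gradient destined for the same slot.
  auto dense = torch::ones({3, 3});
  auto indices = torch::tensor({{0, 2}, {0, 2}});  // COO coordinates (kLong)
  auto values = torch::ones({2});                  // one value per coordinate
  auto sparse = torch::sparse_coo_tensor(indices, values, {3, 3});

  // The same gate accumulate() applies: mutate the dense side in place
  // only when it is contiguous and its storage is provably unaliased.
  if (dense.is_contiguous() && dense.storage().use_count() == 1) {
    dense.add_(sparse);      // cheap path: no fresh dense allocation
  } else {
    dense = dense + sparse;  // safe fallback: out-of-place sum
  }
  std::cout << dense << '\n';
  return 0;
}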
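The event handshake in steps (2b), (3b), and (4b) is the classic record/wait pattern. Here is a hedged sketch using ATen's CUDA wrappers; it requires a CUDA-enabled build, the helper name sync_streams is invented for this note, and the listing itself goes through the backend-generic c10::Event rather than these CUDA-specific classes.

#include <ATen/cuda/CUDAEvent.h>
#include <c10/cuda/CUDAStream.h>

// Make `accumulate_stream` wait for everything already queued on
// `producer_stream`, without blocking the host thread.
void sync_streams(const c10::cuda::CUDAStream& producer_stream,
                  const c10::cuda::CUDAStream& accumulate_stream) {
  if (producer_stream != accumulate_stream) {
    at::cuda::CUDAEvent event;       // lazily initialized CUDA event
    event.record(producer_stream);   // mark the producer's current tail
    event.block(accumulate_stream);  // async wait queued on the consumer
  }
}

Recording on the producer and blocking the consumer enqueues the dependency on the GPU itself, so ordering is enforced between the two streams while the host thread returns immediately.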