pytorch  1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.

operator_gradient.h
#ifndef CAFFE2_CORE_OPERATOR_GRADIENT_H_
#define CAFFE2_CORE_OPERATOR_GRADIENT_H_

#include "c10/util/Registry.h"
#include "caffe2/core/operator_schema.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"

namespace caffe2 {

/**
 * @brief A struct that abstracts on top of dense and sparse blobs.
 *
 * For a dense blob, its gradient name should be written into dense_, and for
 * a sparse blob, its gradient name should be written into indices_ for
 * the sparse indices and values_ for the values.
 */
struct TORCH_API GradientWrapper {
  string dense_;
  string indices_;
  string values_;

  inline bool IsDense() const {
    return (dense_.size() != 0);
  }
  inline bool IsSparse() const {
    return (indices_.size() != 0 || values_.size() != 0);
  }
  inline bool IsEmpty() const {
    return (!IsDense() && !IsSparse());
  }
};
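// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): how a dense and a
// sparse gradient are typically recorded in a GradientWrapper. The blob names
// below are made up for the example.
inline void GradientWrapperExampleSketch() {
  GradientWrapper dense_g;
  dense_g.dense_ = "X_grad"; // dense gradient: only dense_ is set
  // dense_g.IsDense() == true, dense_g.IsSparse() == false

  GradientWrapper sparse_g;
  sparse_g.indices_ = "X_grad_indices"; // sparse gradient: indices blob name
  sparse_g.values_ = "X_grad_values";   // sparse gradient: values blob name
  // sparse_g.IsSparse() == true, sparse_g.IsEmpty() == false
}
// ---------------------------------------------------------------------------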

/**
 * A struct that holds the gradient operators and related gradient maps.
 */
struct TORCH_API GradientOpsMeta {
  vector<OperatorDef> ops_;
  vector<GradientWrapper> g_input_;

  GradientOpsMeta() {}
  GradientOpsMeta(
      const vector<OperatorDef>& ops,
      const vector<GradientWrapper>& v)
      : ops_(ops), g_input_(v) {}
};

class TORCH_API GradientMakerBase {
 public:
  GradientMakerBase(
      const OperatorDef& def,
      const vector<GradientWrapper>& g_output)
      : def_(def), g_output_(g_output), g_input_(def.input_size()){};
  virtual ~GradientMakerBase() {}
  virtual bool CopyDeviceOption() const {
    return true;
  }
  virtual bool CopyEngine() const {
    return true;
  }
  virtual bool CopyArguments() const {
    return true;
  }

  virtual void VerifyOp() const {
    auto* schema = OpSchemaRegistry::Schema(def_.type());
    if (schema) {
      CAFFE_ENFORCE(
          schema->Verify(def_),
          "(GradientMaker) Operator def did not pass schema checking: ",
          ProtoDebugString(def_));
    }
  }

  /**
   * @brief Returns the gradient ops meta.
   *
   * If your gradient op generator only uses standard input and output
   * manipulations, you can simply implement GetGradientDefs(), which returns
   * a vector<OperatorDef>. Inside it you can call GI, GI_V and GI_I, which
   * will automatically create the gradient registration for you. (See the
   * illustrative sketch following this class.)
   *
   * If you need to do custom gradient name registration, overload this
   * function directly.
   */
  virtual GradientOpsMeta Get() {
    VerifyOp();
    vector<OperatorDef> new_defs = GetGradientDefs();
    for (auto& opdef : new_defs) {
      opdef.set_is_gradient_op(true);
    }
    return GradientOpsMeta(new_defs, g_input_);
  };

  const OperatorDef& Def() const {
    return def_;
  }

 protected:
  virtual vector<OperatorDef> GetGradientDefs() {
    CAFFE_NOT_IMPLEMENTED;
  }

  // Helper functions to return names for the gradient computation.
  // I(idx), O(idx): return the input and output names.
  // GO(idx): return the name of the gradient for output idx.
  // GI(idx), GI_I(idx), GI_V(idx): return the name of the gradient for
  //   input idx, and also register that name into the gradient
  //   registry to be returned.
  string I(const int i) {
    CAFFE_ENFORCE((i >= 0) && (i < def_.input().size()));
    return def_.input(i);
  }
  string O(const int i) {
    CAFFE_ENFORCE((i >= 0) && (i < def_.output().size()));
    return def_.output(i);
  }
  string GI(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsSparse(),
        "Input ",
        def_.input(i),
        " already set to sparse.");
    g_input_.at(i).dense_ = GradientName(def_.input(i));
    return GradientName(def_.input(i));
  }
  string GI_I(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).indices_ = GradientSliceIndices(def_.input(i));
    return GradientSliceIndices(def_.input(i));
  }
  string GI_V(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).values_ = GradientSliceValues(def_.input(i));
    return GradientSliceValues(def_.input(i));
  }
  string GO(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsDense(),
        "Gradient of output ",
        def_.output(i),
        (g_output_.at(i).IsSparse() ? " is sparse (expected dense)."
                                    : " is not provided!"));
    return g_output_.at(i).dense_;
  }
  string GO_I(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsSparse(),
        "Gradient of output ",
        def_.output(i),
        (g_output_.at(i).IsDense() ? " is dense (expected sparse)."
                                   : " is not provided!"));
    return g_output_.at(i).indices_;
  }
  string GO_V(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsSparse(),
        "Gradient of output ",
        def_.output(i),
        (g_output_.at(i).IsDense() ? " is dense (expected sparse)."
                                   : " is not provided!"));
    return g_output_.at(i).values_;
  }
  const GradientWrapper& GradOut(int i) {
    return g_output_.at(i);
  }

  // Functions to add a gradient pair to the map.
  void SetDense(const int i, const string& name) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsSparse(),
        "Input ",
        def_.input(i),
        " already set to sparse.");
    g_input_.at(i).dense_ = name;
  }
  void SetSparse(const int i, const string& indices, const string& values) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).indices_ = indices;
    g_input_.at(i).values_ = values;
  }

  /**
   * @brief A helper function that creates a single operator def, which is
   * all that many simple operators need.
   */
  template <class... Args>
  inline static vector<OperatorDef> SingleGradientDef(const Args&... args) {
    return vector<OperatorDef>{CreateOperatorDef(args...)};
  }

 public:
  /**
   * Returns a map from gradient blob names to the parameters they are the
   * gradients of.
   */
  static CaffeMap<string, string> MatchGradsToParams(const OperatorDef& op) {
    // NOTE: how to go beyond string-matching?
    CaffeMap<string, string> m;
    for (auto& out : op.output()) {
      if (IsGradientBlob(out)) {
        // Strip the trailing "_grad" (5 characters) to recover the parameter name.
        m[out] = out.substr(0, out.length() - 5);
      }
    }
    return m;
  }

 private:
  // Utility functions for gradient name computation. We don't expose them
  // in order to discourage the use of such names explicitly.
  static string GradientName(const string& name) {
    return name + "_grad";
  }

  static bool IsGradientBlob(const string& name) {
    return name.length() > 5 && name.find("_grad") == name.length() - 5;
  }

  static string GradientNameToParam(const string& name) {
    CHECK(IsGradientBlob(name));
    return name.substr(0, name.length() - 5);
  }

  static string GradientSliceIndices(const string& name) {
    return name + "_grad_indices";
  }

  static string GradientSliceValues(const string& name) {
    return name + "_grad_values";
  }

 protected:
  // We make the member variables protected in case someone wants to write
  // a fully custom Get() function.
  const OperatorDef& def_;
  const vector<GradientWrapper>& g_output_;
  vector<GradientWrapper> g_input_;
};
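// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): a typical gradient
// maker for a hypothetical unary operator "Foo" with gradient operator
// "FooGradient"; both names are made up for the example. GetGradientDefs()
// emits one gradient op that consumes the forward input I(0) and the output
// gradient GO(0), and produces GI(0), which also registers the dense gradient
// name for input 0.
class GetFooGradientSketch : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "FooGradient",                // gradient operator type (hypothetical)
        "",                           // operator name (left empty)
        vector<string>{I(0), GO(0)},  // inputs: forward input and dOutput
        vector<string>{GI(0)});       // output: dInput, registered as dense
  }
};
// A maker like this would normally be registered with the REGISTER_GRADIENT
// macro defined later in this file, e.g. REGISTER_GRADIENT(Foo, GetFooGradientSketch).
// ---------------------------------------------------------------------------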

/**
 * @brief A helper class to indicate that the operator does not need gradient
 * computation.
 *
 * Use the macro NO_GRADIENT to register operators that do not have gradients.
 * Note that this is different from SHOULD_NOT_DO_GRADIENT: the latter means
 * that the gradient computation should not flow through it at all, and throws
 * an error if it is called.
 */
class NoGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return vector<OperatorDef>();
  }
};

/**
 * @brief A helper class to indicate that the operator should have no gradient.
 *
 * This is used when the operator definition is designed to not have a
 * gradient. Calling a gradient on this operator def will cause Caffe2 to
 * quit.
 */
class ThrowInTheTowelIfGradientIsCalled : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  GradientOpsMeta Get() override {
    CAFFE_THROW("One should not call gradient for operator ", def_.type(), ".");
  }
};

/**
 * @brief A helper class to indicate that the gradient mechanism is not ready.
 *
 * This should only be used sparingly when the gradient does exist, but we
 * have not implemented it yet and are using this as a lazy excuse. Eventually,
 * a gradient operator should be implemented.
 */
class GradientNotImplementedYet : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  GradientOpsMeta Get() override {
    CAFFE_THROW(
        "Operator ",
        def_.type(),
        " should have a gradient but is not implemented yet.");
  }
};

C10_DECLARE_REGISTRY(
    GradientRegistry,
    GradientMakerBase,
    const OperatorDef&,
    const vector<GradientWrapper>&);

#ifdef CAFFE2_NO_GRADIENT_OPS

#define REGISTER_GRADIENT(name, ...) /* No gradients. */
#define REGISTER_GRADIENT_STR(str_name, ...) /* No gradients. */

#else

#define REGISTER_GRADIENT(name, ...) \
  C10_REGISTER_CLASS(GradientRegistry, name, __VA_ARGS__)
#define REGISTER_GRADIENT_STR(str_name, ...) \
  C10_REGISTER_TYPED_CLASS(GradientRegistry, str_name, __VA_ARGS__)

#endif

// NO_GRADIENT means that the operator does not need any gradient computation.
#define NO_GRADIENT(name) REGISTER_GRADIENT(name, NoGradient)

// SHOULD_NOT_DO_GRADIENT means that the operator is not designed to have
// gradient operators. If you attempt to call the gradient, an error will be
// thrown.
#define SHOULD_NOT_DO_GRADIENT(name) \
  REGISTER_GRADIENT(name, ThrowInTheTowelIfGradientIsCalled)

#define GRADIENT_NOT_IMPLEMENTED_YET(name) \
  REGISTER_GRADIENT(name, GradientNotImplementedYet)
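// ---------------------------------------------------------------------------
// Illustrative usage sketch (operator names are made up for the example;
// these calls normally live in the .cc file that defines the operator):
//
//   REGISTER_GRADIENT(Foo, GetFooGradientSketch); // ordinary gradient maker
//   NO_GRADIENT(FooAccuracy);                     // op needs no gradient at all
//   SHOULD_NOT_DO_GRADIENT(FooDataLoader);        // calling gradient throws
//   GRADIENT_NOT_IMPLEMENTED_YET(FooFancyOp);     // gradient exists but is TODO
// ---------------------------------------------------------------------------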

/**
 * @brief Gets the GradientOpsMeta for the given operator def.
 */
TORCH_API GradientOpsMeta GetGradientForOp(
    const OperatorDef& def,
    const vector<GradientWrapper>& g_output);

} // namespace caffe2

#endif // CAFFE2_CORE_OPERATOR_GRADIENT_H_
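
For reference, a minimal sketch of how a caller typically obtains gradient definitions through this interface, assuming a registered operator and a made-up output gradient blob name (this is illustrative and not code from the repository):

  #include <vector>
  #include "caffe2/core/operator_gradient.h"

  void BuildGradientSketch(const caffe2::OperatorDef& fwd_def) {
    // One GradientWrapper per forward output; assume output 0 has a dense
    // gradient blob named "Y_grad" (hypothetical name).
    std::vector<caffe2::GradientWrapper> g_output(fwd_def.output_size());
    g_output[0].dense_ = "Y_grad";
    // Look up the registered gradient maker for fwd_def.type() and build the
    // gradient operator definitions.
    caffe2::GradientOpsMeta meta = caffe2::GetGradientForOp(fwd_def, g_output);
    // meta.ops_     : the gradient OperatorDefs to run
    // meta.g_input_ : the gradient wrapper for each forward input
  }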