pytorch  1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.

learning_rate_op.cc
#include "caffe2/sgd/learning_rate_op.h"

namespace caffe2 {
REGISTER_CPU_OPERATOR(LearningRate, LearningRateOp<float, CPUContext>);

OPERATOR_SCHEMA(LearningRate)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction([](const OperatorDef&,
                                const vector<TensorShape>& in) {
      // Output learning rate has the same shape as the input iteration blob.
      vector<TensorShape> out(1);
      out[0] = in[0];
      return out;
    })
    .SetDoc(R"DOC(
Learning rate is a decreasing function of time. With low learning rates the
improvements will be linear. With high learning rates they will start to look
more exponential. Learning rate is controlled by the following arguments:

Required:
 `iterations`
 `base_lr`: base learning rate
 `policy`: this controls how the learning rate is applied, options are:
   `fixed`
   `step`: uses `stepsize`, `gamma`
   `exp`: uses `gamma`
   `gate`: uses `multiplier_1`, `multiplier_2`, `num_iter`
   `inv`: uses `gamma`, `power`
   `linearWarmup`: uses `start_multiplier`, `num_iter`
   `constantWarmup`: uses `multiplier`, `num_iter`
   `alter`: uses `active_first`, `active_period`, `inactive_period`
   `hill`: uses those in both `linearWarmup` and `inv`, plus `end_multiplier`
   `composite`: uses `sub_policy_num_iters` and additional args in the
       `sub_policy_{sub_policy_index}_{sub_policy_arg}` format shown below
   `cyclic`: uses `max_lr`, `stepsize`
   `cosine`: uses `min_lr`, `max_lr`, `period`, `t_mult`, `lr_shrink`
   `constantThenLinearWarmup`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`
   `compositeCyclical`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`, `cyclical_max_lr`, `cyclical_step_size`, `cyclical_decay`
   `compositeCosine`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`, `cosine_max_lr`, `cosine_period`, `cosine_t_mult`, `cosine_lr_shrink`

 For `composite`, the extra args follow the pattern
 `sub_policy_{sub_policy_index}_{sub_policy_arg}`, for example:
   sub_policy_0_policy: "exp", sub_policy_0_gamma: 0.99,
   sub_policy_0_lr_scale: 1.2
   sub_policy_1_policy: "fixed", sub_policy_1_lr_scale: 1.0
   sub_policy_num_iters: [1000, 1000]
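
 For reference, the simpler policies follow these closed-form schedules
 (a sketch of the standard Caffe-style definitions; see the
 LearningRateFunctor implementations in caffe2/sgd/learning_rate_functors.h
 for the authoritative behavior):
   fixed: lr = base_lr
   step:  lr = base_lr * gamma^floor(iter / stepsize)
   exp:   lr = base_lr * gamma^iter
   inv:   lr = base_lr * (1 + gamma * iter)^(-power)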

Optional:
 `stepsize`: defaults to 0
 `max_lr`: defaults to 0.005
 `gamma`: defaults to 0
 `power`: defaults to 0
 `num_iter`: defaults to 0
 `start_multiplier`: defaults to 0
 `multiplier`: defaults to 0.5
 `multiplier_1`: defaults to 1
 `multiplier_2`: defaults to 1
 `m1`: defaults to 0.5, the first piece lr of piece warmup
 `n1`: defaults to 0, iter threshold of the first piece lr
 `m2`: defaults to 0.5, the second piece lr of piece warmup
 `n2`: defaults to 0, iter threshold of the second piece lr
 `m3`: defaults to 0.5, the third piece lr of piece warmup
 `start_warmup_multiplier`: defaults to 0.1, part of constantThenLinearWarmup
 `constant_warmup_num_iter`: defaults to 10000000, part of constantThenLinearWarmup, CompositeCyclicalLRPolicy, CompositeCosineLRPolicy
 `linear_warmup_num_iter`: defaults to 10000000, part of constantThenLinearWarmup, CompositeCyclicalLRPolicy, CompositeCosineLRPolicy
 `cyclical_max_lr`: defaults to 0.05, part of CompositeCyclicalLRPolicy
 `cyclical_step_size`: defaults to 1000000, part of CompositeCyclicalLRPolicy
 `cyclical_decay`: defaults to 0.999, part of CompositeCyclicalLRPolicy
 `cosine_min_lr`: defaults to 0.01, part of CompositeCosineLRPolicy
 `cosine_max_lr`: defaults to 0.05, part of CompositeCosineLRPolicy
 `cosine_period`: defaults to 50, part of CompositeCosineLRPolicy
 `cosine_t_mult`: defaults to 1.0, part of CompositeCosineLRPolicy
 `cosine_lr_shrink`: defaults to 0.99, part of CompositeCosineLRPolicy

Usage:
  train_net.LearningRate(*iterations*, "*label*", base_lr=*float*,
                         policy="policy_name", stepsize=*int*, gamma=*float*)


Example usage:
  train_net.LearningRate(200, "LR", base_lr=-0.1,
                         policy="step", stepsize=20, gamma=0.9)
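
With these settings, the rate emitted at iteration 200 would be
-0.1 * 0.9^floor(200 / 20) = -0.1 * 0.9^10 ≈ -0.0349, assuming the standard
step schedule above (base_lr is negative because caffe2-style SGD updates add
lr * grad to the parameters).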
)DOC")
    .Arg("base_lr", "(float, required) base learning rate")
    .Arg("policy", "(str, required) learning rate policy; see options above")
    .Arg("power", "(float, default 1.0) used only for inv policy type")
    .Arg("gamma", "(float, default 1.0) multiplicative decay factor")
    .Arg("stepsize", "(int, default 1) iteration interval for the step policy")
    .Arg("max_lr", "(float, default 0.005) max learning rate")
    .Arg("active_first", "(boolean, default True) in alter policy")
    .Arg("active_period", "(int64_t, required) in alter policy")
    .Arg("inactive_period", "(int64_t, required) in alter policy")
    .Arg(
        "max_iter",
        "(int, default -1) maximum iterations in this training run")
    .Arg(
        "num_iter",
        "(int, default 0) number of iterations over which to warmup lr")
    .Arg(
        "start_multiplier",
        "(float, default 0) starting multiplier for learning rate")
    .Arg(
        "end_multiplier",
        "(float, default 0) end multiplier for learning rate")
    .Arg(
        "multiplier",
        "(float, default 0.5) constant multiplier for learning rate")
    .Arg(
        "multiplier_1",
        "(float, default 1) start multiplier for learning rate")
    .Arg("multiplier_2", "(float, default 1) end multiplier for learning rate")
    .Arg(
        "sub_policy_num_iters",
        "(int array, default empty) number of iterations for each sub learning rate policy in composite policy")
    .Arg("m1", "(float, default 0.5) first piece lr of piece warmup")
    .Arg("n1", "(float, default 0) iter threshold of the first piece lr")
    .Arg("m2", "(float, default 0.5) second piece lr of piece warmup")
    .Arg("n2", "(float, default 0) iter threshold of the second piece lr")
    .Arg("m3", "(float, default 0.5) third piece lr of piece warmup")
    .Arg("start_warmup_multiplier", "defaults to 0.1")
    .Arg("constant_warmup_num_iter", "defaults to 10000000")
    .Arg("linear_warmup_num_iter", "defaults to 10000000")
    .Arg(
        "cyclical_max_lr",
        "defaults to 0.05, part of CompositeCyclicalLRPolicy")
    .Arg(
        "cyclical_step_size",
        "defaults to 1000000, part of CompositeCyclicalLRPolicy")
    .Arg(
        "cyclical_decay",
        "defaults to 0.999, part of CompositeCyclicalLRPolicy")
    .Arg("cosine_min_lr", "defaults to 0.01, part of CompositeCosineLRPolicy")
    .Arg("cosine_max_lr", "defaults to 0.05, part of CompositeCosineLRPolicy")
    .Arg("cosine_period", "defaults to 50, part of CompositeCosineLRPolicy")
    .Arg("cosine_t_mult", "defaults to 1.0, part of CompositeCosineLRPolicy")
    .Arg(
        "cosine_lr_shrink",
        "defaults to 0.99, part of CompositeCosineLRPolicy")
    .Input(0, "input", "Tensor holding the current training iteration (int64)")
    .Output(0, "output", "Tensor holding the computed learning rate")
    .DeviceInferenceFunction([](const OperatorDef& def) {
      // Input lives on the default (CPU) device; output follows the op's
      // configured device.
      return std::make_pair(
          std::vector<DeviceOption>{DeviceOption()},
          std::vector<DeviceOption>{def.device_option()});
    });

NO_GRADIENT(LearningRate);
} // namespace caffe2

C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    LearningRate,
    "_caffe2::LearningRate("
    "Tensor iterations, "
    "float base_lr, "
    "str policy, "
    "float? power = 1.0, "
    "float? gamma = 1.0, "
    "int? stepsize = 1, "
    "float? max_lr = 0.005, "
    "bool? active_first = True, "
    "int? active_period = -1, "
    "int? inactive_period = -1, "
    "int? max_iter = -1, "
    "int? num_iter = 0, "
    "float? start_multiplier = 0, "
    "float? end_multiplier = 0, "
    "float? multiplier = 0.5, "
    "float? multiplier_1 = 1.0, "
    "float? multiplier_2 = 1.0, "
    "int[]? sub_policy_num_iters = None, "
    "float? m1 = 0.5, "
    "float? n1 = 0, "
    "float? m2 = 0.5, "
    "float? n2 = 0, "
    "float? m3 = 0.5, "
    "float? start_warmup_multiplier = 0.1, "
    "int? constant_warmup_num_iter = 10000000, "
    "int? linear_warmup_num_iter = 10000000, "
    "float? cyclical_max_lr = 0.05, "
    "int? cyclical_step_size = 1000000, "
    "float? cyclical_decay = 0.999, "
    "float? cosine_min_lr = 0.01, "
    "float? cosine_max_lr = 0.05, "
    "int? cosine_period = 50, "
    "float? cosine_t_mult = 1.0, "
    "float? cosine_lr_shrink = 0.99, "
    "float? decay = 1.0) -> Tensor output",
    LearningRateOpFloatCPU)
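
To try the operator from Python, a minimal sketch along these lines should
work (blob names and the hand-fed iteration counter are illustrative; a real
training net would advance the counter with caffe2's Iter operator):

  from caffe2.python import core, workspace
  import numpy as np

  net = core.Net("lr_demo")
  # Same settings as the schema's example: decay by 0.9 every 20 iterations.
  net.LearningRate("iter", "lr", base_lr=-0.1,
                   policy="step", stepsize=20, gamma=0.9)

  for it in (0, 19, 20, 200):
      # The operator reads the current iteration from an int64 tensor.
      workspace.FeedBlob("iter", np.array([it], dtype=np.int64))
      workspace.RunNetOnce(net)
      print(it, workspace.FetchBlob("lr"))  # expect -0.1 * 0.9 ** (it // 20)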