template <typename T>
inline std::pair<int64_t, int64_t> collapse_dims(
    T* sizes, T* strides, int64_t dims, const int excludeDim = -1) {
  TORCH_CHECK(
      excludeDim >= -1 && excludeDim < dims,
      "expected excluded dim between -1 and dims - 1");

  int64_t stopDim = (excludeDim == -1) ? dims : excludeDim;
  int64_t newIndex = -1;
  int64_t oldIndex = 0;
  int64_t remappedExcludedDim = -1;

  while (oldIndex < dims) {
    // Finds a dimension to collapse into
    for (; oldIndex < stopDim; ++oldIndex) {
      if (sizes[oldIndex] == 1) {
        continue;
      }
      // ... (copies sizes/strides[oldIndex] into slot ++newIndex, then breaks)
    }

    // Collapses dims
    for (; oldIndex < stopDim; ++oldIndex) {
      if (sizes[oldIndex] == 1) {
        continue;
      }
      // ... (merges the dim into slot newIndex when strides line up,
      //      otherwise starts a new slot)
    }

    // Handles excludeDim being set (oldIndex == excludeDim)
    if (oldIndex != dims) {
      // ... (keeps the excluded dimension as its own slot)
      remappedExcludedDim = newIndex;
      // ... (resumes iteration after excludeDim with stopDim = dims)
    }
  }

  // Handles special case of all dims size 1
  if (newIndex == -1 || (newIndex == 0 && sizes[0] == 1)) {
    // ... (collapses to a single dim of size 1, stride 1)
    return std::pair<int64_t, int64_t>(0, 1);
  }

  dims = newIndex + 1;
  return std::pair<int64_t, int64_t>(remappedExcludedDim, dims);
}
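A minimal sketch of how collapse_dims folds contiguous dimensions; the include path and the at:: namespace are assumptions of the sketch, not shown on this page:

#include <ATen/CPUApplyUtils.h>  // assumed header location
#include <cstdint>

int main() {
  // A contiguous 2x3x4 tensor has sizes {2, 3, 4} and strides {12, 4, 1}.
  // Every adjacent pair satisfies strides[i] == sizes[i + 1] * strides[i + 1],
  // so the three dimensions fold into one dim of size 24 with stride 1.
  int64_t sizes[] = {2, 3, 4};
  int64_t strides[] = {12, 4, 1};
  auto res = at::collapse_dims(sizes, strides, /*dims=*/3);
  // res.first == -1 (no dim was excluded), res.second == 1 (collapsed dim
  // count); sizes[0] == 24 and strides[0] == 1 after the call.
  return 0;
}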
  // sort_strides: comparator that orders dimensions by decreasing stride
  return strides[i1] > strides[i2];
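A minimal usage sketch for sort_strides (declared later on this page); the at:: namespace is an assumption of the sketch. The function permutes the dimensions of its argument so that strides come out in decreasing order:

at::Tensor t = at::rand({2, 3}).t();  // sizes {3, 2}, strides {1, 3}
at::Tensor s = at::sort_strides(t);   // permuted view: sizes {2, 3}, strides {3, 1}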
// Fixed-capacity per-tensor iterator over up to N dims (members listed below).
template <typename T, int N>
struct strided_tensor_iter_fixed { /* ... */ };
inline bool _all_equal_numel(at::ArrayRef<Tensor> tensors) {
  if (tensors.size() == 0)
    return true;
  int64_t all_numel = tensors[0].numel();
  for (size_t i = 1; i < tensors.size(); i++) {
    if (tensors[i].numel() != all_numel)
      return false;
  }
  return true;
}

inline std::string _all_equal_numel_error(at::ArrayRef<Tensor> tensors) {
  std::ostringstream oss;
  oss << "inconsistent tensor size, expected ";
  for (size_t i = 0; i < tensors.size() - 1; i++) {
    oss << tensors[i].sizes() << ", ";
  }
  oss << "and " << tensors[tensors.size() - 1].sizes()
      << " to have the same number of elements, but got ";
  for (size_t i = 0; i < tensors.size() - 1; i++) {
    oss << tensors[i].numel() << ", ";
  }
  oss << "and " << tensors[tensors.size() - 1].numel()
      << " elements respectively";
  return oss.str();
}
// Recursive variadic helpers over the per-tensor iterators; each has a
// trivial base-case overload in addition to the template overload.
template <typename Arg, typename... Args>
inline void iterate(int64_t size, Arg& iter, Args&... iter_tail);

template <typename Arg, typename... Args>
inline bool iterate_continue(Arg& iter, Args&... iter_tail);

template <typename Arg, typename... Args>
inline int64_t max_iterate_size(Arg& iter, Args&... iter_tail);
// Propagates counter overflow from the innermost dimension outwards,
// rewinding the data pointer for the finished dimension and stepping the
// next-outer one.
template <typename Arg, typename... Args>
inline void iterate_overflow(Arg& iter, Args&... iter_tail) {
  if (iter.counter_[iter.dim_ - 1] == iter.sizes_[iter.dim_ - 1]) {
    for (int64_t i = iter.dim_ - 1; i > 0; i--) {
      if (iter.counter_[i] == iter.sizes_[i]) {
        iter.counter_[i] = 0;
        iter.counter_[i - 1]++;
        iter.data_ = iter.data_ - (iter.sizes_[i] * iter.strides_[i]) +
            iter.strides_[i - 1];
      }
    }
  }
  iterate_overflow(iter_tail...);
}
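A worked example of the carry above, for a hypothetical iterator with sizes_ = {2, 3} and counter_ = {0, 3} after the innermost loop has run through a full row:

// i = 1: counter_[1] == sizes_[1], so counter_ becomes {1, 0}; data_ is
// rewound by 3 * strides_[1] (back to the start of the row) and advanced
// by strides_[0] (on to the next row).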
// Jumps every iterator forward by a linear `offset`, decomposing it into
// per-dimension increments from the innermost dimension outwards.
template <typename Arg, typename... Args>
inline void forward(int64_t offset, Arg& iter, Args&... iter_tail) {
  int64_t multi = offset;
  for (int64_t i = iter.dim_ - 1; i >= 0; i--) {
    int64_t inc = multi % iter.sizes_[i];
    multi = multi / iter.sizes_[i];
    iter.data_ = iter.data_ + inc * iter.strides_[i];
    iter.counter_[i] += inc;
  }
  forward(offset, iter_tail...);
}
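As a worked example of the decomposition in forward, take a hypothetical iterator with sizes_ = {3, 4} and a call forward(7):

// i = 1: inc = 7 % 4 = 3, multi = 7 / 4 = 1  -> counter_[1] += 3
// i = 0: inc = 1 % 3 = 1, multi = 1 / 3 = 0  -> counter_[0] += 1
// data_ advances by 3 * strides_[1] + 1 * strides_[0],
// i.e. to element 7 in row-major order.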
template <typename Arg, typename... Args>
template <typename Op, typename... Args>
inline void apply_op(int64_t numel, int64_t offset, const Op& op, Args... iters);
template <typename scalar1, typename scalar2, typename Op>
inline void CPU_tensor_apply2(Tensor tensor1, Tensor tensor2, const Op op);

template <typename scalar1, typename scalar2, typename scalar3, typename Op>
inline void CPU_tensor_apply3(Tensor tensor1, Tensor tensor2, Tensor tensor3, const Op op);
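A minimal usage sketch for CPU_tensor_apply2 (the at:: namespace and float dtype are assumptions of the sketch); the op receives references to corresponding elements of the two tensors, which must have equal numel:

at::Tensor src = at::rand({2, 3});
at::Tensor dst = at::empty({2, 3});
at::CPU_tensor_apply2<float, float>(
    dst, src, [](float& d, const float& s) { d = 2.0f * s; });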
#define TORCH_CHECK(cond,...)
c10::SmallVector< int64_t, 5 > sizes
c10::SmallVector< int64_t, 5 > strides
int64_t ndimension() const
IntArrayRef strides() const
bool _apply_preamble(ArrayRef< Tensor > tensors)
void CPU_tensor_apply2(Tensor tensor1, Tensor tensor2, const Op op)
void CPU_tensor_apply4(Tensor tensor1, Tensor tensor2, Tensor tensor3, Tensor tensor4, const Op op)
void checkLayout(CheckedFrom c, const Tensor &t, Layout layout)
void checkDeviceType(CheckedFrom c, const Tensor &t, DeviceType device_type)
bool _all_equal_numel(at::ArrayRef< Tensor > tensors)
Tensor sort_strides(Tensor &tensor_)
void forward(int64_t offset)
void iterate(int64_t size)
void CPU_tensor_apply3(Tensor tensor1, Tensor tensor2, Tensor tensor3, const Op op)
int64_t max_iterate_size()
int64_t numel(const Tensor &tensor)
int64_t size(const Tensor &tensor, int64_t dim)
std::string _all_equal_numel_error(at::ArrayRef< Tensor > tensors)
int64_t _max_dim_tensors(ArrayRef< Tensor > tensors)
std::pair< int64_t, int64_t > collapse_dims(T *sizes, T *strides, int64_t dims, const int excludeDim=-1)
constexpr DeviceType kCPU
c10::BFloat16 max(c10::BFloat16 a, c10::BFloat16 b)
c10::BFloat16 min(c10::BFloat16 a, c10::BFloat16 b)
typename function_traits< Func >::template arg< i >::type Arg
strided_tensor_iter_fixed(Tensor &tensor, bool sort_strides=false)
void operator=(strided_tensor_iter_fixed const &x)=delete
strided_tensor_iter_fixed(strided_tensor_iter_fixed &&)=default
strided_tensor_iter_fixed(strided_tensor_iter_fixed const &)=delete
std::vector< int64_t > strides_
strided_tensor_iter(Tensor &tensor)
std::vector< int64_t > counter_
strided_tensor_iter(strided_tensor_iter &&)=default
std::vector< int64_t > sizes_
strided_tensor_iter(strided_tensor_iter const &)=delete
void operator=(strided_tensor_iter const &x)=delete