pytorch
1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.
Distributions kernels adapted from THRandom.cpp. The kernels try to follow the std::random distributions signature. For instance, in ATen: auto gen = at::detail::createCPUGenerator(); at::uniform_real_distribution<double> uniform(0, 1); auto sample = uniform(gen.get()); More...
Namespaces | |
namespace | autocast |
namespace | cpp_custom_type_hack |
namespace | cpu |
namespace | cuda |
namespace | detail |
namespace | impl |
namespace | indexing |
namespace | internal |
namespace | meta |
namespace | metal |
namespace | namedinference |
namespace | native |
namespace | Reduction |
namespace | sequence_number |
namespace | sparse |
namespace | tracer |
namespace | transformation |
namespace | vec256 |
namespace | vitals |
namespace | vml |
namespace | vulkan |
Enumerations | |
enum class | NameType : uint8_t { BASIC , WILDCARD } |
enum class | MemOverlap { NO , YES , TOO_HARD } |
enum class | MemOverlapStatus { FULL , PARTIAL , NO , TOO_HARD } |
enum class | FastSetupType : uint8_t { NONE , CONTIGUOUS , CHANNELS_LAST , NON_OVERLAPPING_DENSE } |
Functions | |
static SmallVector< indexing::TensorIndex, kVmapStaticDimVecSize > | computeIndex (int64_t linear_idx, IntArrayRef sizes) |
static bool | areAllReturnsTensors (const FunctionSchema &schema) |
static bool | areAnyArgumentsTensorList (const FunctionSchema &schema) |
static bool | isInplaceOp (const c10::FunctionSchema &schema) |
static void | warnFallback (const c10::FunctionSchema &schema, bool is_inplace) |
void | batchedTensorInplaceForLoopFallback (const c10::OperatorHandle &op, torch::jit::Stack *stack) |
static Tensor | safeStack (TensorList tensors) |
void | batchedTensorForLoopFallback (const c10::OperatorHandle &op, torch::jit::Stack *stack) |
Tensor | makeBatched (const Tensor &tensor, BatchDims bdims) |
Tensor | addBatchDim (const Tensor &tensor, int64_t level, int64_t dim) |
bool | inplaceIsVmapCompatible (const Tensor &self, const Tensor &other) |
bool | isBatchedTensor (const Tensor &tensor) |
BatchedTensorImpl * | unsafeGetBatchedImpl (Tensor tensor) |
BatchedTensorImpl * | maybeGetBatchedImpl (Tensor tensor) |
std::bitset< kVmapMaxTensorDims > | createBatchDimBitset (BatchDimsRef bdims) |
std::bitset< kVmapNumLevels > | createVmapLevelsBitset (BatchDimsRef bdims) |
std::ostream & | operator<< (std::ostream &out, const BatchDim &bdim) |
static bool | is_allowed_dim_on_scalar_tensor (int64_t dim) |
Tensor | sum_batching_rule (const Tensor &self, IntArrayRef dims, bool keepdim, optional< ScalarType > dtype) |
bool | isPhysicalScalarTensor (const Tensor &logical_tensor) |
template<typename F , F Func, typename... ExtraArgs> | |
Tensor | binary_pointwise_batching_rule (const Tensor &self, const Tensor &other, ExtraArgs... args) |
Tensor | expand_batching_rule (const Tensor &self, IntArrayRef size, bool implicit) |
std::vector< Tensor > | chunk_batching_rule (const Tensor &self, int64_t chunks, int64_t dim) |
Tensor | clamp_batching_rule (const Tensor &self, optional< Scalar > min, optional< Scalar > max) |
Tensor | clamp_min_batching_rule (const Tensor &self, Scalar min) |
Tensor | clamp_max_batching_rule (const Tensor &self, Scalar max) |
std::vector< Tensor > | tensor_split_sections_batching_rule (const Tensor &self, int64_t sections, int64_t dim) |
std::vector< Tensor > | tensor_split_indices_batching_rule (const Tensor &self, IntArrayRef indices, int64_t dim) |
Tensor | unsqueeze_batching_rule (const Tensor &self, int64_t dim) |
Tensor & | fill_inplace_scalar_batching_rule (Tensor &self, Scalar value) |
Tensor & | fill_inplace_tensor_batching_rule (Tensor &self, const Tensor &value) |
Tensor & | zero_inplace_batching_rule (Tensor &self) |
Tensor | squeeze_batching_rule (const Tensor &self) |
Tensor | squeeze_dim_batching_rule (const Tensor &self, int64_t dim) |
Tensor | trace_batching_rule (const Tensor &self) |
Tensor | trace_backward_batching_rule (const Tensor &grad, IntArrayRef input_sizes) |
Tensor | transpose_int_batching_rule (const Tensor &self, int64_t dim0, int64_t dim1) |
Tensor | permute_batching_rule (const Tensor &self, IntArrayRef dims) |
Tensor | select_batching_rule (const Tensor &self, int64_t dim, int64_t index) |
static int64_t | getGradInputPhysicalDim (int64_t dim, IntArrayRef input_sizes, int64_t num_batch_dims) |
Tensor | select_backward_batching_rule (const Tensor &grad, IntArrayRef input_sizes, int64_t dim, int64_t index) |
Tensor | slice_batching_rule (const Tensor &self, int64_t dim, c10::optional< int64_t > start, c10::optional< int64_t > end, int64_t step) |
Tensor | slice_backward_batching_rule (const Tensor &grad, IntArrayRef input_sizes, int64_t dim, int64_t start, int64_t end, int64_t step) |
Tensor | diagonal_batching_rule (const Tensor &self, int64_t offset, int64_t dim1, int64_t dim2) |
Tensor | diagonal_backward_batching_rule (const Tensor &grad, IntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2) |
Tensor | movedim_batching_rule (const Tensor &self, IntArrayRef source, IntArrayRef destination) |
Tensor | reshape_batching_rule (const Tensor &self, IntArrayRef shape) |
std::vector< Tensor > | split_batching_rule (const Tensor &self, int64_t split_size, int64_t dim) |
std::vector< Tensor > | split_with_sizes_batching_rule (const Tensor &self, IntArrayRef split_sizes, int64_t dim) |
std::vector< Tensor > | unbind_batching_rule (const Tensor &self, int64_t dim) |
Tensor | unfold_batching_rule (const Tensor &self, int64_t dim, int64_t size, int64_t step) |
Tensor | contiguous_batching_rule (const Tensor &self, MemoryFormat memory_format) |
Tensor | view_batching_rule (const Tensor &self, IntArrayRef size) |
Tensor | view_as_complex_batching_rule (const Tensor &self) |
static void | checkBatchDimsAtFrontInLayout (IntArrayRef physical_strides, int64_t num_batch_dims) |
static optional< int64_t > | maximum_indexable_location (IntArrayRef sizes, IntArrayRef strides, int64_t storage_offset) |
static void | checkBasicAsStridedValidForSlice (const Tensor &physical_tensor, int64_t num_batch_dims, IntArrayRef sizes, IntArrayRef strides, optional< int64_t > maybe_storage_offset) |
Tensor | as_strided_batching_rule (const Tensor &tensor, IntArrayRef sizes, IntArrayRef strides, optional< int64_t > storage_offset) |
template<typename F , F Func, typename... ExtraArgs> | |
Tensor | unwrap_and_call (const Tensor &input, ExtraArgs... args) |
template<typename F , F Func, typename... ExtraArgs> | |
Tensor | unwrap_and_call_method (const Tensor &input, ExtraArgs... extra_args) |
Tensor | pow_scalar_Tensor_batching_rule (Scalar other, const Tensor &self) |
Tensor | clone_batching_rule (const Tensor &self, optional< MemoryFormat > memory_format) |
Tensor | mv_batching_rule (const Tensor &self, const Tensor &other) |
Tensor | dot_batching_rule (const Tensor &self, const Tensor &other) |
Tensor | bmm_batching_rule (const Tensor &self, const Tensor &other) |
Tensor | mm_batching_rule (const Tensor &self, const Tensor &other) |
Tensor | cat_batching_rule (TensorList tensors, int64_t dim) |
Tensor | stack_batching_rule (TensorList tensors, int64_t dim) |
Tensor | to_dtype_layout_batching_rule (const Tensor &self, optional< ScalarType > dtype, optional< Layout > layout, optional< Device > device, optional< bool > pin_memory, bool non_blocking, bool copy, optional< MemoryFormat > memory_format) |
Tensor | new_zeros_batching_rule (const Tensor &self, IntArrayRef size, optional< ScalarType > dtype, optional< Layout > layout, optional< Device > device, optional< bool > pin_memory) |
Tensor | new_empty_batching_rule (const Tensor &self, IntArrayRef size, c10::optional< ScalarType > dtype, c10::optional< Layout > layout, c10::optional< Device > device, c10::optional< bool > pin_memory) |
Tensor | new_empty_strided_batching_rule (const Tensor &self, IntArrayRef size, IntArrayRef stride, optional< ScalarType > dtype, optional< Layout > layout, optional< Device > device, optional< bool > pin_memory) |
template<typename F , F Func> | |
Tensor | comparison_pointwise_batching_rule (const Tensor &self, const Tensor &other) |
TORCH_LIBRARY_IMPL (_, Batched, m) | |
TORCH_LIBRARY_IMPL (aten, Batched, m) | |
Context & | globalContext () |
Allocator * | getCPUAllocator () |
static void | init () |
static DeprecatedTypeProperties & | getDeprecatedTypeProperties (Backend p, ScalarType s) |
static DeprecatedTypeProperties & | CPU (ScalarType s) |
static DeprecatedTypeProperties & | CUDA (ScalarType s) |
static DeprecatedTypeProperties & | HIP (ScalarType s) |
static bool | hasCUDA () |
static bool | hasHIP () |
static bool | hasXLA () |
static size_t | getNumGPUs () |
static bool | hasOpenMP () |
static bool | hasMKL () |
static bool | hasLAPACK () |
static bool | hasMAGMA () |
static bool | hasMKLDNN () |
static void | manual_seed (uint64_t seed) |
TORCH_API bool | is_custom_op (const c10::OperatorName &opName) |
DeprecatedTypePropertiesRegistry & | globalDeprecatedTypePropertiesRegistry () |
std::ostream & | operator<< (std::ostream &out, const Dimname &dimname) |
static void | check_valid_identifier (const std::string &name) |
bool | operator== (const Dimname &lhs, const Dimname &rhs) |
bool | operator!= (const Dimname &lhs, const Dimname &rhs) |
std::ios_base & | defaultfloat (std::ios_base &__base) |
std::ostream & | operator<< (std::ostream &out, const DeprecatedTypeProperties &t) |
static std::tuple< double, int64_t > | __printFormat (std::ostream &stream, const Tensor &self) |
static void | __printIndent (std::ostream &stream, int64_t indent) |
static void | printScale (std::ostream &stream, double scale) |
static void | __printMatrix (std::ostream &stream, const Tensor &self, int64_t linesize, int64_t indent) |
void | __printTensor (std::ostream &stream, Tensor &self, int64_t linesize) |
std::ostream & | print (std::ostream &stream, const Tensor &tensor_, int64_t linesize) |
static std::ostream & | operator<< (std::ostream &out, const Tensor &t) |
static void | print (const Tensor &t, int64_t linesize=80) |
static std::ostream & | operator<< (std::ostream &out, Scalar s) |
TORCH_API void | launch (std::function< void()> func) |
Tensor & | internal_set_names_inplace (Tensor &tensor, optional< DimnameList > names) |
Tensor & | internal_set_names_inplace (Tensor &tensor, std::vector< Dimname > &&names, bool validate_names) |
DimnameList | default_names (size_t len) |
static void | check_unique_names (DimnameList names) |
void | check_names_valid_for (const Tensor &tensor, DimnameList names) |
void | check_names_valid_for (size_t tensor_dim, DimnameList names) |
std::ostream & | operator<< (std::ostream &out, const Range &range) |
Tensor | unsafeTensorFromTH (void *th_pointer, bool retain) |
Storage | unsafeStorageFromTH (void *th_pointer, bool retain) |
template<typename T > | |
std::pair< int64_t, int64_t > | collapse_dims (T *sizes, T *strides, int64_t dims, const int excludeDim=-1) |
Tensor | sort_strides (Tensor &tensor_) |
bool | _all_equal_numel (at::ArrayRef< Tensor > tensors) |
std::string | _all_equal_numel_error (at::ArrayRef< Tensor > tensors) |
bool | _apply_preamble (ArrayRef< Tensor > tensors) |
int64_t | _max_dim_tensors (ArrayRef< Tensor > tensors) |
void | iterate (int64_t size) |
template<typename Arg , typename... Args> | |
void | iterate (int64_t size, Arg &iter, Args &... iter_tail) |
bool | iterate_continue () |
template<typename Arg , typename... Args> | |
bool | iterate_continue (Arg &iter, Args &... iter_tail) |
int64_t | max_iterate_size () |
template<typename Arg , typename... Args> | |
int64_t | max_iterate_size (Arg &iter, Args &... iter_tail) |
void | iterate_overflow () |
template<typename Arg , typename... Args> | |
void | iterate_overflow (Arg &iter, Args &... iter_tail) |
void | forward (int64_t offset) |
template<typename Arg , typename... Args> | |
void | forward (int64_t offset, Arg &iter, Args &... iter_tail) |
int64_t | max_dim () |
template<typename Arg , typename... Args> | |
int64_t | max_dim (Arg &iter, Args &... iter_tail) |
void | apply_op () |
template<typename Op , typename... Args> | |
void | apply_op (int64_t numel, int64_t offset, const Op &op, Args... iters) |
template<typename scalar1 , typename scalar2 , typename Op > | |
void | CPU_tensor_apply2 (Tensor tensor1, Tensor tensor2, const Op op) |
template<typename scalar1 , typename scalar2 , typename scalar3 , typename Op > | |
void | CPU_tensor_apply3 (Tensor tensor1, Tensor tensor2, Tensor tensor3, const Op op) |
template<typename scalar1 , typename scalar2 , typename scalar3 , typename scalar4 , typename Op > | |
void | CPU_tensor_apply4 (Tensor tensor1, Tensor tensor2, Tensor tensor3, Tensor tensor4, const Op op) |
static | cpu_fixed_malloc (void *, ptrdiff_t) |
static | cpu_fixed_realloc (void *, void *, ptrdiff_t) |
static | cpu_fixed_free (void *state, void *allocation) |
C10_DECLARE_REGISTRY (CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs) | |
C10_DECLARE_REGISTRY (HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs) | |
optional< Device > | device_of (const Tensor &t) |
Return the Device of a Tensor, if the Tensor is defined. More... | |
optional< Device > | device_of (TensorList t) |
Return the Device of a TensorList, if the list is non-empty and the first Tensor is defined. More... | |
constexpr bool | should_include_kernel_dtype (const char *kernel_tag_str, at::ScalarType scalar_type) |
The method should_include_kernel_dtype() returns whether the switching code for a specific dtype should be included, based on build-time constants generated from tracing model execution. More... | |
DLDataType | getDLDataType (const Tensor &t) |
DLContext | getDLContext (const Tensor &tensor, const int64_t &device_id) |
static Device | getATenDevice (const DLContext &ctx) |
ScalarType | toScalarType (const DLDataType &dtype) |
void | deleter (DLManagedTensor *arg) |
DLManagedTensor * | toDLPack (const Tensor &src) |
Tensor | fromDLPack (const DLManagedTensor *src) |
static void * | checkDL (void *x) |
std::vector< int64_t > | infer_size (IntArrayRef a, IntArrayRef b) |
std::tuple< std::vector< int64_t >, std::vector< int64_t > > | inferExpandGeometry (IntArrayRef tensor_sizes, IntArrayRef tensor_strides, IntArrayRef sizes) |
std::vector< int64_t > | infer_dense_strides (IntArrayRef tensor_sizes, IntArrayRef tensor_strides) |
bool | are_expandable (IntArrayRef shape1, IntArrayRef shape2) |
void | check_defined (std::initializer_list< std::reference_wrapper< const Tensor > > tensors, const char *api_name) |
std::tuple< Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand) |
std::tuple< Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand, const char *api_name) |
std::tuple< Tensor, Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2) |
std::tuple< Tensor, Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) |
std::tuple< Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2) |
std::tuple< Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) |
std::tuple< Tensor, Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const Tensor &to_expand3) |
std::tuple< Tensor, Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const Tensor &to_expand3, const char *api_name) |
std::tuple< Tensor > | expand_size (const Tensor &to_expand, IntArrayRef sizes) |
std::tuple< Tensor > | expand_size (const Tensor &to_expand, IntArrayRef sizes, const char *api_name) |
std::vector< Tensor > | expand_outplace (TensorList to_expand) |
static Tensor | sum_to (Tensor tensor, const IntArrayRef shape) |
static bool | is_expandable_to (IntArrayRef shape, IntArrayRef desired) |
std::vector< int64_t > | infer_size (IntArrayRef shape, int64_t numel) |
TensorOptions | initialTensorOptions () |
MemOverlap | has_internal_overlap (const Tensor &tensor) |
MemOverlap | has_internal_overlap (TensorImpl *t) |
void | assert_no_internal_overlap (const Tensor &t) |
void | assert_no_internal_overlap (TensorImpl *t) |
MemOverlapStatus | get_overlap_status (const Tensor &a, const Tensor &b) |
MemOverlapStatus | get_overlap_status (TensorImpl *a, TensorImpl *b) |
void | assert_no_partial_overlap (const Tensor &a, const Tensor &b) |
void | assert_no_partial_overlap (TensorImpl *a, TensorImpl *b) |
void | assert_no_overlap (const Tensor &a, const Tensor &b) |
void | assert_no_overlap (TensorImpl *a, TensorImpl *b) |
static std::string | toDimnameRepr (const Tensor &tensor) |
int64_t | dimname_to_position (const Tensor &tensor, Dimname dim) |
std::vector< int64_t > | dimnames_to_positions (const Tensor &tensor, DimnameList dims) |
static void | report_positional_error (const Dimname &name, const Dimname &other_name, DimnameList names, DimnameList other_names, const char *action) |
static void | check_for_misalignment (const Dimname &name, DimnameList names, DimnameList other_names, const char *action) |
std::vector< Dimname > | unify_from_right (DimnameList names, DimnameList other_names, const char *action) |
bool | has_names (TensorList tensors) |
void | reportNYIDimnameOverload (const char *op_name) |
static bool | use_two_pass_reduction (TensorIteratorBase &iter) |
static void | two_pass_reduction (TensorIteratorBase &iter, loop2d_t loop) |
static void | parallel_dim_reduction (TensorIteratorBase &iter, loop2d_t loop) |
static int | find_split_dim (TensorIteratorBase &iter) |
Chooses a dimension over which to parallelize. More... | |
static std::tuple< int64_t, int64_t > | round_columns (TensorIteratorBase &iter, int dim, int multiple, int64_t begin, int64_t end) |
template<typename T , typename std::enable_if< std::is_integral< T >::value, int >::type = 0> | |
C10_HOST_DEVICE bool | _isnan (T val) |
template<typename T , typename std::enable_if< c10::is_complex< T >::value, int >::type = 0> | |
bool | _isnan (T val) |
template<typename T , typename std::enable_if< std::is_same< T, at::BFloat16 >::value, int >::type = 0> | |
C10_HOST_DEVICE bool | _isnan (at::BFloat16 val) |
template<typename T > | |
C10_HOST_DEVICE T | exp (T x) |
template<> | |
C10_HOST_DEVICE double | exp< double > (double x) |
template<typename T > | |
C10_HOST_DEVICE T | log (T x) |
template<> | |
C10_HOST_DEVICE double | log< double > (double x) |
template<typename T > | |
C10_HOST_DEVICE T | tan (T x) |
template<> | |
C10_HOST_DEVICE double | tan< double > (double x) |
int64_t | divup (int64_t x, int64_t y) |
TORCH_API void | init_num_threads () |
TORCH_API void | set_num_threads (int) |
TORCH_API int | get_num_threads () |
TORCH_API int | get_thread_num () |
TORCH_API bool | in_parallel_region () |
template<class F > | |
void | parallel_for (const int64_t begin, const int64_t end, const int64_t grain_size, const F &f) |
template<class scalar_t , class F , class SF > | |
scalar_t | parallel_reduce (const int64_t begin, const int64_t end, const int64_t grain_size, const scalar_t ident, const F &f, const SF &sf) |
TORCH_API std::string | get_parallel_info () |
TORCH_API void | set_num_interop_threads (int) |
TORCH_API int | get_num_interop_threads () |
TORCH_API void | intraop_launch (std::function< void()> func) |
TORCH_API std::shared_ptr< c10::ivalue::Future > | intraop_launch_future (std::function< void()> func) |
TORCH_API int | intraop_default_num_threads () |
template<typename F0 , typename F1 > | |
void | intraop_invoke (const F0 &f0, const F1 &f1) |
QuantizerPtr | make_per_tensor_affine_quantizer (double scale, int64_t zero_point, ScalarType scalar_type) |
QuantizerPtr | make_per_channel_affine_quantizer (const Tensor &scales, const Tensor &zero_points, int64_t axis, ScalarType scalar_type) |
QTensorImpl * | get_qtensorimpl (const Tensor &self) |
int64_t | get_sub_byte_tensor_size (int64_t size_bytes, at::ScalarType t) |
Tensor | new_qtensor (IntArrayRef sizes, const TensorOptions &options, QuantizerPtr quantizer) |
C10_EXPORT void | set_quantizer_ (const Tensor &self, ConstQuantizerPtr quantizer) |
const RecordFunctionTLS & | get_record_function_tls_ () |
void | set_record_function_tls_ (const RecordFunctionTLS &tls) |
RecordFunctionCallbacks | _getTLSCallbacks () |
void | _setTLSCallbacks (const RecordFunctionCallbacks &callbacks) |
bool | hasCallbacks () |
bool | hasGlobalCallbacks () |
hasGlobalCallbacks returns whether there are global callbacks registered with addGlobalCallback. More... | |
bool | hasThreadLocalCallbacks () |
hasThreadLocalCallbacks returns whether there are callbacks registered with addThreadLocalCallback. More... | |
CallbackHandle | addThreadLocalCallback (RecordFunctionCallback cb) |
addThreadLocalCallback adds a thread local callback to run with RecordFunction, returns handle to use with removeThreadLocalCallback More... | |
CallbackHandle | addGlobalCallback (RecordFunctionCallback cb) |
addGlobalCallback adds a global callback to run with RecordFunction: More... | |
void | removeCallback (CallbackHandle handle) |
removeCallback removes a callback given the handle returned by addThreadLocalCallback or addGlobalCallback; More... | |
void | clearGlobalCallbacks () |
clearGlobalCallbacks removes all global callbacks. WARNING: not thread safe. More... | |
void | clearThreadLocalCallbacks () |
clearThreadLocalCallbacks removes all thread local callbacks More... | |
void | clearCallbacks () |
bool | isRecordFunctionEnabled () |
isRecordFunctionEnabled returns whether RecordFunction is enabled thread locally More... | |
void | enableRecordFunction (bool enable=true) |
enableRecordFunction enables RecordFunction thread locally More... | |
void | bumpRecordAllFunctions () |
void | releaseRecordAllFunctions () |
bool | checkRecordAllFunctions () |
bool | shouldRunRecordFunction (bool *pre_sampled) |
Tensor | var (const Tensor &self, int dim) |
std::tuple< Tensor, Tensor > | var_mean (const Tensor &self, int dim) |
Tensor | std (const Tensor &self, int dim) |
std::tuple< Tensor, Tensor > | std_mean (const Tensor &self, int dim) |
Tensor | from_blob (void *data, IntArrayRef sizes, IntArrayRef strides, const std::function< void(void *)> &deleter, const TensorOptions &options={}, const c10::optional< Device > target_device=c10::nullopt) |
Tensor | from_blob (void *data, IntArrayRef sizes, const std::function< void(void *)> &deleter, const TensorOptions &options={}) |
Tensor | from_blob (void *data, IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options={}) |
Tensor | from_blob (void *data, IntArrayRef sizes, const TensorOptions &options={}) |
int64_t | numel (const Tensor &tensor) |
int64_t | size (const Tensor &tensor, int64_t dim) |
int64_t | stride (const Tensor &tensor, int64_t dim) |
TORCH_LIBRARY (aten, m) | |
int64_t | get_device (Tensor self) |
static DispatchKey | legacyExtractDispatchKey (const Tensor &t) |
static Tensor | operator+ (const Tensor &x, const Tensor &y) |
static Tensor | operator+ (const Tensor &x, Scalar y) |
static Tensor | operator+ (Scalar x, const Tensor &y) |
static Tensor | operator* (const Tensor &x, const Tensor &y) |
static Tensor | operator* (const Tensor &x, Scalar y) |
static Tensor | operator* (Scalar x, const Tensor &y) |
static Tensor | operator- (const Tensor &x, const Tensor &y) |
static Tensor | operator- (const Tensor &x, Scalar y) |
static Tensor | operator- (Scalar x, const Tensor &y) |
static Tensor | operator/ (const Tensor &x, const Tensor &y) |
static Tensor | operator/ (const Tensor &x, Scalar y) |
static Tensor | operator/ (Scalar x, const Tensor &y) |
static Tensor | operator% (const Tensor &x, const Tensor &y) |
static Tensor | operator% (const Tensor &x, Scalar y) |
static Tensor | operator% (Scalar x, const Tensor &y) |
static Tensor | operator& (const Tensor &x, const Tensor &y) |
static Tensor | operator& (const Tensor &x, Scalar y) |
static Tensor | operator& (Scalar x, const Tensor &y) |
static Tensor | operator| (const Tensor &x, const Tensor &y) |
static Tensor | operator| (const Tensor &x, Scalar y) |
static Tensor | operator| (Scalar x, const Tensor &y) |
static Tensor | operator^ (const Tensor &x, const Tensor &y) |
static Tensor | operator^ (const Tensor &x, Scalar y) |
static Tensor | operator^ (Scalar x, const Tensor &y) |
static Tensor | operator< (const Tensor &x, const Tensor &y) |
static Tensor | operator< (const Tensor &x, Scalar y) |
static Tensor | operator< (Scalar x, const Tensor &y) |
static Tensor | operator<= (const Tensor &x, const Tensor &y) |
static Tensor | operator<= (const Tensor &x, Scalar y) |
static Tensor | operator<= (Scalar x, const Tensor &y) |
static Tensor | operator> (const Tensor &x, const Tensor &y) |
static Tensor | operator> (const Tensor &x, Scalar y) |
static Tensor | operator> (Scalar x, const Tensor &y) |
static Tensor | operator>= (const Tensor &x, const Tensor &y) |
static Tensor | operator>= (const Tensor &x, Scalar y) |
static Tensor | operator>= (Scalar x, const Tensor &y) |
static Tensor | operator== (const Tensor &x, const Tensor &y) |
static Tensor | operator== (const Tensor &x, Scalar y) |
static Tensor | operator== (Scalar x, const Tensor &y) |
static Tensor | operator!= (const Tensor &x, const Tensor &y) |
static Tensor | operator!= (const Tensor &x, Scalar y) |
static Tensor | operator!= (Scalar x, const Tensor &y) |
std::ostream & | operator<< (std::ostream &out, TensorGeometryArg t) |
void | checkDim (CheckedFrom c, const Tensor &tensor, const char *name, int pos, int64_t dim) |
void | checkDim (CheckedFrom c, const TensorGeometryArg &t, int64_t dim) |
void | checkDimRange (CheckedFrom c, const TensorGeometryArg &t, int64_t dim_start, int64_t dim_end) |
void | checkContiguous (CheckedFrom c, const TensorGeometryArg &t) |
void | checkAllContiguous (CheckedFrom c, at::ArrayRef< TensorArg > ts) |
void | checkSize (CheckedFrom c, const TensorGeometryArg &t, IntArrayRef sizes) |
void | checkSize (CheckedFrom c, const TensorGeometryArg &t, int64_t dim, int64_t size) |
void | checkAllSame (CheckedFrom c, ArrayRef< TensorArg > tensors, void(*fn)(CheckedFrom, const TensorArg &, const TensorArg &)) |
void | checkSameSize (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkAllSameSize (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkNumel (CheckedFrom c, const TensorGeometryArg &t, int64_t numel) |
void | checkSameNumel (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkAllSameNumel (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkSameGPU (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkAllSameGPU (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkSameType (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkScalarType (CheckedFrom c, const TensorArg &t, ScalarType ty) |
void | checkScalarTypes (CheckedFrom c, const TensorArg &t, at::ArrayRef< ScalarType > l) |
void | checkAllSameType (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkSameDim (CheckedFrom c, const TensorGeometryArg &t1, const TensorGeometryArg &t2) |
void | checkDefined (CheckedFrom c, const TensorArg &t) |
void | checkAllDefined (CheckedFrom c, ArrayRef< TensorArg > ts) |
void | checkBackend (CheckedFrom c, const Tensor &t, Backend backend) |
void | checkBackend (CheckedFrom c, at::ArrayRef< Tensor > tensors, at::Backend backend) |
void | checkDeviceType (CheckedFrom c, const Tensor &t, DeviceType device_type) |
void | checkDeviceType (CheckedFrom c, at::ArrayRef< Tensor > tensors, at::DeviceType device_type) |
void | checkLayout (CheckedFrom c, const Tensor &t, Layout layout) |
void | checkLayout (CheckedFrom c, at::ArrayRef< Tensor > tensors, at::Layout layout) |
void * | maybe_data_ptr (const Tensor &tensor) |
void * | maybe_data_ptr (const TensorArg &tensor) |
bool | geometry_is_contiguous (IntArrayRef sizes, IntArrayRef strides) |
void | check_dim_size (const Tensor &tensor, int64_t dim, int64_t dim_size, int64_t size) |
TORCH_API void | checkSameNumel (CheckedFrom c, const TensorGeometryArg &t1, const TensorGeometryArg &t2) |
template<typename T > | |
std::function< T(void)> | wrapPropagateTLSState (std::function< T(void)> callback) |
int | _crash_if_asan (int arg) |
static TensorImpl * | checked_dense_tensor_unwrap (const Tensor &expr, const char *name, int pos, const char *api, bool allowNull, DeviceType device_type, ScalarType scalar_type) |
static std::vector< TensorImpl * > | checked_dense_tensor_list_unwrap (ArrayRef< Tensor > tensors, const char *name, int pos, DeviceType device_type, ScalarType scalar_type) |
template<size_t N> | |
std::array< int64_t, N > | check_intlist (ArrayRef< int64_t > list, const char *name, int pos) |
int64_t | sum_intlist (ArrayRef< int64_t > list) |
template<typename C , typename std::enable_if< std::is_integral< typename C::value_type >::value, int >::type = 0> | |
int64_t | prod_intlist (const C &container) |
template<typename Iter , typename std::enable_if< std::is_integral< typename std::iterator_traits< Iter >::value_type >::value, int >::type = 0> | |
int64_t | prod_intlist (Iter begin, Iter end) |
template<typename T > | |
static T * | check_generator (c10::optional< Generator > gen) |
Utility function to static cast input Generator* to the backend generator type (CPU/CUDAGeneratorImpl etc.) More... | |
template<typename T > | |
static T * | get_generator_or_default (const c10::optional< Generator > &gen, const Generator &default_gen) |
Utility function used in tensor implementations, which supplies the default generator to tensors, if an input generator is not supplied. More... | |
void | check_size_nonnegative (IntArrayRef size) |
std::string | get_mkl_version () |
std::string | get_mkldnn_version () |
std::string | get_openmp_version () |
std::string | used_cpu_capability () |
std::string | show_config () |
Returns a detailed string describing the configuration of PyTorch. More... | |
std::string | get_cxx_flags () |
template<typename... Args> | |
Tensor | unsupportedRandomOp (Args... args) |
template<typename... Args> | |
Tensor & | unsupportedRandomOp_ (Args... args) |
TORCH_LIBRARY_IMPL (_, VmapMode, m) | |
TORCH_LIBRARY_IMPL (aten, VmapMode, m) | |
static bool | areBdimsAtFrontInOrder (BatchDimsRef bdims) |
static Tensor | permuteBatchDimsToFront (BatchedTensorImpl *batched) |
static BatchDims | computeFrontBatchDimsFromLevels (std::bitset< kVmapNumLevels > levels_bitset) |
static std::pair< Tensor, std::bitset< kVmapNumLevels > > | getPhysicalTensorAndLevels (const Tensor &self) |
static Tensor | alignBatchDimsAtFront (const Tensor &self, std::bitset< kVmapNumLevels > requested_levels, int64_t requested_example_dim) |
static std::pair< std::bitset< kVmapNumLevels >, int64_t > | getLevelsAndLargestLogicalDim (TensorList logical_tensors) |
static int64_t | maybe_wrap_dim (int64_t dim, int64_t dim_post_expr, bool wrap_scalar=true) |
static int64_t | maybe_wrap_dim (int64_t dim, TensorImpl *tensor) |
static int64_t | maybe_wrap_dim (int64_t dim, TensorList tensors) |
static int64_t | maybe_wrap_dim (int64_t dim, const std::vector< std::vector< int64_t > > &tensor_sizes) |
static void | maybe_wrap_dims_n (int64_t *dims, int64_t ndims, int64_t dim_post_expr) |
template<typename Container > | |
void | maybe_wrap_dims (Container &dims, int64_t dim_post_expr) |
static int64_t | legacy_cat_wrap_dim (int64_t dim, const std::vector< std::vector< int64_t > > &tensor_sizes) |
static int64_t | legacy_cat_wrap_dim (int64_t dim, TensorList tensors) |
static void | wrap_all_dims (std::vector< int64_t > &dims_to_wrap, int64_t tensor_total_dims) |
static std::bitset< dim_bitset_size > | dim_list_to_bitset (IntArrayRef dims, int64_t ndims) |
REGISTER_CONTEXT (DeviceType::CPU, caffe2::CPUContext) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::CPU, DeviceType::CPU, caffe2::CopyBytesWrapper) | |
C10_DEFINE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device) | |
C10_DECLARE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device) | |
std::unique_ptr< at::BaseContext > | CreateContext (const at::Device &device) |
REGISTER_CONTEXT (DeviceType::IDEEP, caffe2::IDEEPContext) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::IDEEP, DeviceType::CPU, CopyBytesWrapper) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::CPU, DeviceType::IDEEP, CopyBytesWrapper) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::IDEEP, DeviceType::IDEEP, CopyBytesWrapper) | |
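To make the listing above concrete, the following is a small, hedged sketch combining a few of the utilities declared in this namespace (from_blob, device_of, numel, size, and parallel_for). The buffer and variable names are illustrative; only the at:: calls themselves come from the list above.

#include <ATen/ATen.h>
#include <ATen/Parallel.h>

void example_usage() {
  float buffer[6] = {1, 2, 3, 4, 5, 6};
  // Wrap caller-owned memory in a Tensor without copying it.
  at::Tensor t = at::from_blob(buffer, {2, 3}, at::TensorOptions().dtype(at::kFloat));

  c10::optional<at::Device> dev = at::device_of(t);  // Device of a defined Tensor
  int64_t n = at::numel(t);                          // 6
  int64_t rows = at::size(t, 0);                     // 2

  // Split the element range over the intra-op thread pool.
  at::parallel_for(0, n, /*grain_size=*/1, [&](int64_t begin, int64_t end) {
    for (int64_t i = begin; i < end; ++i) {
      buffer[i] *= 2.0f;
    }
  });
  (void)dev; (void)rows;
}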
Variables | |
constexpr int64_t | kVmapMaxTensorDims = 64 |
constexpr int64_t | kVmapNumLevels = 64 |
constexpr int64_t | kBatchDimsStackSize = 5 |
static const char | cublas_config_var_name [] = "CUBLAS_WORKSPACE_CONFIG" |
static const char *const | cublas_deterministic_configs [] = { ":4096:8", ":16:8" } |
thread_local bool | override_allow_tf32_flag = false |
static Symbol | kWildcard = Symbol::dimname("*") |
constexpr size_t | kDimVectorStaticSize = 5 |
thread_local bool | GradMode_enabled = true |
thread_local is a feature that is not enabled by Caffe2 mobile build (e.g. More... | |
constexpr int | MERSENNE_STATE_N = 624 |
constexpr int | MERSENNE_STATE_M = 397 |
constexpr uint32_t | MATRIX_A = 0x9908b0df |
constexpr uint32_t | UMASK = 0x80000000 |
constexpr uint32_t | LMASK = 0x7fffffff |
thread_local bool | NamesMode_enabled = true |
constexpr size_t | kMaxNamedTensorDim = 64 |
static Allocator | CPU_fixed_allocator |
constexpr const char * | CUDA_HELP |
enum C10_API_ENUM | RecordScope { FUNCTION , BACKWARD_FUNCTION , TORCHSCRIPT_FUNCTION , KERNEL_FUNCTION_DTYPE , USER_SCOPE , NUM_SCOPES } |
constexpr std::size_t | kSoftLimitCallbacks = 4 |
constexpr int64_t | kVmapTransformStaticInputSize = 4 |
constexpr int64_t | kVmapStaticDimVecSize = 8 |
constexpr size_t | dim_bitset_size = 64 |
Distributions kernels adapted from THRandom.cpp. The kernels try to follow the std::random distributions signature. For instance, in ATen: auto gen = at::detail::createCPUGenerator(); at::uniform_real_distribution<double> uniform(0, 1); auto sample = uniform(gen.get());
This file provides distributions compatible with ATen/core/DistributionsHelper.h but backed with the std RNG implementation instead of the ATen one.
Contains the implementation of parallel reductions in TensorIterator.
This file contains some tensor-agnostic operations to be used in the core functions of the SobolEngine.
This file contains tensor-agnostic SobolEngine constants.
Flush-To-Zero and Denormals-Are-Zero mode.
vs. std::random: std::mt19937 gen; std::uniform_real_distribution<double> uniform(0, 1); auto sample = uniform(gen);
Flush-To-Zero (FTZ) and Denormals-Are-Zero (DAZ) are modes that bypass IEEE 754 methods of dealing with denormal floating-point numbers on x86-64 and some x86 CPUs. They result in reduced precision for values near zero, but increased performance.
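A minimal sketch of enabling this mode from C++. It assumes the at::globalContext().setFlushDenormal() switch, the same setting that torch.set_flush_denormal() exposes from Python; treat the member name and return value as assumptions rather than part of this listing.

#include <ATen/ATen.h>

void enable_ftz_daz() {
  // Assumed to return whether the mode could be enabled on this CPU,
  // mirroring the torch.set_flush_denormal contract.
  bool ok = at::globalContext().setFlushDenormal(true);
  (void)ok;
}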
Caffe2 mobile builds currently do not depend on all of ATen, so this is required to allow using the faster ATen RNG for normal builds while keeping the build size small on mobile. RNG performance typically doesn't matter on mobile builds, since the models are small and rarely use random initialization.
using at::acc_type = typedef typename AccumulateType<T, is_cuda>::type |
Definition at line 52 of file AccumulateType.h.
using at::BatchDims = typedef SmallVector<BatchDim, kBatchDimsStackSize> |
Definition at line 42 of file BatchedTensorImpl.h.
using at::BatchDimsRef = typedef ArrayRef<BatchDim> |
Definition at line 43 of file BatchedTensorImpl.h.
typedef uint64_t at::CallbackHandle |
Definition at line 446 of file record_function.h.
typedef c10::SmallVector<uint64_t, kSoftLimitCallbacks> at::CallbackHandles |
Definition at line 88 of file record_function.h.
using at::CheckedFrom = typedef const char* |
Definition at line 40 of file TensorUtils.h.
typedef const c10::intrusive_ptr< Quantizer > & at::ConstQuantizerPtr |
Definition at line 8 of file QuantizerBase.h.
using at::DataType = typedef caffe2::TypeIdentifier |
using at::DimMask = typedef TensorIteratorBase::DimMask |
Definition at line 13 of file TensorIterator.cpp.
using at::DimnameList = typedef c10::ArrayRef<Dimname> |
using at::DimVector = typedef SmallVector<int64_t, kDimVectorStaticSize> |
A container for sizes or strides.
Definition at line 11 of file DimVector.h.
using at::dist_acctype = typedef typename DistAccumType<T>::type |
Definition at line 29 of file TransformationHelper.h.
using at::loop2d_t = typedef TensorIteratorBase::loop2d_t |
Definition at line 12 of file TensorIteratorReduce.cpp.
using at::loop_t = typedef TensorIteratorBase::loop_t |
Definition at line 15 of file TensorIterator.cpp.
using at::NameVector = typedef SmallVector<Dimname, kDimVectorStaticSize> |
Definition at line 11 of file NamedTensorUtils.h.
typedef std::vector<std::unique_ptr<ObserverContext> > at::ObserverContextList |
Definition at line 89 of file record_function.h.
using at::PackedTensorAccessor32 = typedef GenericPackedTensorAccessor<T, N, PtrTraits, int32_t> |
Definition at line 228 of file TensorAccessor.h.
using at::PackedTensorAccessor64 = typedef GenericPackedTensorAccessor<T, N, PtrTraits, int64_t> |
Definition at line 231 of file TensorAccessor.h.
using at::PtrVector = typedef TensorIteratorBase::PtrVector |
Definition at line 14 of file TensorIterator.cpp.
using at::QuantizerPtr = typedef c10::intrusive_ptr<Quantizer> |
Definition at line 9 of file QuantizerBase.h.
typedef std::vector<std::pair<RecordFunctionCallback, CallbackHandle> > at::RecordFunctionCallbacks |
Definition at line 450 of file record_function.h.
typedef uint64_t at::RecordFunctionHandle |
Definition at line 90 of file record_function.h.
typedef c10::Stream at::Stream |
Definition at line 56 of file TensorBody.h.
using at::StrideVector = typedef TensorIteratorBase::StrideVector |
Definition at line 17 of file TensorIterator.cpp.
typedef ArrayRef< Tensor > at::TensorList |
Definition at line 54 of file TensorBody.h.
using at::VmapDimVector = typedef SmallVector<int64_t, kVmapStaticDimVecSize> |
Definition at line 39 of file VmapTransforms.h.
using at::VmapPhysicalViewVec = typedef SmallVector<VmapPhysicalView, kVmapTransformStaticInputSize> |
Definition at line 33 of file VmapTransforms.h.
enum class at::FastSetupType : uint8_t
Enumerators: NONE, CONTIGUOUS, CHANNELS_LAST, NON_OVERLAPPING_DENSE
Definition at line 134 of file TensorIterator.h.
enum class at::MemOverlap
Enumerators: NO, YES, TOO_HARD
Definition at line 14 of file MemoryOverlap.h.
enum class at::MemOverlapStatus
Enumerators: FULL, PARTIAL, NO, TOO_HARD
Definition at line 16 of file MemoryOverlap.h.
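These enumerators are what the overlap helpers listed earlier (has_internal_overlap, get_overlap_status, assert_no_overlap) report. A short sketch, with the expected results given in comments as expectations rather than guarantees:

#include <ATen/ATen.h>
#include <ATen/MemoryOverlap.h>

void overlap_checks() {
  at::Tensor base = at::zeros({4});
  at::Tensor expanded = base.expand({3, 4});  // stride 0 in dim 0, so elements alias

  at::MemOverlap internal = at::has_internal_overlap(expanded);
  // expected: MemOverlap::YES (zero stride over a dimension of size > 1)

  at::Tensor a = base.slice(/*dim=*/0, /*start=*/0, /*end=*/2);
  at::Tensor b = base.slice(/*dim=*/0, /*start=*/1, /*end=*/3);
  at::MemOverlapStatus status = at::get_overlap_status(a, b);
  // expected: MemOverlapStatus::PARTIAL, so assert_no_overlap(a, b) would throw

  (void)internal; (void)status;
}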
enum class at::NameType : uint8_t
Enumerators: BASIC, WILDCARD
static std::tuple< double, int64_t > at::__printFormat (std::ostream &stream, const Tensor &self)
Definition at line 40 of file Formatting.cpp.
References std::ceil(), defaultfloat(), std::floor(), caffe2::int64_t, at::native::isfinite(), std::log10(), torch.jit._trace::make_tuple(), caffe2.perfkernels.hp_emblookup_codegen::offset, std::pow(), scale, size(), stream, sz, and at::native::metal::mpscnn::z.
Referenced by __printMatrix(), and print().
static void at::__printIndent (std::ostream &stream, int64_t indent)
Definition at line 128 of file Formatting.cpp.
References torch::jit::indent(), caffe2::int64_t, and stream.
Referenced by __printMatrix().
static void at::__printMatrix (std::ostream &stream, const Tensor &self, int64_t linesize, int64_t indent)
Definition at line 139 of file Formatting.cpp.
References __printFormat(), __printIndent(), c, torch::jit::indent(), caffe2::int64_t, printScale(), caffe2::row, scale, size(), stream, and sz.
Referenced by __printTensor(), and print().
void at::__printTensor (std::ostream &stream, Tensor &self, int64_t linesize)
Definition at line 190 of file Formatting.cpp.
References __printMatrix(), caffe2::counter, caffe2::int64_t, size(), torch.cuda.profiler::start(), stream, and tensor.
Referenced by print().
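The __print* helpers above back the public tensor-printing entry points listed in the function table (print and the operator<< overload). A brief sketch:

#include <ATen/ATen.h>
#include <ATen/Formatting.h>
#include <iostream>

void show(const at::Tensor& t) {
  at::print(std::cout, t, /*linesize=*/80);  // explicit line width
  std::cout << t << "\n";                    // operator<< uses the default linesize of 80
}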
bool at::_all_equal_numel (at::ArrayRef< Tensor > tensors)  [inline]
Definition at line 195 of file CPUApplyUtils.h.
References caffe2::int64_t, numel(), and caffe2::tensors.
Referenced by _apply_preamble().
std::string at::_all_equal_numel_error (at::ArrayRef< Tensor > tensors)  [inline]
Definition at line 206 of file CPUApplyUtils.h.
References caffe2::tensors.
Referenced by _apply_preamble().
bool at::_apply_preamble (ArrayRef< Tensor > tensors)
Definition at line 222 of file CPUApplyUtils.h.
References _all_equal_numel(), _all_equal_numel_error(), AT_ERROR, checkDeviceType(), checkLayout(), c10::kCPU, c10::kStrided, at::Tensor::numel(), and caffe2::tensors.
Referenced by CPU_tensor_apply2(), CPU_tensor_apply3(), and CPU_tensor_apply4().
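_apply_preamble and the iterate/forward helpers documented below are the machinery behind the CPU_tensor_apply* templates in the function list. A hedged sketch of a typical CPU_tensor_apply2 call; it assumes the functor receives references to the element values, which is the usual pattern for these helpers:

#include <ATen/ATen.h>
#include <ATen/CPUApplyUtils.h>

void double_into(at::Tensor& out, const at::Tensor& in) {
  // Element-wise: out[i] = 2 * in[i], walking both tensors in lockstep.
  at::CPU_tensor_apply2<float, float>(
      out, in, [](float& o, const float& i) { o = 2.0f * i; });
}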
TORCH_API int at::_crash_if_asan (int arg)
Definition at line 13 of file Utils.cpp.
References setup::arg, and bench_ops::x.
Referenced by THPModule_crashIfATenASAN().
TORCH_API RecordFunctionCallbacks at::_getTLSCallbacks ()
Definition at line 338 of file record_function.cpp.
bool at::_isnan (T val)  [inline]
Definition at line 51 of file NumericUtils.h.
bool at::_isnan (T val)  [inline]
Definition at line 22 of file NumericUtils.h.
Referenced by _isnan(), at::native::MinMaxOps< scalar_t, acc_scalar_t, index_t >::combine(), at::native::detail::LessOrNan< scalar_t >::operator()(), at::native::detail::GreaterOrNan< scalar_t >::operator()(), and at::native::NanSumOps< acc_t, data_t >::reduce().
bool at::_isnan (T val)  [inline]
Definition at line 38 of file NumericUtils.h.
References c10::aten::isnan(), and val.
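A quick sketch of _isnan across a couple of overloads (the header also provides a floating-point overload; the integral overload simply returns false):

#include <ATen/NumericUtils.h>
#include <limits>

bool isnan_demo() {
  bool a = at::_isnan(std::numeric_limits<float>::quiet_NaN());  // true
  bool b = at::_isnan(int64_t{3});                               // false: integral overload
  return a && !b;
}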
int64_t at::_max_dim_tensors (ArrayRef< Tensor > tensors)
Definition at line 234 of file CPUApplyUtils.h.
References dim, caffe2::int64_t, std::max(), at::Tensor::ndimension(), and caffe2::tensors.
Referenced by CPU_tensor_apply2(), CPU_tensor_apply3(), and CPU_tensor_apply4().
TORCH_API void at::_setTLSCallbacks (const RecordFunctionCallbacks &callbacks)
Definition at line 342 of file record_function.cpp.
References r, and caffe2.experiments.python.net_construct_bench::sort.
Tensor at::addBatchDim (const Tensor &tensor, int64_t level, int64_t dim)
Definition at line 119 of file BatchedTensorImpl.cpp.
References dim, c10::SmallVectorImpl< T >::emplace_back(), caffe2.python.onnx.frontend::level, makeBatched(), maybeGetBatchedImpl(), and tensor.
Referenced by at::native::_add_batch_dim().
TORCH_API CallbackHandle at::addGlobalCallback (RecordFunctionCallback cb)
addGlobalCallback adds a global callback to run with RecordFunction:
WARNING: not thread safe, typically addGlobalCallback can be called only during the program initialization
Definition at line 372 of file record_function.cpp.
Referenced by pytorch_jni::PytorchJni::preModuleLoadSetupOnce(), and THPAutograd_initExtension().
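A hedged sketch of registering and removing a global observer. The RecordFunctionCallback constructor has changed shape across releases, so the simple start/end std::function<void(const RecordFunction&)> form used here is an assumption; only addGlobalCallback and removeCallback themselves are taken from this listing.

#include <ATen/record_function.h>
#include <iostream>

void install_observer() {
  auto handle = at::addGlobalCallback(
      at::RecordFunctionCallback(
          [](const at::RecordFunction& fn) { std::cout << "enter: " << fn.name().str() << "\n"; },
          [](const at::RecordFunction& fn) { std::cout << "exit: " << fn.name().str() << "\n"; }));
  // ... run some workload ...
  at::removeCallback(handle);  // the handle returned above, as documented
}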
TORCH_API CallbackHandle at::addThreadLocalCallback (RecordFunctionCallback cb)
addThreadLocalCallback adds a thread local callback to run with RecordFunction, returns handle to use with removeThreadLocalCallback
Definition at line 367 of file record_function.cpp.
Referenced by THPAutograd_initExtension().
static Tensor at::alignBatchDimsAtFront (const Tensor &self, std::bitset< kVmapNumLevels > requested_levels, int64_t requested_example_dim)
Definition at line 125 of file VmapTransforms.cpp.
References c10::aten::copy(), getPhysicalTensorAndLevels(), caffe2::int64_t, caffe2.python.onnx.frontend::level, c10::SmallVectorTemplateCommon< T, typename >::rbegin(), c10::ArrayRef< T >::size(), at::Tensor::sizes(), and TORCH_INTERNAL_ASSERT.
Referenced by at::MultiBatchVmapTransform::logicalToPhysical(), and at::BroadcastingVmapTransform::logicalToPhysical().
void at::apply_op ()  [inline]
Definition at line 311 of file CPUApplyUtils.h.
Referenced by CPU_tensor_apply2(), CPU_tensor_apply3(), and CPU_tensor_apply4().
void at::apply_op (int64_t numel, int64_t offset, const Op &op, Args... iters)  [inline]
Definition at line 315 of file CPUApplyUtils.h.
References forward(), caffe2::int64_t, iterate(), iterate_continue(), iterate_overflow(), max_dim(), numel(), and caffe2.perfkernels.hp_emblookup_codegen::offset.
bool at::are_expandable (IntArrayRef shape1, IntArrayRef shape2)  [inline]
Definition at line 25 of file ExpandUtils.h.
References caffe2::int64_t, and c10::ArrayRef< T >::size().
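are_expandable and infer_size (both in the function list above) implement the usual right-aligned broadcast check; a tiny sketch with the expected values in comments:

#include <ATen/ExpandUtils.h>
#include <vector>

void broadcast_demo() {
  bool ok = at::are_expandable({3, 1, 5}, {4, 5});               // true: dims align from the right
  std::vector<int64_t> out = at::infer_size({3, 1, 5}, {4, 5});  // {3, 4, 5}
  (void)ok; (void)out;
}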
static bool at::areAllReturnsTensors (const FunctionSchema &schema)
Definition at line 26 of file BatchedFallback.cpp.
References setup::arg.
Referenced by batchedTensorForLoopFallback().
static bool at::areAnyArgumentsTensorList (const FunctionSchema &schema)
Definition at line 33 of file BatchedFallback.cpp.
References setup::arg.
Referenced by batchedTensorForLoopFallback().
static bool at::areBdimsAtFrontInOrder (BatchDimsRef bdims)
Definition at line 7 of file VmapTransforms.cpp.
References dim, c10::attr::idx(), caffe2::int64_t, and c10::ArrayRef< T >::size().
Referenced by permuteBatchDimsToFront().
Tensor at::as_strided_batching_rule (const Tensor &tensor, IntArrayRef sizes, IntArrayRef strides, optional< int64_t > storage_offset)
Definition at line 565 of file BatchingRegistrations.cpp.
References checkBasicAsStridedValidForSlice(), checkBatchDimsAtFrontInLayout(), c10::SmallVectorTemplateCommon< T, typename >::end(), c10::SmallVectorImpl< T >::insert(), at::MultiBatchVmapTransform::logicalToPhysical(), at::namedinference::num_batch_dims(), c10::SmallVectorImpl< T >::reserve(), sizes, strides, tensor, and TORCH_CHECK.
Referenced by TORCH_LIBRARY_IMPL().
void at::assert_no_internal_overlap (const Tensor &t)
Definition at line 28 of file MemoryOverlap.cpp.
References assert_no_internal_overlap(), and at::Tensor::unsafeGetTensorImpl().
Referenced by at::native::_cat_out_cpu(), assert_no_internal_overlap(), at::native::templates::bernoulli_impl_(), at::TensorIteratorBase::compute_mem_overlaps(), at::native::gather_out_cpu_cuda(), at::native::index_add_cpu_(), at::native::index_copy_(), at::native::index_out(), at::native::index_select_out_cpu_(), at::native::masked_scatter__cpu(), at::native::masked_select_out_impl_cpu(), at::native::scatter_(), at::native::scatter_add_(), at::native::scatter_fill_(), at::native::scatter_reduce_(), at::native::scatter_scalar_reduce_(), and at::native::take_out_cpu_template().
TORCH_API void at::assert_no_internal_overlap (TensorImpl *t)
Definition at line 32 of file MemoryOverlap.cpp.
References has_internal_overlap(), TORCH_CHECK, and YES.
void at::assert_no_overlap (const Tensor &a, const Tensor &b)
Definition at line 81 of file MemoryOverlap.cpp.
References caffe2.contrib.aten.docs.sample::a, assert_no_overlap(), and at::native::metal::mpscnn::b.
Referenced by at::native::_index_put_impl_(), assert_no_overlap(), at::native::gather_out_cpu_cuda(), at::native::index_add_cpu_(), at::native::index_copy_(), at::native::index_fill_(), at::native::index_out(), at::native::index_select_out_cpu_(), at::native::masked_select_out_impl_cpu(), at::native::scatter_(), at::native::scatter_add_(), at::native::scatter_fill_(), at::native::scatter_reduce_(), at::native::scatter_scalar_reduce_(), and at::native::take_out_cpu_template().
TORCH_API void at::assert_no_overlap (TensorImpl *a, TensorImpl *b)
Definition at line 85 of file MemoryOverlap.cpp.
References caffe2.contrib.aten.docs.sample::a, at::native::metal::mpscnn::b, FULL, get_overlap_status(), PARTIAL, and TORCH_CHECK.
void at::assert_no_partial_overlap (const Tensor &a, const Tensor &b)
Definition at line 70 of file MemoryOverlap.cpp.
References caffe2.contrib.aten.docs.sample::a, assert_no_partial_overlap(), and at::native::metal::mpscnn::b.
Referenced by assert_no_partial_overlap(), at::TensorIteratorBase::compute_mem_overlaps(), at::native::gather_out_cpu_cuda(), at::native::masked_fill__cuda(), at::native::masked_fill_impl_cpu(), and at::native::take_out_cpu_template().
void at::assert_no_partial_overlap (TensorImpl *a, TensorImpl *b)
Definition at line 74 of file MemoryOverlap.cpp.
References caffe2.contrib.aten.docs.sample::a, at::native::metal::mpscnn::b, get_overlap_status(), PARTIAL, and TORCH_CHECK.
void at::batchedTensorForLoopFallback (const c10::OperatorHandle &op, torch::jit::Stack *stack)
Definition at line 245 of file BatchedFallback.cpp.
References areAllReturnsTensors(), areAnyArgumentsTensorList(), caffe2::argument, arguments, batch_sizes, batchedTensorInplaceForLoopFallback(), c10::SmallVectorTemplateCommon< T, typename >::begin(), computeIndex(), at::Tensor::defined(), torch::jit::drop(), c10::SmallVectorTemplateCommon< T, typename >::end(), c10::attr::idx(), index, c10::SmallVectorImpl< T >::insert(), caffe2::int64_t, isInplaceOp(), torch::jit::last(), at::MultiBatchVmapTransform::logicalToPhysical(), maybeGetBatchedImpl(), at::namedinference::num_batch_dims(), prod_intlist(), torch::jit::push(), c10::SmallVectorTemplateBase< T, isPodLike >::push_back(), caffe2::returns(), safeStack(), c10::SmallVectorTemplateCommon< T, typename >::size(), torch.distributions.constraints::stack, tensor, TORCH_CHECK, TORCH_INTERNAL_ASSERT, and warnFallback().
void at::batchedTensorInplaceForLoopFallback (const c10::OperatorHandle &op, torch::jit::Stack *stack)
Definition at line 87 of file BatchedFallback.cpp.
References caffe2::argument, arguments, batch_sizes, c10::SmallVectorTemplateCommon< T, typename >::begin(), computeIndex(), createVmapLevelsBitset(), at::Tensor::defined(), torch::jit::drop(), c10::SmallVectorTemplateCommon< T, typename >::end(), llvm::findLastSet(), c10::attr::idx(), index, caffe2::int64_t, torch::jit::last(), at::MultiBatchVmapTransform::logicalToPhysical(), maybeGetBatchedImpl(), at::namedinference::num_batch_dims(), prod_intlist(), torch::jit::push(), c10::SmallVectorTemplateBase< T, isPodLike >::push_back(), c10::SmallVectorTemplateCommon< T, typename >::size(), torch.distributions.constraints::stack, tensor, TORCH_CHECK, TORCH_INTERNAL_ASSERT, and warnFallback().
Referenced by batchedTensorForLoopFallback().
Tensor at::binary_pointwise_batching_rule (const Tensor &self, const Tensor &other, ExtraArgs... args)
Definition at line 85 of file BatchingRegistrations.cpp.
References compare-fastrnn-results::args, dim, at::Tensor::dim(), isPhysicalScalarTensor(), at::MultiBatchVmapTransform::logicalToPhysical(), at::BroadcastingVmapTransform::logicalToPhysical(), at::meta::other, at::native::result_type(), and tensor.
Referenced by TORCH_LIBRARY_IMPL().
Tensor at::bmm_batching_rule (const Tensor &self, const Tensor &other)
Definition at line 833 of file BatchingRegistrations.cpp.
References dim, at::Tensor::dim(), at::BroadcastingVmapTransform::logicalToPhysical(), torch.backends.cuda::matmul, at::meta::other, at::Tensor::sizes(), tensor, and TORCH_CHECK.
Referenced by TORCH_LIBRARY_IMPL().
TORCH_API void at::bumpRecordAllFunctions ()
Definition at line 487 of file record_function.cpp.
Referenced by at::CallbackManager::addGlobalCallback(), at::CallbackManager::addThreadLocalCallback(), and at::ThreadLocalStateGuard::ThreadLocalStateGuard().
at::C10_DECLARE_REGISTRY (CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs)
at::C10_DECLARE_REGISTRY (HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs)
at::C10_DECLARE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device)
at::C10_DEFINE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device)
Tensor at::cat_batching_rule (TensorList tensors, int64_t dim)
Definition at line 872 of file BatchingRegistrations.cpp.
References torch.distributions.constraints::cat, dim, c10::fmap(), at::MultiBatchVmapTransform::logicalToPhysical(), caffe2::tensors, TORCH_INTERNAL_ASSERT, and at::native::metal::view().
Referenced by TORCH_LIBRARY_IMPL().
void at::check_defined (std::initializer_list< std::reference_wrapper< const Tensor > > tensors, const char *api_name)  [inline]
Definition at line 40 of file ExpandUtils.h.
References AT_ERROR, at::Tensor::defined(), and caffe2::tensors.
Referenced by expand_inplace(), expand_outplace(), and expand_size().
TORCH_API void at::check_dim_size (const Tensor &tensor, int64_t dim, int64_t dim_size, int64_t size)
Definition at line 286 of file TensorUtils.cpp.
References dim, at::Tensor::dim(), size(), at::Tensor::size(), tensor, and TORCH_CHECK.
Referenced by at::meta::upsample_nearest1d_backward().
static void at::check_for_misalignment (const Dimname &name, DimnameList names, DimnameList other_names, const char *action)
Definition at line 52 of file NamedTensorUtils.cpp.
References fastrnns.bench::action, c10::ArrayRef< T >::begin(), c10::ArrayRef< T >::end(), caffe2::it, name, microbenchmarks::names, and TORCH_CHECK.
Referenced by unify_from_right().
template<typename T > static T * at::check_generator (c10::optional< Generator > gen)  [inline]
Utility function to static cast input Generator* to the backend generator type (CPU/CUDAGeneratorImpl etc.)
Definition at line 113 of file Utils.h.
References caffe2.python.gradient_check_test::device_type, tools.autograd.gen_python_functions::gen(), and TORCH_CHECK.
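check_generator and get_generator_or_default are how CPU kernels typically unpack an optional Generator argument. A hedged sketch; the kernel name and body are illustrative, and the header choices are assumptions:

#include <ATen/ATen.h>
#include <ATen/CPUGeneratorImpl.h>
#include <ATen/Utils.h>
#include <mutex>

at::Tensor& my_random_(at::Tensor& self, c10::optional<at::Generator> gen_opt) {
  // Fall back to the default CPU generator when none was supplied,
  // then statically cast to the backend implementation type.
  at::CPUGeneratorImpl* gen = at::get_generator_or_default<at::CPUGeneratorImpl>(
      gen_opt, at::detail::getDefaultCPUGenerator());
  // Generator state is guarded by a per-generator mutex by convention.
  std::lock_guard<std::mutex> lock(gen->mutex_);
  // ... draw values from *gen and write them into self ...
  return self;
}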
std::array< int64_t, N > at::check_intlist (ArrayRef< int64_t > list, const char *name, int pos)
Definition at line 74 of file Utils.h.
References AT_ERROR, c10::prim::list(), N, name, and caffe2::python::res.
void at::check_names_valid_for (const Tensor &tensor, DimnameList names)
Definition at line 51 of file NamedTensor.cpp.
References at::impl::check_names_valid_for(), microbenchmarks::names, tensor, and at::Tensor::unsafeGetTensorImpl().
Referenced by at::native::align_to(), and at::native::refine_names().
void at::check_names_valid_for (size_t tensor_dim, DimnameList names)
Definition at line 55 of file NamedTensor.cpp.
References check_unique_names(), kMaxNamedTensorDim, microbenchmarks::names, and TORCH_CHECK.
void at::check_size_nonnegative (IntArrayRef size)  [inline]
Definition at line 131 of file Utils.h.
References size(), TORCH_CHECK, and bench_ops::x.
Referenced by at::detail::empty_cpu(), at::native::empty_out(), at::native::empty_strided_cpu(), and new_qtensor().
static void at::check_unique_names (DimnameList names)
Definition at line 37 of file NamedTensor.cpp.
References caffe2::it, microbenchmarks::names, and TORCH_CHECK.
Referenced by check_names_valid_for().
static void at::check_valid_identifier (const std::string &name)
Definition at line 37 of file Dimname.cpp.
References at::Dimname::isValidName(), name, and TORCH_CHECK.
Referenced by at::Dimname::fromSymbol().
TORCH_API void at::checkAllContiguous (CheckedFrom c, at::ArrayRef< TensorArg > ts)
Definition at line 62 of file TensorUtils.cpp.
References c, checkContiguous(), and at::Tensor::defined().
TORCH_API void at::checkAllDefined (CheckedFrom c, ArrayRef< TensorArg > ts)
Definition at line 209 of file TensorUtils.cpp.
References c, and checkDefined().
void at::checkAllSame (CheckedFrom c, ArrayRef< TensorArg > tensors, void(*fn)(CheckedFrom, const TensorArg &, const TensorArg &))
Definition at line 85 of file TensorUtils.cpp.
References c, at::Tensor::defined(), caffe2.perfkernels.hp_emblookup_codegen::fn, and caffe2::tensors.
Referenced by checkAllSameGPU(), checkAllSameNumel(), checkAllSameSize(), and checkAllSameType().
TORCH_API void at::checkAllSameGPU (CheckedFrom c, ArrayRef< TensorArg > tensors)
Definition at line 150 of file TensorUtils.cpp.
References c, checkAllSame(), checkSameGPU(), and caffe2::tensors.
TORCH_API void at::checkAllSameNumel (CheckedFrom c, ArrayRef< TensorArg > tensors)
Definition at line 126 of file TensorUtils.cpp.
References c, checkAllSame(), checkSameNumel(), and caffe2::tensors.
void at::checkAllSameSize (CheckedFrom c, ArrayRef< TensorArg > tensors)
Definition at line 105 of file TensorUtils.cpp.
References c, checkAllSame(), checkSameSize(), and caffe2::tensors.
TORCH_API void at::checkAllSameType (CheckedFrom c, ArrayRef< TensorArg > tensors)
Definition at line 190 of file TensorUtils.cpp.
References c, checkAllSame(), checkSameType(), and caffe2::tensors.
TORCH_API void at::checkBackend (CheckedFrom c, at::ArrayRef< Tensor > tensors, at::Backend backend)
Definition at line 224 of file TensorUtils.cpp.
References c, checkBackend(), and caffe2::tensors.
void at::checkBackend (CheckedFrom c, const Tensor &t, Backend backend)
Definition at line 216 of file TensorUtils.cpp.
References c10::TensorOptions::backend(), c, at::Tensor::defined(), at::Tensor::options(), TORCH_CHECK, and c10::toString().
Referenced by at::native::addcdiv_out(), at::native::addcmul_out(), at::native::batch_norm_cpu(), and checkBackend().
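A short sketch of how these check* helpers are typically used at the top of an operator implementation; "my_op" and the arguments are illustrative:

#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>

void my_op_checks(const at::Tensor& self, const at::Tensor& weight) {
  // CheckedFrom is just a const char* naming the caller for error messages.
  at::checkBackend("my_op", {self, weight}, at::Backend::CPU);
  at::checkDeviceType("my_op", {self, weight}, at::DeviceType::CPU);
  at::checkAllSameType("my_op", {at::TensorArg(self, "self", 1),
                                 at::TensorArg(weight, "weight", 2)});
}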
static void at::checkBasicAsStridedValidForSlice (const Tensor &physical_tensor, int64_t num_batch_dims, IntArrayRef sizes, IntArrayRef strides, optional< int64_t > maybe_storage_offset)
Definition at line 506 of file BatchingRegistrations.cpp.
References maximum_indexable_location(), at::namedinference::num_batch_dims(), sizes, at::Tensor::sizes(), c10::ArrayRef< T >::slice(), at::Tensor::storage_offset(), strides, at::Tensor::strides(), TORCH_CHECK, and c10::optional< T >::value_or().
Referenced by as_strided_batching_rule().
static void at::checkBatchDimsAtFrontInLayout (IntArrayRef physical_strides, int64_t num_batch_dims)
Definition at line 473 of file BatchingRegistrations.cpp.
References c10::ArrayRef< T >::begin(), c10::ArrayRef< T >::end(), at::namedinference::num_batch_dims(), and TORCH_CHECK.
Referenced by as_strided_batching_rule().
TORCH_API void at::checkContiguous (CheckedFrom c, const TensorGeometryArg &t)
Definition at line 55 of file TensorUtils.cpp.
References c, at::Tensor::is_contiguous(), and TORCH_CHECK.
Referenced by at::native::_embedding_bag_backward(), at::native::_embedding_bag_per_sample_weights_backward_cpu_template(), and checkAllContiguous().
TORCH_API void at::checkDefined (CheckedFrom c, const TensorArg &t)
Definition at line 202 of file TensorUtils.cpp.
References c, at::Tensor::defined(), and TORCH_CHECK.
Referenced by checkAllDefined().
TORCH_API void at::checkDeviceType (CheckedFrom c, at::ArrayRef< Tensor > tensors, at::DeviceType device_type)
Definition at line 238 of file TensorUtils.cpp.
References c, checkDeviceType(), caffe2.python.gradient_check_test::device_type, and caffe2::tensors.
void at::checkDeviceType(CheckedFrom c, const Tensor& t, DeviceType device_type)
Definition at line 230 of file TensorUtils.cpp.
References c, at::Tensor::defined(), at::Tensor::device(), caffe2.python.gradient_check_test::device_type, TORCH_CHECK, and c10::Device::type().
Referenced by _apply_preamble(), checkDeviceType(), at::native::einsum(), at::native::inner(), and at::native::inner_out().
TORCH_API void at::checkDim(CheckedFrom c, const Tensor& tensor, const char* name, int pos, int64_t dim)
Definition at line 22 of file TensorUtils.cpp.
References c, dim, at::Tensor::dim(), name, tensor, and TORCH_CHECK.
Referenced by at::native::adaptive_avg_pool1d(), at::native::adaptive_max_pool1d(), at::native::avg_pool1d(), at::native::bmm_out_or_baddbmm_(), checkSize(), at::native::embedding_renorm_cpu_(), and at::native::max_pool1d_with_indices().
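A minimal sketch of how a kernel might combine these checks; the op name "my_op" and its two arguments are illustrative, not taken from the source:

    #include <ATen/TensorUtils.h>

    void my_op_check(const at::Tensor& input, const at::Tensor& weight) {
      at::CheckedFrom c = "my_op";                      // caller name shown in error messages
      at::TensorArg input_arg{input, "input", 1};
      at::TensorArg weight_arg{weight, "weight", 2};
      at::checkAllDefined(c, {input_arg, weight_arg});
      at::checkDim(c, input_arg, 4);                    // e.g. expect an NCHW input
      at::checkAllSameType(c, {input_arg, weight_arg});
    }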
TORCH_API void at::checkDim(CheckedFrom c, const TensorGeometryArg& t, int64_t dim)
Definition at line 41 of file TensorUtils.cpp.
References c, dim, at::Tensor::dim(), and TORCH_CHECK.
TORCH_API void at::checkDimRange(CheckedFrom c, const TensorGeometryArg& t, int64_t dim_start, int64_t dim_end)
Definition at line 47 of file TensorUtils.cpp.
References c, at::Tensor::dim(), and TORCH_CHECK.
static |
Definition at line 21 of file DynamicLibrary.cpp.
References AT_ERROR, and bench_ops::x.
Referenced by at::DynamicLibrary::DynamicLibrary(), and at::DynamicLibrary::sym().
inline static
Definition at line 51 of file Utils.h.
References AT_ERROR, caffe2.python.gradient_check_test::device_type, name, detail::scalar_type(), and caffe2::tensors.
inline static
Definition at line 29 of file Utils.h.
References AT_ERROR, at::Tensor::defined(), at::Tensor::device(), caffe2.python.gradient_check_test::device_type, at::Tensor::layout(), name, at::Tensor::scalar_type(), detail::scalar_type(), c10::Device::type(), and at::Tensor::unsafeGetTensorImpl().
Referenced by at::native::legacy::cuda::_th_copy_ignoring_overlaps_(), at::native::legacy::cuda::_th_cross_kernel(), at::native::legacy::cuda::_th_cross_kernel_out(), at::native::legacy::cuda::_th_gels(), at::native::legacy::cpu::_th_gels(), at::native::legacy::cuda::_th_gels_out(), at::native::legacy::cpu::_th_gels_out(), at::native::legacy::cuda::_th_geqrf(), at::native::legacy::cpu::_th_geqrf(), at::native::legacy::cuda::_th_geqrf_out(), at::native::legacy::cpu::_th_geqrf_out(), at::native::legacy::cpu::_th_histc(), at::native::legacy::cpu::_th_histc_out(), at::native::legacy::cuda::_th_index_copy_(), at::native::legacy::cpu::_th_index_copy_(), at::native::legacy::cuda::_th_masked_fill_(), at::native::legacy::cuda::_th_masked_fill_bool_(), at::native::legacy::cuda::_th_mode(), at::native::legacy::cpu::_th_mode(), at::native::legacy::cuda::_th_mode_out(), at::native::legacy::cpu::_th_mode_out(), at::native::legacy::cpu::_th_nonzero(), at::native::legacy::cpu::_th_nonzero_out(), at::native::legacy::cpu::_th_ormqr(), at::native::legacy::cpu::_th_ormqr_out(), at::native::legacy::cuda::_th_put_(), at::native::legacy::cpu::_th_put_(), at::native::legacy::cuda::_th_renorm(), at::native::legacy::cpu::_th_renorm(), at::native::legacy::cuda::_th_renorm_(), at::native::legacy::cpu::_th_renorm_(), at::native::legacy::cuda::_th_renorm_out(), at::native::legacy::cpu::_th_renorm_out(), at::native::legacy::cuda::_th_sort(), at::native::legacy::cuda::_th_sort_out(), at::native::legacy::cpu::_th_std(), at::native::legacy::cuda::_th_topk(), at::native::legacy::cuda::_th_topk_out(), at::native::legacy::cpu::_th_var(), at::native::legacy::cuda::_thnn_conv2d_backward(), at::native::legacy::cuda::_thnn_conv2d_backward_out(), at::native::legacy::cuda::_thnn_conv2d_forward(), at::native::legacy::cuda::_thnn_conv2d_forward_out(), at::native::legacy::cuda::_thnn_conv_depthwise2d_backward(), at::native::legacy::cuda::_thnn_conv_depthwise2d_backward_out(), at::native::legacy::cuda::_thnn_conv_depthwise2d_forward(), at::native::legacy::cuda::_thnn_conv_depthwise2d_forward_out(), at::native::legacy::cuda::_thnn_glu_backward(), at::native::legacy::cuda::_thnn_glu_backward_out(), at::native::legacy::cuda::_thnn_glu_forward(), at::native::legacy::cuda::_thnn_glu_forward_out(), at::native::legacy::cuda::_thnn_log_sigmoid_backward(), at::native::legacy::cuda::_thnn_log_sigmoid_backward_out(), at::native::legacy::cuda::_thnn_log_sigmoid_forward(), at::native::legacy::cuda::_thnn_log_sigmoid_forward_out(), at::native::legacy::cuda::_thnn_multi_margin_loss_backward(), at::native::legacy::cuda::_thnn_multi_margin_loss_backward_out(), at::native::legacy::cuda::_thnn_multi_margin_loss_forward(), at::native::legacy::cuda::_thnn_multi_margin_loss_forward_out(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_backward(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_backward_out(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_forward(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_forward_out(), at::native::legacy::cuda::_thnn_nll_loss2d_backward(), at::native::legacy::cuda::_thnn_nll_loss2d_backward_out(), at::native::legacy::cuda::_thnn_nll_loss2d_forward(), at::native::legacy::cuda::_thnn_nll_loss2d_forward_out(), at::native::legacy::cuda::_thnn_nll_loss_backward(), at::native::legacy::cuda::_thnn_nll_loss_backward_out(), at::native::legacy::cuda::_thnn_nll_loss_forward(), at::native::legacy::cuda::_thnn_nll_loss_forward_out(), at::native::legacy::cuda::_thnn_rrelu_with_noise_forward(), 
at::native::legacy::cuda::_thnn_rrelu_with_noise_forward_(), and at::native::legacy::cuda::_thnn_rrelu_with_noise_forward_out().
TORCH_API void at::checkLayout(CheckedFrom c, at::ArrayRef<Tensor> tensors, at::Layout layout)
Definition at line 252 of file TensorUtils.cpp.
References c, checkLayout(), caffe2::layout, and caffe2::tensors.
TORCH_API void at::checkLayout(CheckedFrom c, const Tensor& t, Layout layout)
Definition at line 244 of file TensorUtils.cpp.
References c, at::Tensor::defined(), at::Tensor::layout(), caffe2::layout, and TORCH_CHECK.
Referenced by _apply_preamble(), and checkLayout().
TORCH_API void at::checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel)
Definition at line 109 of file TensorUtils.cpp.
References c, at::Tensor::numel(), numel(), and TORCH_CHECK.
TORCH_API bool at::checkRecordAllFunctions()
Definition at line 495 of file record_function.cpp.
Referenced by at::ThreadLocalState::ThreadLocalState().
TORCH_API void at::checkSameDim(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2)
Definition at line 194 of file TensorUtils.cpp.
References c, pyspybench::t1, pyspybench::t2, and TORCH_CHECK.
TORCH_API void at::checkSameGPU(CheckedFrom c, const TensorArg& t1, const TensorArg& t2)
Definition at line 130 of file TensorUtils.cpp.
References AT_ERROR, c, pyspybench::t1, pyspybench::t2, and TORCH_CHECK.
Referenced by checkAllSameGPU().
void at::checkSameNumel(CheckedFrom c, const TensorArg& t1, const TensorArg& t2)
Definition at line 117 of file TensorUtils.cpp.
References c, pyspybench::t1, pyspybench::t2, and TORCH_CHECK.
Referenced by checkAllSameNumel().
TORCH_API void at::checkSameNumel(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2)
TORCH_API void at::checkSameSize(CheckedFrom c, const TensorArg& t1, const TensorArg& t2)
Definition at line 97 of file TensorUtils.cpp.
References c, pyspybench::t1, pyspybench::t2, and TORCH_CHECK.
Referenced by checkAllSameSize(), at::native::dequantize_tensor_per_channel_affine(), at::native::dequantize_tensor_per_channel_float_qparams(), at::native::dequantize_tensor_per_tensor_affine(), at::native::log_softmax_backward_cpu(), at::native::quantize_tensor_per_channel_affine(), at::native::quantize_tensor_per_channel_float_qparams(), at::native::quantize_tensor_per_tensor_affine(), at::native::softmax_backward_cpu(), and at::native::softmax_backward_sparse_input_preprocessing().
TORCH_API void at::checkSameType(CheckedFrom c, const TensorArg& t1, const TensorArg& t2)
Definition at line 154 of file TensorUtils.cpp.
References c, pyspybench::t1, pyspybench::t2, and TORCH_CHECK.
Referenced by at::native::_embedding_bag_backward(), at::native::_embedding_bag_cpu_impl(), and checkAllSameType().
TORCH_API void at::checkScalarType(CheckedFrom c, const TensorArg& t, ScalarType ty)
Definition at line 162 of file TensorUtils.cpp.
References c, at::Tensor::scalar_type(), TORCH_CHECK, at::Tensor::toString(), and c10::toString().
TORCH_API void at::checkScalarTypes(CheckedFrom c, const TensorArg& t, at::ArrayRef<ScalarType> l)
Definition at line 170 of file TensorUtils.cpp.
References AT_ERROR, c10::ArrayRef< T >::begin(), c, c10::ArrayRef< T >::end(), at::Tensor::scalar_type(), at::Tensor::toString(), and c10::toString().
Referenced by at::native::_embedding_bag_backward(), at::native::_embedding_bag_cpu_impl(), at::native::_embedding_bag_dense_backward_cpu(), at::native::_embedding_bag_per_sample_weights_backward_cpu_template(), at::native::embedding(), at::native::embedding_dense_backward_cpu(), at::native::embedding_renorm_cpu_(), and at::native::embedding_sparse_backward().
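For instance, restricting an index argument to integral dtypes might look like this sketch (the op name and the indices tensor are assumed for illustration):

    at::TensorArg indices_arg{indices, "indices", 2};
    at::checkScalarTypes("embedding_like_op", indices_arg, {at::kLong, at::kInt});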
TORCH_API void at::checkSize(CheckedFrom c, const TensorGeometryArg& t, int64_t dim, int64_t size)
Definition at line 77 of file TensorUtils.cpp.
References c, dim, size(), at::Tensor::size(), and TORCH_CHECK.
TORCH_API void at::checkSize(CheckedFrom c, const TensorGeometryArg& t, IntArrayRef sizes)
Definition at line 69 of file TensorUtils.cpp.
References c, checkDim(), c10::ArrayRef< T >::equals(), sizes, at::Tensor::sizes(), and TORCH_CHECK.
Definition at line 182 of file BatchingRegistrations.cpp.
References at::native::chunk(), dim, and at::MultiBatchVmapTransform::logicalToPhysical().
Referenced by TORCH_LIBRARY_IMPL().
Tensor at::clamp_batching_rule(const Tensor& self, optional<Scalar> min, optional<Scalar> max)
Definition at line 190 of file BatchingRegistrations.cpp.
References c10::aten::clamp(), at::MultiBatchVmapTransform::logicalToPhysical(), max, and min.
Referenced by TORCH_LIBRARY_IMPL().
Definition at line 202 of file BatchingRegistrations.cpp.
References at::native::clamp_max(), at::MultiBatchVmapTransform::logicalToPhysical(), and max.
Referenced by TORCH_LIBRARY_IMPL().
Definition at line 196 of file BatchingRegistrations.cpp.
References at::native::clamp_min(), at::MultiBatchVmapTransform::logicalToPhysical(), and min.
Referenced by TORCH_LIBRARY_IMPL().
TORCH_API void at::clearCallbacks()
Definition at line 389 of file record_function.cpp.
References matmul_dlmc_bench::m.
Referenced by main(), and THPAutograd_initExtension().
TORCH_API void at::clearGlobalCallbacks()
clearGlobalCallbacks removes all global callbacks. WARNING: not thread safe
Definition at line 381 of file record_function.cpp.
TORCH_API void at::clearThreadLocalCallbacks()
clearThreadLocalCallbacks removes all thread local callbacks
Definition at line 385 of file record_function.cpp.
Tensor at::clone_batching_rule(const Tensor& self, optional<MemoryFormat> memory_format)
Definition at line 718 of file BatchingRegistrations.cpp.
References at::native::clone(), at::MultiBatchVmapTransform::logicalToPhysical(), makeBatched(), c10::memory_format(), TORCH_CHECK, TORCH_INTERNAL_ASSERT, and unsafeGetBatchedImpl().
Referenced by TORCH_LIBRARY_IMPL().
inline |
Definition at line 28 of file CPUApplyUtils.h.
References caffe2::dims, caffe2::int64_t, sizes, strides, and TORCH_CHECK.
Referenced by at::strided_tensor_iter< T >::strided_tensor_iter(), and at::strided_tensor_iter_fixed< T, N >::strided_tensor_iter_fixed().
Tensor at::comparison_pointwise_batching_rule(const Tensor& self, const Tensor& other)
Definition at line 1010 of file BatchingRegistrations.cpp.
References at::BroadcastingVmapTransform::logicalToPhysical(), at::meta::other, and tensor.
static |
Definition at line 82 of file VmapTransforms.cpp.
References dim, c10::SmallVectorImpl< T >::emplace_back(), caffe2::int64_t, kVmapNumLevels, and caffe2.python.onnx.frontend::level.
Referenced by at::VmapPhysicalToLogicalMap::apply().
static |
Definition at line 13 of file BatchedFallback.cpp.
References matmul_dlmc_bench::end, caffe2::it, c10::SmallVectorTemplateBase< T, isPodLike >::push_back(), at::native::remainder(), c10::SmallVectorImpl< T >::reserve(), fastrnns.custom_lstms::reverse(), and sizes.
Referenced by batchedTensorForLoopFallback(), and batchedTensorInplaceForLoopFallback().
Tensor at::contiguous_batching_rule(const Tensor& self, MemoryFormat memory_format)
Definition at line 445 of file BatchingRegistrations.cpp.
References at::MultiBatchVmapTransform::logicalToPhysical(), c10::memory_format(), and TORCH_CHECK.
Referenced by TORCH_LIBRARY_IMPL().
inline static
Definition at line 247 of file Context.h.
References c10::CPU, at::DeprecatedTypePropertiesRegistry::getDeprecatedTypeProperties(), and globalDeprecatedTypePropertiesRegistry().
Referenced by torch::jit::initJITBindings(), and torch::jit::Unpickler::readInstruction().
static |
Definition at line 22 of file CPUFixedAllocator.h.
References caffe2::function, and state.
static |
Definition at line 14 of file CPUFixedAllocator.h.
References AT_ERROR.
static |
Definition at line 18 of file CPUFixedAllocator.h.
References AT_ERROR.
inline |
Definition at line 345 of file CPUApplyUtils.h.
References _apply_preamble(), _max_dim_tensors(), apply_op(), and at::Tensor::numel().
inline |
Definition at line 367 of file CPUApplyUtils.h.
References _apply_preamble(), _max_dim_tensors(), apply_op(), and at::Tensor::numel().
inline |
Definition at line 395 of file CPUApplyUtils.h.
References _apply_preamble(), _max_dim_tensors(), apply_op(), and at::Tensor::numel().
inline |
Definition at line 117 of file BatchedTensorImpl.h.
Referenced by at::BatchedTensorImpl::actualDim(), and permuteBatchDimsToFront().
inline |
Definition at line 157 of file context_base.h.
References device.
Referenced by caffe2::ContextFromProto(), caffe2::python::TensorFetcher::FetchTensor(), and caffe2::TensorSerializer::Serialize().
inline |
Definition at line 126 of file BatchedTensorImpl.h.
Referenced by batchedTensorInplaceForLoopFallback(), getLevelsAndLargestLogicalDim(), getPhysicalTensorAndLevels(), inplaceIsVmapCompatible(), and at::MultiBatchVmapTransform::logicalToPhysical().
inline static
Definition at line 252 of file Context.h.
References c10::CUDA, at::DeprecatedTypePropertiesRegistry::getDeprecatedTypeProperties(), and globalDeprecatedTypePropertiesRegistry().
DimnameList at::default_names(size_t len)
Definition at line 29 of file NamedTensor.cpp.
References kMaxNamedTensorDim, c10::aten::len(), TORCH_INTERNAL_ASSERT, and at::Dimname::wildcard().
Referenced by at::impl::get_names().
inline |
Definition at line 18 of file Formatting.cpp.
Referenced by __printFormat(), print(), and printScale().
void at::deleter(DLManagedTensor* arg)
Definition at line 181 of file DLConvertor.cpp.
References setup::arg.
Referenced by torch::jit::tensorexpr::LoopNest::eliminateDeadStores(), torch::from_blob(), fromDLPack(), c10::InefficientStdFunctionContext::makeDataPtr(), and toDLPack().
Return the Device of a Tensor, if the Tensor is defined.
Definition at line 17 of file DeviceGuard.h.
References at::Tensor::defined(), at::Tensor::device(), c10::make_optional(), and c10::nullopt.
Referenced by at::native::legacy::cuda::_thnn_conv2d_backward(), at::native::legacy::cuda::_thnn_conv2d_backward_out(), at::native::legacy::cuda::_thnn_conv2d_forward(), at::native::legacy::cuda::_thnn_conv2d_forward_out(), at::native::legacy::cuda::_thnn_conv_depthwise2d_backward(), at::native::legacy::cuda::_thnn_conv_depthwise2d_backward_out(), at::native::legacy::cuda::_thnn_conv_depthwise2d_forward(), at::native::legacy::cuda::_thnn_conv_depthwise2d_forward_out(), at::native::legacy::cuda::_thnn_glu_backward(), at::native::legacy::cuda::_thnn_glu_backward_out(), at::native::legacy::cuda::_thnn_glu_forward(), at::native::legacy::cuda::_thnn_glu_forward_out(), at::native::legacy::cuda::_thnn_log_sigmoid_backward(), at::native::legacy::cuda::_thnn_log_sigmoid_backward_out(), at::native::legacy::cuda::_thnn_log_sigmoid_forward(), at::native::legacy::cuda::_thnn_log_sigmoid_forward_out(), at::native::legacy::cuda::_thnn_multi_margin_loss_backward(), at::native::legacy::cuda::_thnn_multi_margin_loss_backward_out(), at::native::legacy::cuda::_thnn_multi_margin_loss_forward(), at::native::legacy::cuda::_thnn_multi_margin_loss_forward_out(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_backward(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_backward_out(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_forward(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_forward_out(), at::native::legacy::cuda::_thnn_nll_loss2d_backward(), at::native::legacy::cuda::_thnn_nll_loss2d_backward_out(), at::native::legacy::cuda::_thnn_nll_loss2d_forward(), at::native::legacy::cuda::_thnn_nll_loss2d_forward_out(), at::native::legacy::cuda::_thnn_nll_loss_backward(), at::native::legacy::cuda::_thnn_nll_loss_backward_out(), at::native::legacy::cuda::_thnn_nll_loss_forward(), at::native::legacy::cuda::_thnn_nll_loss_forward_out(), at::native::legacy::cuda::_thnn_rrelu_with_noise_forward(), at::native::legacy::cuda::_thnn_rrelu_with_noise_forward_(), at::native::legacy::cuda::_thnn_rrelu_with_noise_forward_out(), torch::autograd::InputBuffer::add(), device_of(), torch::autograd::dispatch_contiguous(), torch::autograd::dispatch_copy_(), torch::autograd::dispatch_invert(), torch::autograd::dispatch_nonzero(), torch::autograd::dispatch_nonzero_numpy(), torch::autograd::dispatch_range(), torch::autograd::dispatch_to_Bool(), torch::autograd::dispatch_to_CComplexDouble(), torch::autograd::dispatch_to_CDouble(), torch::autograd::dispatch_to_CLong(), torch::autograd::Engine::evaluate_function(), tools.codegen.dest.register_dispatch_key.RegisterDispatchKey::gen_unstructured(), at::Tensor::index(), at::Tensor::index_put_(), torch::autograd::THPVariable_getitem(), torch::autograd::THPVariable_new(), torch::autograd::THPVariable_new_ones(), torch::autograd::THPVariable_new_tensor(), and torch::autograd::THPVariable_setitem().
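A sketch of the typical use, assuming a helper that must run on whatever device t lives on; OptionalDeviceGuard is a no-op when the optional is empty:

    #include <ATen/DeviceGuard.h>

    void run_on_tensor_device(const at::Tensor& t) {
      c10::OptionalDeviceGuard guard(at::device_of(t));  // empty optional => no device switch
      // ... launch work that must execute on t's device ...
    }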
inline |
Return the Device of a TensorList, if the list is non-empty and the first Tensor is defined.
(This function implicitly assumes that all tensors in the list have the same device.)
Definition at line 28 of file DeviceGuard.h.
References device_of(), and c10::nullopt.
Tensor at::diagonal_backward_batching_rule(const Tensor& grad, IntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2)
Definition at line 390 of file BatchingRegistrations.cpp.
References getGradInputPhysicalDim(), grad, at::MultiBatchVmapTransform::logicalToPhysical(), caffe2.perfkernels.hp_emblookup_codegen::offset, and caffe2::zeros.
Referenced by TORCH_LIBRARY_IMPL().
Tensor at::diagonal_batching_rule(const Tensor& self, int64_t offset, int64_t dim1, int64_t dim2)
Definition at line 382 of file BatchingRegistrations.cpp.
References at::native::diagonal(), at::MultiBatchVmapTransform::logicalToPhysical(), and caffe2.perfkernels.hp_emblookup_codegen::offset.
Referenced by TORCH_LIBRARY_IMPL().
inline static
Definition at line 15 of file WrapDimUtilsMulti.h.
References dim, dim_bitset_size, caffe2::dims, caffe2::int64_t, maybe_wrap_dim(), and TORCH_CHECK.
Referenced by at::native::_sparse_sum(), at::native::_sparse_sum_backward_cpu(), at::native::_trilinear(), at::namedinference::compute_included_idxs(), at::native::flip_cpu(), at::native::make_dim_mask(), at::native::squeeze_multiple(), at::native::sumproduct_pair(), at::native::tensordot(), and torch::autograd::generated::details::unsqueeze_multiple().
Definition at line 16 of file NamedTensorUtils.cpp.
References dim, at::Tensor::has_names(), caffe2::it, at::Tensor::names(), microbenchmarks::names, tensor, toDimnameRepr(), TORCH_CHECK, and WILDCARD.
Referenced by at::native::_sparse_log_softmax(), at::native::_sparse_softmax(), at::native::cat(), at::native::cat_out(), at::native::cummax(), at::native::cummax_out(), at::native::cummin(), at::native::cummin_out(), at::native::cumprod(), at::native::cumprod_(), at::native::cumprod_out(), at::native::cumsum(), at::native::cumsum_(), at::native::cumsum_out(), at::native::diagonal(), dimnames_to_positions(), at::native::flatten(), at::native::index_fill(), at::native::index_fill_(), at::native::kthvalue(), at::native::kthvalue_out(), at::native::log_softmax(), at::native::logcumsumexp(), at::native::logcumsumexp_out(), at::native::max(), at::native::max_out(), at::native::median(), at::native::median_out(), at::native::min(), at::native::min_out(), at::native::mode(), at::native::mode_out(), at::native::nanmedian(), at::native::nanmedian_out(), at::native::prod(), at::native::prod_out(), at::native::select(), at::native::size(), at::native::softmax(), at::native::squeeze(), at::native::stride(), at::native::transpose(), at::native::unbind(), and at::native::unflatten().
TORCH_API std::vector<int64_t> at::dimnames_to_positions(const Tensor& tensor, DimnameList dims)
Definition at line 30 of file NamedTensorUtils.cpp.
References dimname_to_position(), caffe2::dims, name, and tensor.
Referenced by at::native::flatten(), at::native::logsumexp(), at::native::logsumexp_out(), at::native::mean(), at::native::mean_out(), at::native::mean_out_quantized_cpu(), at::native::mean_quantized_cpu(), at::native::norm(), at::native::norm_out(), at::native::std(), at::native::std_mean(), at::native::std_out(), at::native::sum(), at::native::sum_out(), at::native::var(), at::native::var_mean(), and at::native::var_out().
inline |
Definition at line 8 of file Parallel.h.
References bench_ops::x, and at::native::metal::mpscnn::y.
Referenced by at::internal::calc_num_tasks_and_chunk_size(), at::native::PoolingParams1D::valid_output_end(), and at::native::PoolingParams1D::valid_output_start().
Definition at line 797 of file BatchingRegistrations.cpp.
References dim, at::Tensor::dim(), isBatchedTensor(), at::MultiBatchVmapTransform::logicalToPhysical(), torch.backends.cuda::matmul, at::meta::other, at::Tensor::sizes(), tensor, TORCH_CHECK, TORCH_INTERNAL_ASSERT, and at::native::unsqueeze().
Referenced by TORCH_LIBRARY_IMPL().
TORCH_API void at::enableRecordFunction(bool enable)
enableRecordFunction enables RecordFunction thread locally
Definition at line 399 of file record_function.cpp.
References torch.jit._state::enable().
Referenced by main(), at::RecordFunctionGuard::RecordFunctionGuard(), torch::jit::mobile::InterpreterState::run(), THPAutograd_initExtension(), and at::RecordFunctionGuard::~RecordFunctionGuard().
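A sketch of toggling the thread-local flag directly; RecordFunctionGuard (listed among the callers above) is the RAII way to do the same thing:

    at::enableRecordFunction(true);                // enable profiling callbacks on this thread
    bool enabled = at::isRecordFunctionEnabled();  // query the thread-local flag
    at::enableRecordFunction(false);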
inline |
Definition at line 60 of file NumericUtils.h.
References exp(), value, and bench_ops::x.
Referenced by at::native::_kl_div_log_target(), exp(), exp< double >(), at::transformation::log_normal(), at::native::logsumexp_out_impl(), at::native::poisson_nll_loss(), at::native::soft_margin_loss_backward_out(), and TORCH_LIBRARY_IMPL().
inline |
Definition at line 71 of file NumericUtils.h.
References exp(), and bench_ops::x.
Tensor at::expand_batching_rule(const Tensor& self, IntArrayRef size, bool implicit)
Definition at line 144 of file BatchingRegistrations.cpp.
References c10::SmallVectorTemplateCommon< T, typename >::begin(), c10::aten::copy(), at::MultiBatchVmapTransform::logicalToPhysical(), size(), TORCH_CHECK, and TORCH_INTERNAL_ASSERT.
Referenced by TORCH_LIBRARY_IMPL().
Definition at line 48 of file ExpandUtils.h.
References c10::ArrayRef< T >::equals(), torch.jit._trace::make_tuple(), at::Tensor::sizes(), and tensor.
Referenced by at::native::templates::cuda::bernoulli_kernel(), at::indexing::copy_to(), expand_inplace(), torch::utils::map2_(), torch::utils::map_(), at::native::masked_fill__cuda(), and at::native::masked_scatter__cpu().
inline |
Definition at line 56 of file ExpandUtils.h.
References check_defined(), expand_inplace(), and tensor.
inline |
Definition at line 61 of file ExpandUtils.h.
References c10::ArrayRef< T >::equals(), torch.jit._trace::make_tuple(), at::Tensor::sizes(), and tensor.
inline |
Definition at line 71 of file ExpandUtils.h.
References check_defined(), expand_inplace(), and tensor.
inline |
Definition at line 77 of file ExpandUtils.h.
References c10::ArrayRef< T >::equals(), infer_size(), torch.jit._trace::make_tuple(), and at::Tensor::sizes().
Referenced by at::native::broadcast_tensors(), expand_outplace(), at::native::make_info(), at::native::masked_fill(), at::native::masked_scatter(), at::native::masked_select_out_impl_cpu(), and at::native::where().
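A sketch of the two-tensor overload, assuming the 1.8-era signature that returns a tuple of already-expanded tensors:

    #include <ATen/ExpandUtils.h>

    std::tuple<at::Tensor, at::Tensor> broadcast_pair(const at::Tensor& a, const at::Tensor& b) {
      // Both results carry the common broadcast shape computed via infer_size().
      return at::expand_outplace(a, b);
    }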
inline |
Definition at line 88 of file ExpandUtils.h.
References check_defined(), and expand_outplace().
inline |
Definition at line 93 of file ExpandUtils.h.
References c10::ArrayRef< T >::equals(), infer_size(), torch.jit._trace::make_tuple(), and at::Tensor::sizes().
inline |
Definition at line 108 of file ExpandUtils.h.
References check_defined(), and expand_outplace().
inline |
Definition at line 129 of file ExpandUtils.h.
References infer_size(), c10::ArrayRef< T >::size(), sizes, and c10::ArrayRef< T >::vec().
inline |
Definition at line 116 of file ExpandUtils.h.
References c10::ArrayRef< T >::equals(), torch.jit._trace::make_tuple(), sizes, and at::Tensor::sizes().
Referenced by at::native::_sparse_sum_backward_cpu(), at::native::addbmm_out(), at::native::addmm_cpu_out(), at::native::addmm_out_sparse_dense_cpu(), at::native::addmm_sparse_dense_cpu(), at::native::baddbmm_out_cpu(), at::native::build_addr_iter(), and expand_size().
inline |
Definition at line 124 of file ExpandUtils.h.
References check_defined(), expand_size(), and sizes.
Definition at line 236 of file BatchingRegistrations.cpp.
References at::MultiBatchVmapTransform::logicalToPhysical(), and value.
Referenced by TORCH_LIBRARY_IMPL().
Definition at line 242 of file BatchingRegistrations.cpp.
References isBatchedTensor(), at::MultiBatchVmapTransform::logicalToPhysical(), at::BroadcastingVmapTransform::logicalToPhysical(), tensor, and value.
Referenced by TORCH_LIBRARY_IMPL().
static |
Chooses a dimension over which to parallelize.
Prefers the outer-most dimension that is larger than the number of available threads.
Definition at line 70 of file TensorIteratorReduce.cpp.
References AT_ASSERT, dim, get_num_threads(), iter, and shape.
Referenced by at::TensorIteratorBase::foreach_reduced_elt(), and parallel_dim_reduction().
inline |
Definition at line 288 of file CPUApplyUtils.h.
Referenced by torch.distributed.nn.api.remote_module._RemoteModule::__init__(), torch::autograd::Function< T >::apply(), apply_op(), torch::python::bind_module(), torch.fx.graph.Graph::call_module(), torch.fx.symbolic_trace.Tracer::call_module(), torch.utils.checkpoint::checkpoint_sequential(), torch.quantization.fx.utils::collect_producer_nodes(), torch._jit_internal::export(), torch::onnx::export(), fastrnns.factory::flatten_list(), torch.jit._async::fork(), forward(), torch.jit._freeze::freeze(), operator_benchmark.benchmark_test_generator::generate_pt_tests_from_op_list(), torch::nn::AnyModule::get_(), audio_text_models::get_deepspeech(), vision_models::get_detr(), vision_models::get_fcn_resnet(), audio_text_models::get_multiheadattn(), torch.distributed.pipeline.sync.phony::get_phony(), vision_models::get_resnet18(), ppl_models::get_robust_regression(), ppl_models::get_simple_regression(), audio_text_models::get_transformer(), audio_text_models::get_wav2letter(), torch._jit_internal::ignore(), torch.distributed.pipeline.sync.checkpoint::is_recomputing(), torch::jit::isinstance(), fastrnns.factory::layernorm_pytorch_lstm_creator(), torch.fx.subgraph_rewriter::replace_pattern(), operator_benchmark.benchmark_caffe2.Caffe2OperatorTestCase::run_forward(), torch.jit._serialization::save(), torch.jit._script::script(), torch.distributed.pipeline.sync.skip.skippable::skippable(), torch.jit._trace::trace(), torch.jit._trace::trace_module(), torch._jit_internal::unused(), fastrnns.factory::varlen_pytorch_lstm_creator(), and torch::detail::wrap_pybind_function_impl_().
inline |
Definition at line 291 of file CPUApplyUtils.h.
References forward(), caffe2::int64_t, iter, and caffe2.perfkernels.hp_emblookup_codegen::offset.
inline |
Definition at line 95 of file Functions.h.
inline |
Definition at line 126 of file Functions.h.
inline |
Definition at line 69 of file Functions.h.
Referenced by torch::jit::tensorexpr::constructTensors(), caffe2::Workspace::CopyForwardedTensors(), at::sparse::flatten_indices(), torch::from_blob(), fromDLPack(), at::native::index_select_sparse(), parse_conv_serialized_state(), serialize_conv(), caffe2::ATenOp< Context >::tensorWrapping(), and PackedEmbeddingBagWeight::unpack().
inline |
Definition at line 103 of file Functions.h.
TORCH_API Tensor at::fromDLPack(const DLManagedTensor* src)
Definition at line 208 of file DLConvertor.cpp.
References deleter(), device, from_blob(), getATenDevice(), caffe2::src, and toScalarType().
Referenced by THPModule_fromDLPack().
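A sketch of the round trip through DLPack; toDLPack is the documented counterpart, and ownership of the DLManagedTensor passes to fromDLPack:

    #include <ATen/ATen.h>
    #include <ATen/DLConvertor.h>

    void dlpack_roundtrip() {
      at::Tensor src = at::ones({2, 2});
      DLManagedTensor* managed = at::toDLPack(src);  // shares storage with src
      at::Tensor back = at::fromDLPack(managed);     // takes ownership; deleter() runs on release
    }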
TORCH_API bool at::geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides)
Definition at line 267 of file TensorUtils.cpp.
References dim, caffe2::int64_t, sizes, and strides.
Referenced by at::TensorGeometry::is_contiguous().
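For example, a row-major 2x3 layout passes the check while a column-major one does not (illustrative values):

    bool row_major = at::geometry_is_contiguous({2, 3}, {3, 1});   // true
    bool col_major = at::geometry_is_contiguous({3, 2}, {1, 3});   // false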
TORCH_API std::string at::get_cxx_flags()
Definition at line 201 of file Version.cpp.
References caffe2::GetBuildOptions(), and TORCH_CHECK.
Referenced by THPModule_cxxFlags().
int64_t at::get_device(Tensor self)
Referenced by torch.distributed.pipeline.sync.copy.Copy::backward(), torch.distributed.pipeline.sync.copy.Copy::forward(), torch.distributed.pipeline.sync.skip.portal.PortalCopy::forward(), torch.nn.parallel._functions.ReduceAddCoalesced::forward(), torch.nn.parallel._functions.Broadcast::forward(), torch::cuda::nccl::detail::get_communicators(), torch.nn.parallel.parallel_apply::parallel_apply(), and torch::autograd::THPVariable_get_device().
inline static
Utility function used in tensor implementations: it supplies the default generator when an input generator is not provided.
The input Generator* is also statically cast to the backend generator type (CPUGeneratorImpl, CUDAGeneratorImpl, etc.).
Definition at line 127 of file Utils.h.
References tools.autograd.gen_python_functions::gen().
TORCH_API std::string at::get_mkl_version()
Definition at line 21 of file Version.cpp.
References version.
Referenced by get_parallel_info(), and show_config().
TORCH_API std::string at::get_mkldnn_version()
Definition at line 37 of file Version.cpp.
Referenced by get_parallel_info(), and show_config().
TORCH_API int at::get_num_interop_threads()
Referenced by get_parallel_info(), main(), and THPModule_getNumInteropThreads().
TORCH_API int at::get_num_threads()
Referenced by at::native::_cat_out_cpu(), at::internal::calc_num_tasks_and_chunk_size(), at::native::can_use_native_serial_stack(), find_split_dim(), at::TensorIteratorBase::for_each(), at::TensorIteratorBase::foreach_reduced_elt(), get_parallel_info(), main(), at::native::masked_select_out_impl_cpu(), at::TensorIteratorBase::parallel_reduce(), THPModule_getNumThreads(), and two_pass_reduction().
TORCH_API std::string at::get_openmp_version()
Definition at line 55 of file Version.cpp.
Referenced by get_parallel_info(), and show_config().
TORCH_API MemOverlapStatus at::get_overlap_status(const Tensor& a, const Tensor& b)
Definition at line 39 of file MemoryOverlap.cpp.
References caffe2.contrib.aten.docs.sample::a, at::native::metal::mpscnn::b, and get_overlap_status().
Referenced by at::native::_cat_out_cpu(), assert_no_overlap(), assert_no_partial_overlap(), at::native::can_use_native_serial_stack(), and get_overlap_status().
TORCH_API MemOverlapStatus at::get_overlap_status(TensorImpl* a, TensorImpl* b)
Definition at line 43 of file MemoryOverlap.cpp.
References caffe2.contrib.aten.docs.sample::a, at::native::metal::mpscnn::b, FULL, NO, PARTIAL, and TOO_HARD.
std::string at::get_parallel_info()
Definition at line 45 of file ParallelCommon.cpp.
References get_mkl_version(), get_mkldnn_version(), get_num_interop_threads(), get_num_threads(), and get_openmp_version().
Referenced by torch::throughput_benchmark::detail::BenchmarkHelper< Input, Output, Model >::benchmark(), main(), and THPModule_parallelInfo().
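A sketch that prints the same diagnostics directly (init_num_threads, documented further down, is called first so the thread pools are set up):

    #include <ATen/Parallel.h>
    #include <iostream>

    int main() {
      at::init_num_threads();
      std::cout << at::get_parallel_info() << "\n";
      std::cout << "intra-op threads: " << at::get_num_threads() << "\n";
    }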
TORCH_API QTensorImpl* at::get_qtensorimpl(const Tensor& self)
Definition at line 72 of file Quantizer.cpp.
References at::native::is_quantized(), fastrnns.scratch::requires_grad, TORCH_CHECK, and TORCH_INTERNAL_ASSERT.
Referenced by at::native::alias_with_sizes_and_strides(), at::native::as_strided_qtensorimpl(), at::native::dequantize_quant(), at::native::equal_quantized_cpu(), new_qtensor(), at::native::q_per_channel_axis(), at::native::q_per_channel_scales(), at::native::q_per_channel_zero_points(), at::native::q_scale_quant(), at::native::q_zero_point_quant(), at::native::qscheme_quant(), at::Tensor::quantizer(), set_quantizer_(), at::native::squeeze_qtensor(), and at::native::unsqueeze_qtensor().
TORCH_API const RecordFunctionTLS& at::get_record_function_tls_()
Definition at line 58 of file record_function.cpp.
Referenced by at::ThreadLocalState::ThreadLocalState().
int64_t at::get_sub_byte_tensor_size(int64_t size_bytes, at::ScalarType t)
Definition at line 80 of file Quantizer.cpp.
References std::ceil(), and caffe2::int64_t.
Referenced by new_qtensor().
TORCH_API int at::get_thread_num()
Definition at line 96 of file DLConvertor.cpp.
References c10::CPU, c10::CUDA, torch.distributed.pipeline.sync._balance::Device, DLContext::device_id, DLContext::device_type, c10::HIP, kDLCPU, kDLGPU, kDLOpenCL, kDLROCM, caffe2::OPENCL, and std::to_string().
Referenced by fromDLPack().
Definition at line 235 of file Context.cpp.
References c10::GetCPUAllocator().
Referenced by at::detail::empty_cpu(), new_qtensor(), and torch::distributed::rpc::wireDeserialize().
inline static
Definition at line 242 of file Context.h.
References at::DeprecatedTypePropertiesRegistry::getDeprecatedTypeProperties(), globalDeprecatedTypePropertiesRegistry(), and diagnose_protobuf::p.
Referenced by torch::autograd::VariableType::allTypesForBackends(), torch::jit::python::operator<<(), and torch::utils::options_from_string().
Definition at line 68 of file DLConvertor.cpp.
References c10::CPU, c10::CUDA, at::Tensor::device(), DLContext::device_id, DLContext::device_type, c10::HIP, kDLCPU, kDLGPU, kDLOpenCL, kDLROCM, caffe2::OPENCL, c10::Device::str(), tensor, and c10::Device::type().
Referenced by toDLPack().
TORCH_API DLDataType at::getDLDataType(const Tensor& t)
Definition at line 10 of file DLConvertor.cpp.
References c10::BFloat16, c10::Bool, c10::ComplexDouble, c10::Double, caffe2::dtype, at::Tensor::element_size(), c10::Float, c10::Half, c10::Int, kDLFloat, kDLInt, kDLUInt, at::Tensor::scalar_type(), and c10::Undefined.
Referenced by toDLPack().
static |
Definition at line 350 of file BatchingRegistrations.cpp.
References dim, maybe_wrap_dim(), at::namedinference::num_batch_dims(), and c10::ArrayRef< T >::size().
Referenced by diagonal_backward_batching_rule(), select_backward_batching_rule(), and slice_backward_batching_rule().
static |
Definition at line 229 of file VmapTransforms.cpp.
References createVmapLevelsBitset(), at::Tensor::dim(), caffe2::int64_t, maybeGetBatchedImpl(), c10::ArrayRef< T >::size(), tensor, and TORCH_INTERNAL_ASSERT.
Referenced by at::BroadcastingVmapTransform::logicalToPhysical().
inline static
Definition at line 275 of file Context.h.
References at::detail::getCUDAHooks(), at::detail::getHIPHooks(), at::CUDAHooksInterface::getNumGPUs(), at::HIPHooksInterface::getNumGPUs(), hasCUDA(), and hasHIP().
static |
Definition at line 98 of file VmapTransforms.cpp.
References createVmapLevelsBitset(), maybeGetBatchedImpl(), and permuteBatchDimsToFront().
Referenced by alignBatchDimsAtFront().
Definition at line 31 of file Context.cpp.
Referenced by at::native::_choose_qparams_per_tensor(), at::native::_convolution(), at::native::legacy::cuda::_th_copy_ignoring_overlaps_(), at::native::legacy::cuda::_th_cross_kernel(), at::native::legacy::cuda::_th_cross_kernel_out(), at::native::legacy::cuda::_th_gels(), at::native::legacy::cuda::_th_gels_out(), at::native::legacy::cuda::_th_geqrf(), at::native::legacy::cuda::_th_geqrf_out(), at::native::legacy::cuda::_th_index_copy_(), at::native::legacy::cuda::_th_masked_fill_(), at::native::legacy::cuda::_th_masked_fill_bool_(), at::native::legacy::cuda::_th_mode(), at::native::legacy::cuda::_th_mode_out(), at::native::legacy::cuda::_th_put_(), at::native::legacy::cuda::_th_renorm(), at::native::legacy::cuda::_th_renorm_(), at::native::legacy::cuda::_th_renorm_out(), at::native::legacy::cuda::_th_sort(), at::native::legacy::cuda::_th_sort_out(), at::native::legacy::cuda::_th_topk(), at::native::legacy::cuda::_th_topk_out(), at::native::legacy::cuda::_thnn_conv2d_backward(), at::native::legacy::cuda::_thnn_conv2d_backward_out(), at::native::legacy::cuda::_thnn_conv2d_forward(), at::native::legacy::cuda::_thnn_conv2d_forward_out(), at::native::legacy::cuda::_thnn_conv_depthwise2d_backward(), at::native::legacy::cuda::_thnn_conv_depthwise2d_backward_out(), at::native::legacy::cuda::_thnn_conv_depthwise2d_forward(), at::native::legacy::cuda::_thnn_conv_depthwise2d_forward_out(), at::native::legacy::cuda::_thnn_glu_backward(), at::native::legacy::cuda::_thnn_glu_backward_out(), at::native::legacy::cuda::_thnn_glu_forward(), at::native::legacy::cuda::_thnn_glu_forward_out(), at::native::legacy::cuda::_thnn_log_sigmoid_backward(), at::native::legacy::cuda::_thnn_log_sigmoid_backward_out(), at::native::legacy::cuda::_thnn_log_sigmoid_forward(), at::native::legacy::cuda::_thnn_log_sigmoid_forward_out(), at::native::legacy::cuda::_thnn_multi_margin_loss_backward(), at::native::legacy::cuda::_thnn_multi_margin_loss_backward_out(), at::native::legacy::cuda::_thnn_multi_margin_loss_forward(), at::native::legacy::cuda::_thnn_multi_margin_loss_forward_out(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_backward(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_backward_out(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_forward(), at::native::legacy::cuda::_thnn_multilabel_margin_loss_forward_out(), at::native::legacy::cuda::_thnn_nll_loss2d_backward(), at::native::legacy::cuda::_thnn_nll_loss2d_backward_out(), at::native::legacy::cuda::_thnn_nll_loss2d_forward(), at::native::legacy::cuda::_thnn_nll_loss2d_forward_out(), at::native::legacy::cuda::_thnn_nll_loss_backward(), at::native::legacy::cuda::_thnn_nll_loss_backward_out(), at::native::legacy::cuda::_thnn_nll_loss_forward(), at::native::legacy::cuda::_thnn_nll_loss_forward_out(), at::native::legacy::cuda::_thnn_rrelu_with_noise_forward(), at::native::legacy::cuda::_thnn_rrelu_with_noise_forward_(), at::native::legacy::cuda::_thnn_rrelu_with_noise_forward_out(), at::native::adaptive_avg_pool2d_quantized_cpu(), at::native::adaptive_avg_pool3d_out_quantized_cpu(), at::Context::alertNotDeterministic(), torch::autograd::VariableType::allCUDATypes(), at::native::avg_pool2d_quantized_cpu(), at::cuda::blas::bgemm< at::Half >(), at::cuda::blas::bgemm< c10::complex< double > >(), at::cuda::blas::bgemm< c10::complex< float > >(), at::cuda::blas::bgemm< double >(), at::cuda::blas::bgemm< float >(), torch::jit::tensorexpr::CudaCodeGen::CompileToNVRTC(), at::native::convolution(), at::native::cudnn_convolution_deprecated2(), 
at::native::cudnn_convolution_transpose_deprecated2(), at::native::cudnn_is_acceptable(), deserialize_conv(), torch::jit::fuser::cuda::FusedKernelCUDA::FusedKernelCUDA(), at::cuda::blas::gemm< at::Half >(), at::cuda::blas::gemm< c10::complex< double > >(), at::cuda::blas::gemm< c10::complex< float > >(), at::cuda::blas::gemm< double >(), at::cuda::blas::gemm< float >(), at::cuda::blas::gemv< c10::complex< double > >(), at::cuda::blas::gemv< c10::complex< float > >(), at::cuda::blas::gemv< double >(), at::cuda::blas::gemv< float >(), tools.codegen.dest.register_dispatch_key.RegisterDispatchKey::gen_unstructured(), at::cuda::getCurrentCUDABlasHandle(), at::cuda::getPinnedMemoryAllocator(), at::native::hardsigmoid_quantized_cpu(), hasCUDA(), hasHIP(), hasLAPACK(), hasMAGMA(), hasMKL(), hasMKLDNN(), hasOpenMP(), hasXLA(), at::native::index_copy_(), init(), isPinned(), manual_seed(), at::native::mean_out_quantized_cpu(), new_qtensor(), torch::jit::fuser::cuda::nvrtc(), torch::jit::tensorexpr::nvrtc(), torch::jit::fuser::cuda::executor_utils::nvrtcCompile(), pytorch_jni::PytorchJni::preModuleLoadSetupOnce(), at::native::quantized_hardswish(), at::native::quantized_max_pool2d(), register_linear_params(), torch::jit::RegisterCudaFuseGraph::registerPass(), at::native::relu_quantized_cpu(), torch::jit::fuser::cuda::FusionExecutor::runFusion(), at::native::sigmoid_quantized_cpu(), at::native::tanh_quantized_cpu(), THCPModule_initExtension(), THPModule_allowTF32CuBLAS(), THPModule_allowTF32CuDNN(), THPModule_are_vmap_fallback_warnings_enabled(), THPModule_benchmarkCuDNN(), THPModule_deterministicAlgorithms(), THPModule_deterministicCuDNN(), THPModule_isEnabledXNNPACK(), THPModule_qEngine(), THPModule_set_display_vmap_fallback_warnings_mode(), THPModule_setAllowTF32CuBLAS(), THPModule_setAllowTF32CuDNN(), THPModule_setBenchmarkCuDNN(), THPModule_setDeterministicAlgorithms(), THPModule_setDeterministicCuDNN(), THPModule_setFlushDenormal(), THPModule_setQEngine(), THPModule_setUserEnabledCuDNN(), THPModule_setUserEnabledMkldnn(), THPModule_supportedQEngines(), THPModule_userEnabledCuDNN(), THPModule_userEnabledMkldnn(), torch::jit::to_dispatch(), at::native::ConvParams::use_mkldnn(), and warnFallback().
TORCH_API DeprecatedTypePropertiesRegistry& at::globalDeprecatedTypePropertiesRegistry()
Definition at line 28 of file DeprecatedTypePropertiesRegistry.cpp.
Referenced by at::Tensor::C10_DEPRECATED_MESSAGE(), CPU(), CUDA(), getDeprecatedTypeProperties(), HIP(), at::DeprecatedTypeProperties::toBackend(), and at::DeprecatedTypeProperties::toScalarType().
TORCH_API MemOverlap at::has_internal_overlap(const Tensor& tensor)
Definition at line 6 of file MemoryOverlap.cpp.
References has_internal_overlap(), tensor, and at::Tensor::unsafeGetTensorImpl().
Referenced by at::native::_debug_has_internal_overlap(), at::native::_index_put_impl_(), assert_no_internal_overlap(), has_internal_overlap(), at::native::index_fill_(), at::native::masked_fill__cuda(), and at::native::masked_fill_impl_cpu().
TORCH_API MemOverlap at::has_internal_overlap(TensorImpl* t)
Definition at line 10 of file MemoryOverlap.cpp.
References AT_ASSERT, at::Tensor::is_contiguous(), c10::kStrided, at::Tensor::layout(), NO, sizes, at::Tensor::sizes(), strides, at::Tensor::strides(), TOO_HARD, and YES.
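A sketch of the typical positive case, an expanded view whose stride-0 dimension makes elements alias:

    void overlap_example() {
      at::Tensor base = at::zeros({1, 3});
      at::Tensor view = base.expand({4, 3});                    // stride 0 along dim 0
      at::MemOverlap status = at::has_internal_overlap(view);  // MemOverlap::YES
      (void)status;
    }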
inline |
Definition at line 13 of file NamedTensorUtils.h.
References caffe2::tensors.
Referenced by at::namedinference::compute_broadcast_outnames(), at::namedinference::compute_cat_outnames(), at::namedinference::compute_cdist_outnames(), at::namedinference::compute_matmul_outnames(), at::namedinference::propagate_names_for_expand(), at::native::resize_(), at::native::resize_named_tensor_(), at::native::select(), and at::native::unflatten().
TORCH_API bool at::hasCallbacks()
Definition at line 354 of file record_function.cpp.
References matmul_dlmc_bench::m.
Referenced by torch::jit::InterpreterStateImpl::checkAndStartRecordFunction().
inline static
Definition at line 262 of file Context.h.
References globalContext(), and at::Context::hasCUDA().
Referenced by getNumGPUs(), manual_seed(), torch::jit::RegisterCudaFuseGraph::registerPass(), and show_config().
TORCH_API bool at::hasGlobalCallbacks()
hasGlobalCallbacks returns whether there are global callbacks registered with pushGlobalCallback
Definition at line 359 of file record_function.cpp.
Referenced by torch::jit::mobile::InterpreterState::run(), and shouldRunRecordFunction().
inline static
Definition at line 266 of file Context.h.
References globalContext(), and at::Context::hasHIP().
Referenced by getNumGPUs(), and torch::jit::RegisterCudaFuseGraph::registerPass().
inline static
Definition at line 302 of file Context.h.
References globalContext(), and at::Context::hasLAPACK().
Referenced by initModule().
inline static
Definition at line 306 of file Context.h.
References globalContext(), and at::Context::hasMAGMA().
Referenced by THCPModule_initExtension().
inline static
Definition at line 298 of file Context.h.
References globalContext(), and at::Context::hasMKL().
Referenced by at::native::bmm_out_or_baddbmm_(), and initModule().
inline static
Definition at line 310 of file Context.h.
References globalContext(), and at::Context::hasMKLDNN().
Referenced by initModule().
inline static
Definition at line 294 of file Context.h.
References globalContext(), and at::Context::hasOpenMP().
Referenced by initModule().
TORCH_API bool at::hasThreadLocalCallbacks()
hasThreadLocalCallbacks returns whether there are callbacks registered with addThreadLocalCallback
Definition at line 363 of file record_function.cpp.
inline static
Definition at line 270 of file Context.h.
References globalContext(), and at::Context::hasXLA().
inline static
Definition at line 257 of file Context.h.
References at::DeprecatedTypePropertiesRegistry::getDeprecatedTypeProperties(), globalDeprecatedTypePropertiesRegistry(), and c10::HIP.
TORCH_API bool at::in_parallel_region()
TORCH_API std::vector<int64_t> at::infer_dense_strides(IntArrayRef tensor_sizes, IntArrayRef tensor_strides)
Definition at line 103 of file ExpandUtils.cpp.
References matmul_dlmc_bench::comparison, c10::attr::idx(), caffe2::int64_t, c10::attr::perm(), c10::ArrayRef< T >::size(), std::swap(), and TORCH_CHECK.
Referenced by denseAndNonOverlapping(), and at::native::empty_like().
TORCH_API std::vector<int64_t> at::infer_size(IntArrayRef a, IntArrayRef b)
Definition at line 6 of file ExpandUtils.cpp.
References caffe2.contrib.aten.docs.sample::a, at::native::metal::mpscnn::b, caffe2::int64_t, caffe2.perfkernels.hp_emblookup_codegen::offset, and TORCH_CHECK.
Referenced by at::native::_linalg_broadcast_batch_dims(), at::native::cdist_impl(), at::TensorIteratorBase::compute_shape(), expand_outplace(), torch::jit::fuser::getMapSize(), at::native::masked_select_backward(), at::native::matmul(), at::native::reshape(), at::native::vulkan::detail::reshape_copy(), at::native::reshape_out(), THPModule_inferSize(), at::native::view(), and at::native::vulkan::aten::view().
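For example, broadcasting a {3, 1, 5} shape against {4, 5} (a one-line sketch):

    std::vector<int64_t> out = at::infer_size({3, 1, 5}, {4, 5});  // -> {3, 4, 5}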
inline |
Definition at line 12 of file InferSize.h.
References AT_ERROR, dim, caffe2::int64_t, numel(), caffe2::python::res, shape, c10::SmallVectorTemplateCommon< T, typename >::size(), and TORCH_CHECK.
TORCH_API std::tuple<std::vector<int64_t>, std::vector<int64_t>> at::inferExpandGeometry(IntArrayRef tensor_sizes, IntArrayRef tensor_strides, IntArrayRef sizes)
Definition at line 33 of file ExpandUtils.cpp.
References dim, caffe2::int64_t, caffe2.perfkernels.hp_emblookup_codegen::offset, c10::ArrayRef< T >::size(), size(), sizes, stride(), and TORCH_CHECK.
Referenced by at::native::expand().
inline static
Definition at line 236 of file Context.h.
References globalContext().
Referenced by initModule(), at::internal::lazy_init_num_threads(), and at::native::Descriptor< T, ctor, dtor >::mut_desc().
TORCH_API void at::init_num_threads()
inline |
Definition at line 10 of file InitialTensorOptions.h.
References c10::TensorOptions::dtype(), c10::kCPU, c10::kFloat, c10::kStrided, c10::TensorOptions::layout(), and c10::TensorOptions::requires_grad().
Referenced by at::native::hspmm_out_sparse_cpu(), and torch::jit::InferShapeTypeForUninitializedOutput().
Definition at line 132 of file BatchedTensorImpl.cpp.
References createVmapLevelsBitset(), maybeGetBatchedImpl(), and at::meta::other.
Referenced by torch::autograd::AccumulateGrad::accumulateGrad(), torch::autograd::generated::details::binary_cross_entropy_double_backward(), torch::autograd::generated::details::binary_cross_entropy_double_backward_grad_output(), and torch::autograd::generated::details::symeig_backward().
TORCH_API Tensor& at::internal_set_names_inplace(Tensor& tensor, optional<DimnameList> names)
Definition at line 19 of file NamedTensor.cpp.
References at::impl::internal_set_names_inplace(), microbenchmarks::names, tensor, and at::Tensor::unsafeGetTensorImpl().
Referenced by at::native::align(), at::native::align_to(), at::native::empty(), at::native::flatten(), at::native::refine_names(), at::native::rename(), at::native::rename_(), THPVariable_set_names(), and at::native::unflatten().
TORCH_API Tensor& at::internal_set_names_inplace(Tensor& tensor, std::vector<Dimname>&& names, bool validate_names)
Definition at line 24 of file NamedTensor.cpp.
References at::impl::internal_set_names_inplace(), microbenchmarks::names, tensor, and at::Tensor::unsafeGetTensorImpl().
int at::intraop_default_num_threads()
Definition at line 94 of file ParallelCommon.cpp.
References c10::TaskThreadPoolBase::defaultNumThreads(), and TORCH_CHECK.
void at::intraop_invoke(const F0& f0, const F1& f1)
Definition at line 103 of file ParallelNativeTBB.h.
TORCH_API void at::intraop_launch(std::function<void()> func)
TORCH_API std::shared_ptr<c10::ivalue::Future> at::intraop_launch_future(std::function<void()> func)
static |
Definition at line 53 of file BatchingRegistrations.cpp.
References dim.
Referenced by sum_batching_rule(), and transpose_int_batching_rule().
bool at::is_custom_op(const c10::OperatorName& opName)
Definition at line 28 of file ATenOpList.cpp.
References $, torch::jit::aten_ops, c10::OperatorName::name, ops, and c10::OperatorName::overload_name.
inline static
Definition at line 181 of file ExpandUtils.h.
References caffe2::int64_t, shape, c10::ArrayRef< T >::size(), c10::SmallVectorTemplateCommon< T, typename >::size(), size(), and caffe2::target().
Referenced by at::native::make_index_put_iterator(), at::native::sum_to_size(), and torch::autograd::validate_outputs().
inline |
Definition at line 99 of file BatchedTensorImpl.h.
References c10::Batched, c10::DispatchKeySet::has(), c10::TensorImpl::key_set(), tensor, and at::Tensor::unsafeGetTensorImpl().
Referenced by dot_batching_rule(), fill_inplace_tensor_batching_rule(), makeBatched(), maybeGetBatchedImpl(), mm_batching_rule(), mv_batching_rule(), THPEngine_run_backward(), and at::VmapPhysicalView::VmapPhysicalView().
static |
Definition at line 46 of file BatchedFallback.cpp.
References caffe2::it.
Referenced by batchedTensorForLoopFallback().
bool at::isPhysicalScalarTensor(const Tensor& logical_tensor)
Definition at line 73 of file BatchingRegistrations.cpp.
References at::Tensor::dim(), and maybeGetBatchedImpl().
Referenced by binary_pointwise_batching_rule().
TORCH_API bool at::isRecordFunctionEnabled()
isRecordFunctionEnabled returns whether RecordFunction is enabled thread locally
Definition at line 395 of file record_function.cpp.
Referenced by torch::jit::mobile::InterpreterState::run().
inline |
Definition at line 241 of file CPUApplyUtils.h.
Referenced by apply_op(), and iterate().
inline |
Definition at line 244 of file CPUApplyUtils.h.
inline |
Definition at line 250 of file CPUApplyUtils.h.
Referenced by apply_op(), and iterate_continue().
inline |
Definition at line 255 of file CPUApplyUtils.h.
References iter, and iterate_continue().
inline |
Definition at line 271 of file CPUApplyUtils.h.
Referenced by apply_op(), and iterate_overflow().
inline |
Definition at line 274 of file CPUApplyUtils.h.
References caffe2::int64_t, iter, and iterate_overflow().
TORCH_API void at::launch(std::function<void()> func)
inline static
Definition at line 63 of file WrapDimUtils.h.
References dim, maybe_wrap_dim(), and sizes.
Referenced by at::native::cat(), at::native::cat_out(), and torch::autograd::generated::details::cat_tensors_backward().
inline static
Definition at line 73 of file WrapDimUtils.h.
References dim, at::Tensor::dim(), maybe_wrap_dim(), at::Tensor::sizes(), tensor, and caffe2::tensors.
inline static
Definition at line 805 of file TensorBody.h.
References at::Tensor::key_set(), and legacyExtractDispatchKey().
Referenced by torch::utils::_sparse_coo_tensor_unsafe_ctor(), torch::utils::_validate_sparse_coo_tensor_args(), torch::autograd::applySlicing(), at::Tensor::C10_DEPRECATED_MESSAGE(), legacyExtractDispatchKey(), torch::utils::sparse_coo_tensor_ctor(), torch::tensors::Tensor_instancecheck(), torch::autograd::THPVariable_new(), torch::autograd::THPVariable_new_ones(), and torch::autograd::THPVariable_new_tensor().
inline |
Definition at line 76 of file NumericUtils.h.
References log(), value, and bench_ops::x.
Referenced by at::native::_kl_div_non_log_target(), at::transformation::exponential(), at::transformation::geometric(), log(), log< double >(), log_nnc_fast(), log_nnc_sleef(), at::native::poisson_nll_loss(), and TORCH_LIBRARY_IMPL().
inline |
Definition at line 87 of file NumericUtils.h.
References log(), and bench_ops::x.
TORCH_API QuantizerPtr at::make_per_channel_affine_quantizer(const Tensor& scales, const Tensor& zero_points, int64_t axis, ScalarType scalar_type)
Definition at line 44 of file Quantizer.cpp.
References caffe2::axis, at::Tensor::contiguous(), c10::isFloatingType(), c10::kDouble, c10::kFloat, c10::kLong, at::Tensor::scalar_type(), detail::scalar_type(), and TORCH_CHECK.
Referenced by at::native::empty_per_channel_affine_quantized(), at::native::quantize_per_channel_cpu(), at::native::squeeze_qtensor(), and at::native::unsqueeze_qtensor().
TORCH_API QuantizerPtr at::make_per_tensor_affine_quantizer(double scale, int64_t zero_point, ScalarType scalar_type)
Definition at line 36 of file Quantizer.cpp.
References detail::scalar_type(), scale, and zero_point.
Referenced by at::native::empty_affine_quantized(), and at::native::quantize_per_tensor().
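These factories back the public quantization entry points; a sketch via quantize_per_tensor, which is one of the callers listed above:

    void quantize_example() {
      at::Tensor fp = at::rand({4});
      at::Tensor q = at::quantize_per_tensor(fp, /*scale=*/0.1, /*zero_point=*/0, at::kQUInt8);
    }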
Definition at line 105 of file BatchedTensorImpl.cpp.
References c10::SmallVectorTemplateCommon< T, typename >::begin(), at::Tensor::dim(), c10::SmallVectorTemplateCommon< T, typename >::end(), isBatchedTensor(), kVmapMaxTensorDims, kVmapNumLevels, tensor, TORCH_CHECK, and TORCH_INTERNAL_ASSERT.
Referenced by addBatchDim(), at::VmapPhysicalToLogicalMap::apply(), clone_batching_rule(), pow_scalar_Tensor_batching_rule(), at::native::remove_existing_batch_dim(), to_dtype_layout_batching_rule(), unwrap_and_call(), and unwrap_and_call_method().
inline static
Definition at line 314 of file Context.h.
References c10::CPU, at::Context::defaultGenerator(), torch.distributed.pipeline.sync._balance::Device, tools.autograd.gen_python_functions::gen(), at::detail::getCUDAHooks(), at::CUDAHooksInterface::getNumGPUs(), globalContext(), hasCUDA(), c10::kCUDA, lock, and torch.cuda.random::seed().
Referenced by torch::jit::Node::hasSideEffects(), and torch.utils.data.dataset::random_split().
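A sketch of seeding the default generators before sampling; reusing the same seed reproduces the same draw:

    at::manual_seed(42);
    at::Tensor a = at::randn({2, 3});
    at::manual_seed(42);
    at::Tensor b = at::randn({2, 3});  // identical to a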
inline |
Definition at line 302 of file CPUApplyUtils.h.
Referenced by apply_op(), and max_dim().
inline |
Definition at line 307 of file CPUApplyUtils.h.
References iter, std::max(), and max_dim().
inline |
inline |
Definition at line 265 of file CPUApplyUtils.h.
References iter, max_iterate_size(), and std::min().
static |
Definition at line 493 of file BatchingRegistrations.cpp.
References c10::nullopt, sizes, at::native::storage_size_for(), and strides.
Referenced by checkBasicAsStridedValidForSlice().
Definition at line 258 of file TensorUtils.cpp.
References at::Tensor::data_ptr(), at::Tensor::defined(), and tensor.
Definition at line 262 of file TensorUtils.cpp.
References at::Tensor::data_ptr(), at::Tensor::defined(), and tensor.
inline static
Definition at line 25 of file WrapDimUtils.h.
References dim, maybe_wrap_dim(), and size().
inline static
Definition at line 9 of file WrapDimUtils.h.
References dim, and c10::maybe_wrap_dim().
Referenced by at::native::_allocate_or_resize_output_with_indices(), at::native::_aminmax_out_impl(), at::native::_reduction_with_indices_allocate_or_resize_output(), torch::autograd::generated::details::_safe_size(), at::native::_stack(), at::native::_stack_cpu(), at::BatchedTensorImpl::actualDim(), at::native::all_out(), at::native::any_out(), at::native::argmax_out(), at::native::argmin_out(), at::native::cat_sparse(), at::native::count_nonzero(), at::native::cross_out(), at::native::cummax_out(), at::native::cummin_out(), at::native::cumprod_backward(), at::native::default_alldims(), at::native::diag_embed(), at::native::diagonal(), at::native::diff_check_compatible_shape(), dim_list_to_bitset(), torch::autograd::generated::details::fft_r2c_backward(), at::native::flatten(), torch::cuda::gather(), torch::cuda::gather_out(), getGradInputPhysicalDim(), torch::jit::fuser::getMapSize(), torch::jit::getNativeOperation(), at::VmapPhysicalView::getPhysicalDim(), at::VmapPhysicalView::getPhysicalDims(), torch::jit::tensorexpr::TensorExprKernel::getReductionInfo(), at::native::glu_backward_out(), at::native::glu_out(), at::native::index_add_cpu_(), at::native::index_copy_(), at::native::index_fill_(), at::native::index_select_out_cpu_(), at::native::index_select_sparse(), legacy_cat_wrap_dim(), at::native::log_softmax_backward_cpu(), at::native::log_softmax_cpu(), at::native::max_out_impl(), maybe_wrap_dim(), at::native::min_out_impl(), at::native::mode_out(), at::native::movedim(), at::native::narrow(), at::native::narrow_copy_dense_cpu_out(), at::native::permute(), torch::autograd::generated::details::permute_backwards(), torch::autograd::generated::details::prod_backward(), at::namedinference::propagate_names_except(), at::native::quantized_topk_out_cpu(), torch::cuda::scatter(), torch::cuda::scatter_out(), at::native::select(), c10::TensorImpl::size(), at::TensorGeometry::size(), at::native::slice(), at::native::softmax_backward_cpu(), at::native::softmax_backward_sparse_input_preprocessing(), at::native::softmax_cpu(), torch::autograd::generated::details::split_backward(), torch::autograd::generated::details::split_with_sizes_backward(), at::native::squeeze(), at::native::squeeze_(), at::native::stack(), stack_batching_rule(), at::native::stack_out(), c10::TensorImpl::stride(), at::MetalTensorImpl< OpaqueHandle >::stride(), at::VulkanOpaqueTensorImpl< OpaqueHandle >::stride(), at::TensorGeometry::stride(), at::native::tensor_split(), at::namedinference::TensorNames::TensorNames(), at::native::topk_out_cpu(), at::native::transpose(), at::native::transpose_(), at::native::trapz(), at::native::unbind(), at::native::unflatten(), at::native::unfold(), at::native::unsqueeze(), at::native::unsqueeze_(), unsqueeze_batching_rule(), at::native::unsqueeze_qtensor(), torch::autograd::generated::details::unsqueeze_to(), and wrap_all_dims().
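As a small illustrative sketch (assuming only the ATen/WrapDimUtils.h header), maybe_wrap_dim maps a possibly negative dimension index onto the canonical non-negative index for a tensor with dim_post_expr dimensions, and raises an error for out-of-range values:

  #include <ATen/WrapDimUtils.h>
  #include <cassert>

  int main() {
    // For a 4-d tensor, dim == -1 refers to the last dimension (index 3),
    // while an already non-negative dim is returned unchanged.
    assert(at::maybe_wrap_dim(-1, /*dim_post_expr=*/4) == 3);
    assert(at::maybe_wrap_dim(0, /*dim_post_expr=*/4) == 0);
    // at::maybe_wrap_dim(4, 4) would throw, since valid indices are [-4, 3].
    return 0;
  }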
inline static
Definition at line 13 of file WrapDimUtils.h.
References dim, at::Tensor::dim(), maybe_wrap_dim(), and tensor.
inline static
Definition at line 17 of file WrapDimUtils.h.
References dim, maybe_wrap_dim(), and caffe2::tensors.
inline
Definition at line 54 of file WrapDimUtils.h.
References caffe2::dims, and maybe_wrap_dims_n().
Referenced by at::native::_fft_fill_with_conjugate_symmetry_(), at::native::_linalg_norm_matrix_out(), at::native::_linalg_norm_vector_out(), at::native::_sparse_sum(), at::native::_sparse_sum_backward_cpu(), at::native::frobenius_norm_out(), at::native::norm_sparse(), and at::native::nuclear_norm_out().
inline static
Definition at line 34 of file WrapDimUtils.h.
References dim, caffe2::dims, caffe2::int64_t, max, min, and TORCH_CHECK_INDEX.
Referenced by maybe_wrap_dims().
inline
Definition at line 109 of file BatchedTensorImpl.h.
References isBatchedTensor(), tensor, and unsafeGetBatchedImpl().
Referenced by at::native::_remove_batch_dim(), addBatchDim(), batchedTensorForLoopFallback(), batchedTensorInplaceForLoopFallback(), getLevelsAndLargestLogicalDim(), getPhysicalTensorAndLevels(), at::native::has_level(), inplaceIsVmapCompatible(), isPhysicalScalarTensor(), and at::MultiBatchVmapTransform::logicalToPhysical().
Definition at line 844 of file BatchingRegistrations.cpp.
References dim, at::Tensor::dim(), isBatchedTensor(), at::MultiBatchVmapTransform::logicalToPhysical(), torch.backends.cuda::matmul, at::meta::other, at::Tensor::sizes(), tensor, TORCH_CHECK, and TORCH_INTERNAL_ASSERT.
Referenced by TORCH_LIBRARY_IMPL().
Tensor at::movedim_batching_rule(const Tensor &self, IntArrayRef source, IntArrayRef destination)
Definition at line 399 of file BatchingRegistrations.cpp.
References at::MultiBatchVmapTransform::logicalToPhysical(), and c10::aten::movedim().
Referenced by TORCH_LIBRARY_IMPL().
Definition at line 760 of file BatchingRegistrations.cpp.
References dim, at::Tensor::dim(), isBatchedTensor(), at::MultiBatchVmapTransform::logicalToPhysical(), torch.backends.cuda::matmul, at::meta::other, at::Tensor::sizes(), tensor, TORCH_CHECK, TORCH_INTERNAL_ASSERT, and at::native::unsqueeze().
Referenced by TORCH_LIBRARY_IMPL().
Tensor at::new_empty_batching_rule(const Tensor &self, IntArrayRef size, c10::optional< ScalarType > dtype, c10::optional< Layout > layout, c10::optional< Device > device, c10::optional< bool > pin_memory)
Definition at line 937 of file BatchingRegistrations.cpp.
References device, caffe2::dtype, caffe2::layout, at::MultiBatchVmapTransform::logicalToPhysical(), at::native::pin_memory(), and size().
Referenced by TORCH_LIBRARY_IMPL().
Tensor at::new_empty_strided_batching_rule(const Tensor &self, IntArrayRef size, IntArrayRef stride, optional< ScalarType > dtype, optional< Layout > layout, optional< Device > device, optional< bool > pin_memory)
Definition at line 950 of file BatchingRegistrations.cpp.
References at::detail::defaultStrides(), device, caffe2::dtype, caffe2::layout, at::MultiBatchVmapTransform::logicalToPhysical(), at::native::pin_memory(), size(), at::native::storage_size_for(), stride(), and TORCH_CHECK.
Referenced by TORCH_LIBRARY_IMPL().
inline
Definition at line 92 of file Quantizer.cpp.
References allocator, check_size_nonnegative(), c10::CUDA, caffe2::dtype, c10::TensorImpl::empty_tensor_restride(), get_qtensorimpl(), get_sub_byte_tensor_size(), getCPUAllocator(), at::CUDAHooksInterface::getCUDADeviceAllocator(), at::detail::getCUDAHooks(), c10::GetDefaultMobileCPUAllocator(), globalContext(), caffe2::int64_t, c10::isQIntType(), c10::memory_format(), caffe2.perfkernels.hp_emblookup_codegen::options, prod_intlist(), detail::scalar_type(), c10::TensorImpl::set_sizes_contiguous(), sizes, caffe2::storage, tensor, TORCH_CHECK, and c10::typeMetaToScalarType().
Referenced by at::native::empty_affine_quantized(), at::native::empty_per_channel_affine_quantized(), at::PerTensorAffineQuantizer::quantize(), at::PerChannelAffineQuantizer::quantize(), and at::PerChannelAffineFloatQParamsQuantizer::quantize().
Tensor at::new_zeros_batching_rule(const Tensor &self, IntArrayRef size, optional< ScalarType > dtype, optional< Layout > layout, optional< Device > device, optional< bool > pin_memory)
Definition at line 919 of file BatchingRegistrations.cpp.
References c10::TensorOptions::device(), device, c10::TensorOptions::dtype(), caffe2::dtype, c10::TensorOptions::layout(), caffe2::layout, at::MultiBatchVmapTransform::logicalToPhysical(), caffe2.perfkernels.hp_emblookup_codegen::options, at::native::pin_memory(), c10::TensorOptions::pinned_memory(), and size().
Referenced by TORCH_LIBRARY_IMPL().
inline
Definition at line 133 of file Functions.h.
References at::Tensor::numel(), and tensor.
Referenced by _all_equal_numel(), apply_op(), at::native::calculate_quant_loss(), checkNumel(), at::native::choose_qparams_optimized(), at::detail::computeStride(), at::native::dot(), at::native::dot_check(), at::native::equal_quantized_cpu(), infer_size(), and at::native::vdot().
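A brief sketch: the free function mirrors Tensor::numel() and returns the total number of elements (the shape below is illustrative):

  #include <ATen/ATen.h>
  #include <cassert>

  int main() {
    at::Tensor t = at::zeros({2, 3, 4});
    // at::numel(t) and t.numel() both report 2 * 3 * 4 = 24 elements.
    assert(at::numel(t) == 24);
    assert(t.numel() == at::numel(t));
    return 0;
  }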
Definition at line 110 of file TensorOperators.h (repeated for each Tensor operator overload defined at that line).
inline
Definition at line 134 of file BatchedTensorImpl.h.
References at::BatchDim::dim(), at::BatchDim::level(), and out.
TORCH_API std::ostream & at::operator<<(std::ostream &out, const DeprecatedTypeProperties &t)
Definition at line 36 of file Formatting.cpp.
References out, and at::Tensor::toString().
Definition at line 9 of file Dimname.cpp.
References c10::namespaces::dimname(), out, c10::Symbol::toUnqualString(), and WILDCARD.
std::ostream & at::operator<<(std::ostream &out, const Range &range)
Definition at line 7 of file Range.cpp.
References out, and c10::prim::range().
inline static
Definition at line 18 of file Formatting.h.
inline static
Definition at line 25 of file Formatting.h.
References out.
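These stream-insertion helpers allow a tensor to be written directly to a std::ostream; a minimal sketch:

  #include <ATen/ATen.h>
  #include <iostream>

  int main() {
    at::Tensor t = at::eye(3);
    // A tensor can be streamed like any other value; the formatting itself is
    // handled by the at::print machinery documented further below.
    std::cout << t << std::endl;
    return 0;
  }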
TORCH_API std::ostream & at::operator<<(std::ostream &out, TensorGeometryArg t)
Definition at line 11 of file TensorUtils.cpp.
References at::Tensor::name(), and out.
Definition at line 110 of file TensorOperators.h (repeated for each Tensor operator overload defined at that line).
Definition at line 40 of file Dimname.h.
References at::Dimname::symbol().
Definition at line 110 of file TensorOperators.h (repeated for each Tensor operator overload defined at that line).
static
Definition at line 98 of file TensorIteratorReduce.cpp.
References AT_ASSERT, dim, matmul_dlmc_bench::end, find_split_dim(), caffe2::int64_t, iter, caffe2.python.helpers.control_ops::loop(), parallel_for(), and round_columns().
Referenced by at::TensorIteratorBase::parallel_reduce().
inline
Definition at line 34 of file ParallelNative.h.
References at::internal::_parallel_run(), matmul_dlmc_bench::end, in_parallel_region(), caffe2::int64_t, torch.cuda.profiler::start(), and TORCH_CHECK.
Referenced by at::native::_embedding_bag_dense_backward_cpu_sum_mean(), at::native::_grid_sampler_2d_cpu_fallback(), at::native::_grid_sampler_2d_cpu_fallback_backward(), _launch_tasks_tree(), at::native::_sparse_sum_backward_cpu(), at::native::add_dense_sparse_worker_cpu(), at::native::apply_triu_tril(), at::native::apply_triu_tril_single(), at::native::baddbmm_cpu_kernel(), at::native::batch_norm_backward_cpu_template(), at::native::batch_norm_cpu_transform_input_template(), at::native::batch_norm_cpu_update_stats_template(), compute_cpu(), at::sparse::coo_to_csr(), at::native::copy_impl(), at::native::dim_apply(), caffe2::RMSNormOp< Context >::DoRunWithType(), at::native::flip_cpu_kernel(), at::TensorIteratorBase::for_each(), at::TensorIteratorBase::foreach_reduced_elt(), at::native::index_select_out_cpu_(), torch::nn::parallel::parallel_apply(), parallel_dim_reduction(), at::native::prelu_cpu_backward_kernel_multi_weights(), at::native::prelu_cpu_kernel_multi_weights(), at::native::prelu_cpu_kernel_share_weights(), PackedEmbeddingBagWeight::prepack(), at::native::range_cpu_out(), caffe2::RMSNormGradientOp< Context >::RMSNormBackward(), at::native::slow_conv2d_forward_out_cpu(), at::native::slow_conv3d_forward_out_cpu(), at::native::sparse_mask_out_cpu_kernel(), two_pass_reduction(), and PackedEmbeddingBagWeight::unpack().
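A hedged sketch of the usual calling pattern: the lambda receives a half-open [begin, end) chunk of the index range, and chunks are distributed over the intra-op thread pool once the range exceeds the grain size (the grain size below is arbitrary):

  #include <ATen/Parallel.h>
  #include <cstdint>
  #include <vector>

  int main() {
    std::vector<float> data(1 << 20, 1.0f);
    // Scale every element in parallel; each task handles one contiguous chunk.
    at::parallel_for(0, static_cast<int64_t>(data.size()), /*grain_size=*/2048,
                     [&](int64_t begin, int64_t end) {
                       for (int64_t i = begin; i < end; ++i) {
                         data[i] *= 2.0f;
                       }
                     });
    return 0;
  }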
inline
Definition at line 58 of file ParallelNative.h.
References at::internal::_parallel_run(), at::internal::calc_num_tasks_and_chunk_size(), matmul_dlmc_bench::end, in_parallel_region(), caffe2::int64_t, compare-fastrnn-results::results, scalar_t, torch.cuda.profiler::start(), and TORCH_CHECK.
Referenced by at::native::prelu_cpu_backward_kernel_share_weights().
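A similar hedged sketch for parallel_reduce (the functor signatures below follow the ParallelNative.h declaration referenced here and are an assumption, not a verbatim excerpt): each task reduces its own [begin, end) chunk starting from the identity value, and the per-task partial results are then combined with the second functor:

  #include <ATen/Parallel.h>
  #include <cstdint>
  #include <vector>

  int main() {
    std::vector<double> data(100000, 0.5);
    // Sum the vector: the first lambda reduces one chunk, the second lambda
    // merges two partial sums.
    double total = at::parallel_reduce(
        0, static_cast<int64_t>(data.size()), /*grain_size=*/1024, /*ident=*/0.0,
        [&](int64_t begin, int64_t end, double partial) {
          for (int64_t i = begin; i < end; ++i) {
            partial += data[i];
          }
          return partial;
        },
        [](double a, double b) { return a + b; });
    (void)total;  // expected to be 50000.0
    return 0;
  }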
Tensor at::permute_batching_rule(const Tensor &self, IntArrayRef dims)
Definition at line 326 of file BatchingRegistrations.cpp.
References caffe2::dims, c10::SmallVectorTemplateCommon< T, typename >::end(), c10::SmallVectorImpl< T >::insert(), caffe2::int64_t, at::MultiBatchVmapTransform::logicalToPhysical(), c10::SmallVectorTemplateBase< T, isPodLike >::push_back(), and c10::SmallVectorImpl< T >::reserve().
Referenced by TORCH_LIBRARY_IMPL().
static
Definition at line 18 of file VmapTransforms.cpp.
References areBdimsAtFrontInOrder(), at::BatchedTensorImpl::bdims(), createBatchDimBitset(), c10::attr::idx(), caffe2::int64_t, ptr, c10::SmallVectorImpl< T >::reserve(), sizes, at::Tensor::sizes(), and at::BatchedTensorImpl::value().
Referenced by getPhysicalTensorAndLevels(), and at::MultiBatchVmapTransform::logicalToPhysical().
Definition at line 711 of file BatchingRegistrations.cpp.
References makeBatched(), at::meta::other, at::native::pow(), and unsafeGetBatchedImpl().
Referenced by TORCH_LIBRARY_IMPL().
inline static
Definition at line 21 of file Formatting.h.
References print().
TORCH_API std::ostream & at::print(std::ostream &stream, const Tensor &tensor_, int64_t linesize)
Definition at line 230 of file Formatting.cpp.
References __printFormat(), __printMatrix(), __printTensor(), at::Tensor::contiguous(), at::Tensor::data_ptr(), defaultfloat(), at::Tensor::defined(), at::Tensor::fw_grad(), caffe2::int64_t, at::Tensor::is_quantized(), at::Tensor::is_sparse(), c10::kCPU, c10::kDouble, c10::kPerChannelAffine, c10::kPerChannelAffineFloatQParams, c10::kPerTensorAffine, at::Tensor::ndimension(), at::Tensor::numel(), print(), printScale(), scale, at::Tensor::size(), at::Tensor::sizes(), stream, sz, tensor, at::Tensor::toString(), and c10::toString().
Referenced by torch.utils.benchmark.utils.valgrind_wrapper.timer_interface._ValgrindWrapper::__init__(), torchaudio_models.ScaledDotProduct::__init__(), torchaudio_models.MultiheadAttentionContainer::__init__(), torch.futures.Future::_add_done_callback(), update-caffe2-models.SomeClass::_download(), torch.nn.parallel.distributed::_dump_DDP_relevant_env_vars(), torch.utils.cpp_extension::_find_cuda_home(), torch.utils.cpp_extension::_find_rocm_home(), torch.utils.cpp_extension::_get_build_directory(), torch.utils.cpp_extension::_get_num_workers(), caffe2.python.onnx.backend.Caffe2Backend::_graph_to_net(), torch.utils.cpp_extension::_jit_compile(), torch.functional::_lu_impl(), operator_benchmark.benchmark_core.BenchmarkRunner::_measure_time(), torch.onnx.utils::_model_to_graph(), tools.nightly::_move_single(), tools.nightly::_nightly_version(), caffe2.python.onnx.backend.Caffe2Backend::_onnx_model_to_caffe2_net(), torch::nn::functional::_pad(), torch::nn::functional::_pad_circular(), torch.utils.cpp_extension::_prepare_ldflags(), update-caffe2-models.SomeClass::_prepare_model_data(), caffe2.python.rnn.rnn_cell_test_util::_prepare_rnn(), benchmark_ddp_rpc::_print_cont(), operator_benchmark.benchmark_core.BenchmarkRunner::_print_header(), operator_benchmark.benchmark_pytorch.PyTorchOperatorTestCase::_print_per_iter(), operator_benchmark.benchmark_core.BenchmarkRunner::_print_perf_result(), operator_benchmark.benchmark_core.BenchmarkRunner::_print_test_case_info(), torch.optim.lr_scheduler.ReduceLROnPlateau::_reduce_lr(), torch.distributed.rpc.internal::_run_function(), torch.jit._trace::_time(), caffe2.python.parallel_workers.WorkerCoordinator::_wait_finish(), torch.utils.cpp_extension::_write_ninja_file_and_build_library(), torch.utils.cpp_extension::_write_ninja_file_and_compile_objects(), torch.utils.tensorboard.writer.SummaryWriter::add_embedding(), torch.nn.modules.module.Module::apply(), torch.distributed.rpc.functions::async_execution(), torch.utils.tensorboard.summary::audio(), caffe2.experiments.python.convnet_benchmarks::Benchmark(), caffe2.python.convnet_benchmarks::Benchmark(), caffe2.contrib.nccl.nccl_ops_test::benchmark(), caffe2.contrib.nnpack.nnpack_ops_test::benchmark(), concat_benchmark::benchmark_concat(), utils::benchmark_module(), framework_overhead_benchmark::benchmark_simple_fn(), sparse_lengths_sum_nbit_benchmark::benchmark_sparse_lengths_sum(), sparse_normalize_benchmark::benchmark_sparse_normalize(), utils::benchmark_using_throughput_benchmark(), torch.nn.modules.module.Module::buffers(), caffe2.python.onnx.helper::c2_native_run_net(), update-models-from-caffe2::caffe2_to_onnx(), caffe2.python.device_checker.DeviceChecker::CheckNet(), caffe2.python.gradient_checker.GradientChecker::CheckSimple(), caffe2.python.device_checker.DeviceChecker::CheckSimple(), update-caffe2-models::cleanup(), torch.package.exporter.PackageExporter::close(), torch.futures::collect_all(), caffe2.python.rnn.lstm_comparison::Compare(), torch.quantization._numeric_suite::compare_model_outputs(), torch.quantization._numeric_suite_fx::compare_model_outputs_fx(), torch.quantization._numeric_suite::compare_model_stub(), torch.quantization._numeric_suite_fx::compare_model_stub_fx(), caffe2.python.lengths_reducer_fused_8bit_rowwise_ops_test::compare_rowwise(), torch.quantization._numeric_suite::compare_weights(), torch.quantization._numeric_suite_fx::compare_weights_fx(), torch.utils.hipify.hipify_python::compute_stats(), caffe2.python.lstm_benchmark::create_model(), 
torch._jit_internal::createResolutionCallbackFromFrame(), torch.nn.utils.prune::custom_from_mask(), caffe2.python.helpers.array_helpers::depth_concat(), caffe2.python.cnn.CNNModelHelper::DepthConcat(), caffe2.experiments.python.device_reduce_sum_bench.Benchmark::display(), tools.download_mnist::download(), update-models-from-caffe2::download_caffe2_model(), tools.clang_format_utils::download_clang_format(), update-caffe2-models::download_models(), update-models-from-caffe2::download_onnx_model(), caffe2.python.models.download::downloadFromURLToFile(), caffe2.python.models.download::downloadModel(), benchmark.Benchmark::dump_result(), package.tool.clang_coverage::export(), fast_nvcc::fast_nvcc_warn(), caffe2.python.helpers.fc::fc_sparse(), tools.clang_format_all::file_clang_formatted_correctly(), tools.clang_tidy::filter_files(), caffe2.python.mkl.rewrite_graph::fix_BoxWithNMSLimit(), torch.jit._freeze::freeze(), caffe2.python.fused_8bit_rowwise_conversion_ops_test::fused_rowwise_8bit_quantize_reference(), gen_vulkan_spv::genCppH(), gen_vulkan_glsl::genCppH(), tools.setup_helpers.cmake.CMake::generate(), caffe2.python.lstm_benchmark::generate_data(), update-caffe2-models::generate_models(), caffe2.python.docs.generator.OperatorDoc::generateInterface(), tools.clang_format_utils::get_and_check_clang_format(), caffe2.python.binarysize::GetSymbolTrie(), torch.nn.utils.prune::global_unstructured(), torch.utils.tensorboard._pytorch_graph::graph(), categorize.Categorizer::handle_commit(), torch.hub::help(), torch.utils.hipify.hipify_python::hipify(), torch.nn.utils.prune::identity(), caffe2.python.dyndep::InitOpsLibrary(), process::insert(), torch.nn.utils.prune::is_pruned(), torch::jit::isinstance(), package.tool.print_report::line_oriented_report(), torch.jit._serialization::load(), matmul_dlmc_bench::load_dataset(), caffe2.python.parallel_workers.Metrics::log_metrics(), tools.nightly::logging_manager(), benchmark::main(), diff::main(), launcher::main(), framework_overhead_benchmark::main(), compare::main(), functional_autograd_benchmark::main(), bench::main(), __main__::main(), caffe2.python.net_drawer::main(), caffe2.python.numa_benchmark::main(), tools.clang_tidy::main(), tools.download_mnist::main(), torch.distributed.launch::main(), torch.utils.bottleneck.__main__::main(), torch.utils.collect_env::main(), bench_gen::main(), caffe2.python.benchmark_generator::main(), caffe2.python.mint.app::main(), fused_rowwise_nbit_conversion_bench::main(), torch.utils.tensorboard.summary::make_video(), package.tool.clang_coverage::merge(), torch.nn.modules.module.Module::modules(), freeze.Freezer::msg(), torch.nn.modules.module.Module::named_buffers(), torch.nn.modules.module.Module::named_children(), torch.nn.modules.module.Module::named_modules(), torch.nn.modules.module.Module::named_parameters(), caffe2.experiments.python.convnet_benchmarks::net_DAG_Builder(), caffe2.experiments.python.SparseTransformer::netbuilder(), fast_nvcc::nvcc_dryrun_data(), torch.quantization._learnable_fake_quantize._LearnableFakeQuantize::observe_quant_params(), operator<<(), c10::operator<<(), caffe2.python.ideep.transform_ideep_net::Optimize(), torch.nn.modules.module.Module::parameters(), torch.utils.tensorboard._onnx_graph::parse(), torch.jit._state.EnabledProxy::parse_env(), package.tool.summarize_jsons::parse_json(), common::parse_pr_number(), torch.utils.hipify.hipify_python::preprocess(), torch.utils.hipify.hipify_python::preprocess_file_and_save_result(), torch.utils.hipify.hipify_python::preprocessor(), print(), 
torch.utils.bottleneck.__main__::print_autograd_prof_summary(), launcher::print_benchmark_results(), setup::print_box(), torch.utils.bottleneck.__main__::print_cprofile_summary(), fast_nvcc::print_dot_graph(), package.util.utils::print_error(), package.tool.print_report::print_file_oriented_report(), package.tool.print_report::print_file_summary(), fastrnns.bench::print_json_oss_format(), fastrnns.bench::print_json_pep_format(), package.util.utils::print_log(), torch.optim.lr_scheduler._LRScheduler::print_lr(), framework_overhead_benchmark::print_results(), fastrnns.bench::print_stderr(), torch.fx.graph.Graph::print_tabular(), package.tool.print_report::print_test_by_type(), package.tool.print_report::print_test_condition(), package.util.utils::print_time(), fast_nvcc::print_verbose_output(), torch.utils.tensorboard._pytorch_graph.GraphPy::printall(), caffe2.python.binarysize::PrintTrie(), torch.nn.utils.prune::random_structured(), caffe2.python.lazy_dyndep::RegisterOpsLibrary(), caffe2.python.nomnigraph::render(), setup::report(), torch.package.exporter.PackageExporter::require_module(), namespace_check::run(), caffe2.python.utils.DebugMode::run(), operator_benchmark.benchmark_core.BenchmarkRunner::run(), caffe2.python.timeout_guard.WatcherThread::run(), tools.setup_helpers.cmake.CMake::run(), torch.utils.bottleneck.__main__::run_autograd_prof(), record_function_bench::run_bench(), microbenchmarks::run_benchmarks(), tools.clang_format_all::run_clang_format(), tools.clang_format_all::run_clang_format_on_file(), coordinator.CoordinatorBase::run_coordinator(), torch.utils.bottleneck.__main__::run_cprofile(), torch.utils.bottleneck.__main__::run_env_analysis(), caffe2.python.models.seq2seq.translate::run_seq2seq_beam_decoder(), tools.clang_tidy::run_shell_command(), caffe2.python.muji_test.TestMuji::RunningAllreduceWithGPUs(), torch.package.exporter.PackageExporter::save_pickle(), torch.package.exporter.PackageExporter::save_source_string(), torch.jit._script::script(), generate_test_torchscripts::scriptAndSave(), torch.distributed.rpc.options.TensorPipeRpcBackendOptions::set_device_map(), operator_benchmark.benchmark_utils::set_mkl_threads(), operator_benchmark.benchmark_utils::set_omp_threads(), torch.futures.Future::set_result(), caffe2.python.models.download::signalHandler(), caffe2.python.workspace::StartImmediate(), caffe2.python.workspace::StartMint(), caffe2.contrib.aten.gen_op::supports(), benchmark::sweep(), fastrnns.profile::system(), caffe2.python.ideep.concat_split_op_test.TestConcatSplitOps::test_concat_with_TensorCPU(), caffe2.python.nomnigraph_test.TestBindings::test_convertToProto(), caffe2.python.ideep.convfusion_op_test.ConvFusionTest::test_convolution_affch_folding(), caffe2.python.ideep.convfusion_op_test.ConvFusionTest::test_convolution_bn_folding(), caffe2.python.ideep.convfusion_op_test.ConvFusionTest::test_convolution_grouped_sum_relu_fusion(), caffe2.python.ideep.convfusion_op_test.ConvFusionTest::test_convolution_relu_fusion(), caffe2.python.ideep.convfusion_op_test.ConvFusionTest::test_convolution_sum_fusion(), caffe2.python.ideep.convfusion_op_test.ConvFusionTest::test_convolution_sum_relu_fusion(), caffe2.python.hypothesis_test.TestOperators::test_dag_net_forking(), caffe2.python.ideep.conv_op_test.ConvTest::test_depthwise_convolution(), caffe2.python.ideep.elementwise_sum_op_test.ElementwiseSumTest::test_elementwise_sum_fallback(), caffe2.python.ideep.expanddims_squeeze_op_test.ExpandDimsSqueezeTest::test_expand_dims_fallback(), 
caffe2.python.ideep.fc_op_test.FcTest::test_fc_with_axis(), caffe2.python.ideep.fc_op_test.FcTest::test_fc_with_axis_w(), caffe2.python.models.imagenet_trainer_test_utils::test_forward_only_fast_simplenet(), caffe2.python.memonger_test.MemongerTest::test_forward_optim_tree_harder(), caffe2.quantization.server.fully_connected_fp16_test.FullyConnectedFP16Test::test_fully_connected(), caffe2.python.ideep.operator_fallback_op_test.TestFallbackOps::test_in_place(), caffe2.python.ideep.elementwise_sum_op_test.ElementwiseSumTest::test_int8_elementwise_sum(), caffe2.python.ideep.fc_op_test.FcTest::test_int8_fc_4_dims(), caffe2.python.ideep.pool_op_test.PoolTest::test_int8_pooling(), caffe2.python.ideep.relu_op_test.ReluTest::test_int8_relu(), caffe2.python.nomnigraph_test.TestBindings::test_match_graph(), caffe2.python.predictor.predictor_exporter_test.PredictorExporterTest::test_meta_net_def_net_runs(), caffe2.python.mkl.rewrite_graph_test.MKLRewriteTest::test_mkl_multi_output_rewrite(), caffe2.python.ideep.order_switch_op_test.OrderSwitchTest::test_nhwc2nchw(), caffe2.python.observer_test.TestObservers::test_observer_rnn_executor(), caffe2.python.optimizer_test.TestOptimizerContext::test_optimizer_context(), caffe2.python.ideep.pre_convert_test.PreConvertTest::test_preConvert(), caffe2.contrib.nnpack.nnpack_ops_test.NNPackOpsTest::test_relu_timings(), caffe2.python.ideep.shape_op_test.ShapeTest::test_shape(), caffe2.python.ideep.shape_op_test.ShapeTest::test_shape_with_axes(), caffe2.python.lengths_reducer_fused_8bit_rowwise_ops_test.TestLengthsReducerOpsFused8BitRowwise::test_sparse_lengths_mean(), caffe2.python.sparse_to_dense_test.TestSparseToDense::test_sparse_to_dense(), caffe2.python.hypothesis_test.TestOperators::test_sparse_to_dense(), caffe2.python.sparse_to_dense_test.TestSparseToDense::test_sparse_to_dense_with_data_to_infer_dim(), caffe2.python.ideep.spatial_bn_op_test.TestSpatialBN::test_spatialbn_train_mode(), caffe2.python.ideep.expanddims_squeeze_op_test.ExpandDimsSqueezeTest::test_squeeze_fallback(), caffe2.contrib.nccl.nccl_ops_test.NCCLOpsTest::test_timings(), caffe2.contrib.nnpack.nnpack_ops_test.NNPackOpsTest::test_timings(), caffe2.python.transformations_test.TestTransformations::test_transformer_FuseConvBNNoConvBiasDuplicatedName(), caffe2.python.core_gradients_test.TestGradientCalculation::test_two_grads(), caffe2.contrib.aten.aten_test.TestATen::test_unique(), caffe2.python.net_printer_test.TestNetPrinter::test_valid_job(), caffe2.python.optimizer_test.TestWeightDecay::test_weight_decay(), caffe2.python.hypothesis_test.TestOperators::test_weighted_sample_blobs_queue(), caffe2.python.core_gradients_test.TestGradientsAccumulationWithPassThroughGradients::testAccumulationRuns(), caffe2.python.core_gradients_test.TestGradientsAccumulationWithPassThroughGradients::testAddAndStaticConstant(), caffe2.python.muji_test.TestMuji::testAllreduceWithEightGPUs(), caffe2.python.muji_test.TestMuji::testAllreduceWithFourGPUs(), caffe2.python.muji_test.TestMuji::testAllreduceWithFourGPUsAndTwoGroups(), caffe2.python.muji_test.TestMuji::testAllreduceWithTwoGPUs(), caffe2.python.mkl.mkl_pool_speed_test.TestMKLBasic::testAveragePoolingSpeed(), caffe2.python.caffe_translator_test.TestNumericalEquivalence::testBlobs(), caffe2.python.mkl.mkl_speed_test.TestMKLBasic::testConvSpeed(), caffe2.python.mkl.mkl_fc_speed_test.TestMKLBasic::testFCSpeed(), caffe2.python.core_gradients_test.TestGradientCalculation::testGradientCalculationWithPrint(), 
caffe2.python.core_gradients_test.TestGradientCalculation::testGradientMappingUsingSumOp(), caffe2.python.mkl.mkl_LRN_speed_test.TestMKLBasic::testLRNSpeed(), caffe2.python.mkl.mkl_pool_speed_test.TestMKLBasic::testMaxPoolingSpeed(), caffe2.python.core_gradients_test.TestGradientCalculation::testMultiUseInputAndMultipleVersionsBig(), caffe2.python.observer_test.TestObservers::testObserver(), caffe2.python.mkl.mkl_speed_test.TestMKLBasic::testReLUSpeed(), caffe2.python.mkl.mkl_sbn_speed_test.TestMKLBasic::testSpatialBNTestingSpeed(), caffe2.python.mkl.mkl_sbn_speed_test.TestMKLBasic::testSpatialBNTrainingSpeed(), caffe2.python.core_gradients_test.TestGradientsAccumulationWithPassThroughGradients::testSubOpInMiddle(), caffe2.python.core_gradients_test.TestGradientCalculation::testVersionMismatch(), torch.futures.Future::then(), pipe::train(), caffe2.experiments.python.SparseTransformer::transFCRelu(), tools.download_mnist::unzip(), upload_scribe.ScribeUploader::upload(), update-caffe2-models::upload_models(), update-models-from-caffe2::upload_onnx_model(), caffe2.contrib.warpctc.ctc_ops_test.CTCOpsTest::verify_cost(), and caffe2.python.memonger::verify_graph_equality().
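A short sketch of calling this overload directly, which lets the caller pick the output stream and line width rather than relying on operator<< (the line width is illustrative; ATen/Formatting.h is assumed to declare it):

  #include <ATen/ATen.h>
  #include <ATen/Formatting.h>
  #include <iostream>

  int main() {
    at::Tensor t = at::rand({4, 4});
    // Write the formatted tensor to stdout, wrapping rows at 120 columns.
    at::print(std::cout, t, /*linesize=*/120);
    std::cout << std::endl;
    return 0;
  }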
static
Definition at line 135 of file Formatting.cpp.
References defaultfloat(), scale, and stream.
Referenced by __printMatrix(), and print().
inline
Definition at line 99 of file Utils.h.
References torch::autograd::accumulate(), and caffe2::int64_t.
Referenced by at::native::_cdist_backward(), at::native::vulkan::detail::add(), at::native::detail::as_cufft_embed(), batchedTensorForLoopFallback(), batchedTensorInplaceForLoopFallback(), at::native::vulkan::detail::VulkanTensor::Impl::buffer_size_for_sizes(), at::native::cdist_impl(), at::detail::computeStride(), at::detail::empty_cpu(), at::native::mobile::empty_with_tail_padding(), at::native::flatten(), at::native::group_norm(), torch::autograd::generated::details::infinitely_differentiable_native_layer_norm_backward(), at::native::matmul(), at::native::vulkan::detail::mul(), at::native::ConvParams::needs_64bit_indexing_no_split(), new_qtensor(), at::native::quantized_group_norm_impl(), torch::jit::resizeConstantScalarOrTensorToShape(), torch::autograd::generated::details::sparse_constructor_values_backward(), at::native::unflatten(), at::native::metal::unpack(), at::meta::upsample_nearest2d(), and at::native::zero_().
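For illustration, prod_intlist multiplies the entries of an integer list, which is how an element count is typically derived from a sizes vector (a minimal sketch, assuming ATen/Utils.h provides the ArrayRef overload documented here):

  #include <ATen/Utils.h>
  #include <cassert>

  int main() {
    // The product of a sizes list is the number of elements a tensor with
    // those sizes would hold; an empty list yields 1.
    assert(at::prod_intlist({2, 3, 4}) == 24);
    assert(at::prod_intlist({}) == 1);
    return 0;
  }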
inline
Definition at line 105 of file Utils.h.
References torch::autograd::accumulate(), matmul_dlmc_bench::end, and caffe2::int64_t.
at::REGISTER_CONTEXT(DeviceType::CPU, caffe2::CPUContext)
at::REGISTER_CONTEXT(DeviceType::IDEEP, caffe2::IDEEPContext)
at::REGISTER_COPY_BYTES_FUNCTION(DeviceType::CPU, DeviceType::CPU, caffe2::CopyBytesWrapper)
at::REGISTER_COPY_BYTES_FUNCTION(DeviceType::CPU, DeviceType::IDEEP, CopyBytesWrapper)
at::REGISTER_COPY_BYTES_FUNCTION(DeviceType::IDEEP, DeviceType::CPU, CopyBytesWrapper)
at::REGISTER_COPY_BYTES_FUNCTION(DeviceType::IDEEP, DeviceType::IDEEP, CopyBytesWrapper)
TORCH_API void at::releaseRecordAllFunctions()
Definition at line 491 of file record_function.cpp.
References TORCH_CHECK.
Referenced by at::CallbackManager::removeCallback(), and at::ThreadLocalStateGuard::~ThreadLocalStateGuard().
TORCH_API void at::removeCallback(CallbackHandle handle)
removeCallback removes a callback given the handle returned by addThreadLocalCallback or addGlobalCallback.
WARNING: removing a global callback is not thread-safe; no other code may run concurrently.
Definition at line 377 of file record_function.cpp.
References handle.
Referenced by torch.autograd.profiler::disableProfilerLegacy().
static
Definition at line 39 of file NamedTensorUtils.cpp.
References fastrnns.bench::action, name, microbenchmarks::names, and TORCH_CHECK.
Referenced by unify_from_right().
inline
Definition at line 34 of file NamedTensorUtils.h.
References TORCH_CHECK.
Referenced by at::native::all(), at::native::all_out(), at::native::any(), at::native::any_out(), at::native::argmax(), at::native::argmin(), at::native::argsort(), at::native::gather(), at::native::gather_out(), at::native::index_add(), at::native::index_add_(), at::native::index_copy(), at::native::index_copy_(), at::native::index_select(), at::native::index_select_out(), at::native::scatter(), at::native::scatter_(), at::native::scatter_add(), at::native::scatter_add_(), at::native::sort(), at::native::sort_out(), and at::native::squeeze_().
Tensor at::reshape_batching_rule(const Tensor &self, IntArrayRef shape)
Definition at line 407 of file BatchingRegistrations.cpp.
References at::MultiBatchVmapTransform::logicalToPhysical(), and shape.
Referenced by TORCH_LIBRARY_IMPL().
static
Definition at line 89 of file TensorIteratorReduce.cpp.
References dim, matmul_dlmc_bench::end, iter, and torch.jit._trace::make_tuple().
Referenced by parallel_dim_reduction().
static
Definition at line 206 of file BatchedFallback.cpp.
References at::Tensor::defined(), torch.distributions.constraints::stack, caffe2::tensors, and TORCH_CHECK.
Referenced by batchedTensorForLoopFallback().
Tensor at::select_backward_batching_rule(const Tensor &grad, IntArrayRef input_sizes, int64_t dim, int64_t index)
Definition at line 354 of file BatchingRegistrations.cpp.
References dim, getGradInputPhysicalDim(), grad, index, at::MultiBatchVmapTransform::logicalToPhysical(), and caffe2::zeros.
Referenced by TORCH_LIBRARY_IMPL().
Definition at line 343 of file BatchingRegistrations.cpp.
References dim, index, and at::MultiBatchVmapTransform::logicalToPhysical().
Referenced by TORCH_LIBRARY_IMPL().
TORCH_API void at::set_num_interop_threads(int)
Referenced by main(), and THPModule_setNumInteropThreads().
TORCH_API void at::set_num_threads(int)
Referenced by main(), and THPModule_setNumThreads().
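A hedged sketch of adjusting the intra-op thread count before running parallel kernels (at::get_num_threads is assumed to be available from the same ATen/Parallel.h header):

  #include <ATen/Parallel.h>
  #include <iostream>

  int main() {
    // Limit intra-op parallelism (e.g. at::parallel_for) to four worker threads;
    // this should happen before any parallel work is launched.
    at::set_num_threads(4);
    std::cout << "intra-op threads: " << at::get_num_threads() << std::endl;
    return 0;
  }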
TORCH_API void at::set_quantizer_(const Tensor &self, ConstQuantizerPtr quantizer)
Definition at line 210 of file Quantizer.cpp.
References get_qtensorimpl(), and at::QTensorImpl::set_quantizer_().
Referenced by at::native::copy_impl().
TORCH_API void at::set_record_function_tls_(const RecordFunctionTLS &tls)
Definition at line 62 of file record_function.cpp.
Referenced by at::ThreadLocalState::setThreadLocalState().
inline constexpr
The method should_include_kernel_dtype() returns true or false depending on whether the switching code for a specific dtype should be included, based on build-time constants generated from tracing model execution.
This method will be implemented via code generation and included in this file when code-gen is ready.
Definition at line 24 of file Dispatch.h.
TORCH_API bool at::shouldRunRecordFunction(bool *pre_sampled)
Definition at line 499 of file record_function.cpp.
References hasGlobalCallbacks().
Referenced by c10::Dispatcher::callBoxed(), c10::Dispatcher::callWithDispatchKey(), torch::jit::InterpreterStateImpl::checkAndStartRecordFunction(), torch::autograd::Node::operator()(), and runPureRecordFunctionBench().
TORCH_API std::string at::show_config()
Returns a detailed string describing the configuration of PyTorch.
Definition at line 124 of file Version.cpp.
References get_mkl_version(), get_mkldnn_version(), get_openmp_version(), caffe2::GetBuildOptions(), at::detail::getCUDAHooks(), hasCUDA(), at::CUDAHooksInterface::showConfig(), and used_cpu_capability().
Referenced by THPModule_showConfig().
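For example, the configuration string can simply be printed at startup to record how the library was built (a minimal sketch; ATen/Version.h is assumed to declare show_config):

  #include <ATen/Version.h>
  #include <iostream>

  int main() {
    // Prints build details such as compiler, BLAS/MKL, OpenMP and CUDA settings.
    std::cout << at::show_config() << std::endl;
    return 0;
  }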
inline
Definiti