pytorch
1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.
![]() ![]() |
#include <benchmark/benchmark.h>
#include <torch/csrc/jit/tensorexpr/ir_simplifier.h>
#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <torch/csrc/jit/tensorexpr/tensor.h>
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
#include <torch/torch.h>
#include "caffe2/operators/tanh_op.h"
#include "caffe2/operators/logit_op.h"
Go to the source code of this file.
Functions | |
void | optimizeSleef (tensorexpr::LoopNest *ln, tensorexpr::Tensor *target) |
void | optimizePointwise (tensorexpr::LoopNest *ln, tensorexpr::Tensor *target) |
static void | log_nnc_sleef (benchmark::State &state) |
static void | log_nnc_fast (benchmark::State &state) |
static void | log_aten (benchmark::State &state) |
static void | logit_nnc_sleef (benchmark::State &state) |
static void | logit_nnc_fast (benchmark::State &state) |
static void | logit_aten (benchmark::State &state) |
template<typename T > | |
void | logit_caffe2_impl (int size, const T *X, T *Y, float eps_=1e-6f) |
static void | logit_caffe2 (benchmark::State &state) |
static void | tanh_nnc_fast (benchmark::State &state) |
static void | tanh_aten (benchmark::State &state) |
static void | tanh_caffe2 (benchmark::State &state) |
Args ({2<< 5}) -> Args({2<< 8}) ->Args({2<< 12}) ->Args({2<< 14}) | |
Args ({2<< 5}) -> Args({2<< 8}) -> Args({2<< 12}) -> Args({2<< 14})
Referenced by torch::autograd::Function< T >::apply(), c10::impl::boxArgs(), c10::TypedOperatorHandle< Return(Args...)>::call(), c10::KernelFunction::call(), c10::Dispatcher::call(), c10::impl::BoxedKernelWrapper< Result(Args...), std::enable_if_t< can_box_all< Args... >::value &&is_tuple_of_mutable_tensor_refs< Result >::value &&!std::is_same< Result, guts::typelist::to_tuple_t< guts::typelist::take_t< guts::typelist::typelist< Args... >, sizeof...(Args) >=std::tuple_size< Result >::value ? std::tuple_size< Result >::value :sizeof...(Args) > > >::value, void >::call(), c10::impl::BoxedKernelWrapper< Result(Args...), std::enable_if_t< can_box_all< Args... >::value &&is_tuple_of_mutable_tensor_refs< Result >::value &&std::is_same< Result, guts::typelist::to_tuple_t< guts::typelist::take_t< guts::typelist::typelist< Args... >, sizeof...(Args) >=std::tuple_size< Result >::value ? std::tuple_size< Result >::value :sizeof...(Args) > > >::value, void >::call(), c10::callUnboxedKernelFunction(), c10::Dispatcher::callWithDispatchKey(), c10::TypedOperatorHandle< Return(Args...)>::callWithDispatchKey(), c10::Dispatcher::callWithDispatchKeySlowPath(), c10::optional< T >::emplace(), c10::SmallVectorImpl< T >::emplace_back(), torch::jit::TuplePacker< remaining, Args >::execute(), c10::guts::filter_map(), c10::generic_to(), at::native::metal::mpscnn::if(), torch::jit::pack(), and c10::Dispatcher::redispatch().
static void log_aten (benchmark::State &state)
Definition at line 90 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, c10::prim::abs(), at::native::log_out(), at::native::randn(), and state.
static void log_nnc_fast (benchmark::State &state)
Definition at line 60 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, caffe2::A, c10::prim::abs(), at::native::allclose(), compare-fastrnn-results::args, B, torch::jit::tensorexpr::Compute(), at::Tensor::data_ptr(), torch::jit::tensorexpr::fast_log(), torch::jit::i, c10::kFloat, c10::kInt, at::log(), N, optimizePointwise(), at::native::randn(), operator_benchmark.common.repeat_benchmark::s, torch::jit::tensorexpr::IRSimplifier::simplify(), state, and TORCH_CHECK.
static void log_nnc_sleef (benchmark::State &state)
Definition at line 30 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, caffe2::A, c10::prim::abs(), at::native::allclose(), compare-fastrnn-results::args, B, torch::jit::tensorexpr::Compute(), at::Tensor::data_ptr(), torch::jit::i, c10::kFloat, c10::kInt, at::log(), caffe2.contrib.playground.AnyExp::log, N, optimizeSleef(), torch::jit::tensorexpr::LoopNest::prepareForCodegen(), at::native::randn(), operator_benchmark.common.repeat_benchmark::s, torch::jit::tensorexpr::IRSimplifier::simplify(), state, and TORCH_CHECK.
static void logit_aten (benchmark::State &state)
Definition at line 174 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, c10::prim::abs(), c10::aten::clamp(), at::native::logit_out(), at::native::randn(), and state.
static void logit_caffe2 (benchmark::State &state)
Definition at line 195 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, c10::prim::abs(), at::native::allclose(), c10::aten::clamp(), at::Tensor::data_ptr(), logit_caffe2_impl(), at::native::logit_out(), N, at::native::nan_to_num(), at::native::randn(), state, TORCH_CHECK, X, and at::native::metal::mpscnn::Y.
template<typename T >
void logit_caffe2_impl (int size, const T *X, T *Y, float eps_=1e-6f)
Definition at line 186 of file bench_approx.cpp.
References size, X, and at::native::metal::mpscnn::Y.
Referenced by logit_caffe2().
static void logit_nnc_fast (benchmark::State &state)
Definition at line 137 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, caffe2::A, c10::prim::abs(), at::native::allclose(), compare-fastrnn-results::args, B, c10::aten::clamp(), torch::jit::tensorexpr::Compute(), at::Tensor::data_ptr(), torch::jit::tensorexpr::fast_log(), torch::jit::i, c10::kFloat, torch::jit::tensorexpr::kGT, c10::kInt, torch::jit::tensorexpr::kLT, at::native::logit(), max, min, N, at::native::nan_to_num(), optimizePointwise(), torch::jit::tensorexpr::LoopNest::prepareForCodegen(), at::native::randn(), operator_benchmark.common.repeat_benchmark::s, torch::jit::tensorexpr::IRSimplifier::simplify(), state, and TORCH_CHECK.
static void logit_nnc_sleef (benchmark::State &state)
Definition at line 100 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, caffe2::A, c10::prim::abs(), at::native::allclose(), compare-fastrnn-results::args, B, c10::aten::clamp(), torch::jit::tensorexpr::Compute(), at::Tensor::data_ptr(), torch::jit::i, c10::kFloat, torch::jit::tensorexpr::kGT, c10::kInt, torch::jit::tensorexpr::kLT, caffe2.contrib.playground.AnyExp::log, at::native::logit(), max, min, N, at::native::nan_to_num(), optimizePointwise(), torch::jit::tensorexpr::LoopNest::prepareForCodegen(), at::native::randn(), operator_benchmark.common.repeat_benchmark::s, torch::jit::tensorexpr::IRSimplifier::simplify(), state, and TORCH_CHECK.
void optimizePointwise (tensorexpr::LoopNest *ln, tensorexpr::Tensor *target)
Definition at line 20 of file bench_approx.cpp.
References torch::jit::tensorexpr::LoopNest::getLoopStmtsFor(), at::native::inner(), loops, c10::aten::outer(), torch::jit::tensorexpr::LoopNest::splitWithTail(), caffe2::tail, caffe2::target(), caffe2.perfkernels.hp_emblookup_codegen::unroll(), and torch::jit::tensorexpr::LoopNest::vectorize().
Referenced by log_nnc_fast(), logit_nnc_fast(), logit_nnc_sleef(), and tanh_nnc_fast().
void optimizeSleef (tensorexpr::LoopNest *ln, tensorexpr::Tensor *target)
Definition at line 13 of file bench_approx.cpp.
References torch::jit::tensorexpr::LoopNest::getLoopStmtsFor(), at::native::inner(), loops, c10::aten::outer(), torch::jit::tensorexpr::LoopNest::splitWithTail(), caffe2::tail, caffe2::target(), and torch::jit::tensorexpr::LoopNest::vectorize().
Referenced by log_nnc_sleef().
static void tanh_aten (benchmark::State &state)
Definition at line 245 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, c10::prim::abs(), at::native::randn(), state, and at::native::tanh_out().
static void tanh_caffe2 (benchmark::State &state)
Definition at line 255 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, c10::prim::abs(), at::native::allclose(), c, at::Tensor::data_ptr(), N, at::native::randn(), state, at::native::tanh(), at::native::tanh_out(), TORCH_CHECK, X, and at::native::metal::mpscnn::Y.
static void tanh_nnc_fast (benchmark::State &state)
Definition at line 215 of file bench_approx.cpp.
References caffe2.contrib.aten.gen_op::_, caffe2::A, c10::prim::abs(), at::native::allclose(), compare-fastrnn-results::args, B, torch::jit::tensorexpr::Compute(), at::Tensor::data_ptr(), torch::jit::tensorexpr::fast_tanh(), torch::jit::i, c10::kFloat, c10::kInt, N, optimizePointwise(), at::native::randn(), operator_benchmark.common.repeat_benchmark::s, torch::jit::tensorexpr::IRSimplifier::simplify(), state, at::native::tanh(), and TORCH_CHECK.