pytorch  1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.
  Fossies Dox: pytorch-1.8.2.tar.gz  (unofficial and still experimental Doxygen-generated source code documentation)

core_overhead_benchmark.cc File Reference
#include "benchmark/benchmark.h"
#include <c10/util/Logging.h>
Include dependency graph for core_overhead_benchmark.cc:

Go to the source code of this file.

Macros

#define NOINLINE
 Copyright (c) 2016-present, Facebook, Inc. More...
 

Functions

int call (int id)
 
int call_no_logging (int id)
 
static void BM_APILogging (benchmark::State &state)
 
 BENCHMARK (BM_APILogging)
 
static void BM_NoAPILogging (benchmark::State &state)
 
 BENCHMARK (BM_NoAPILogging)
 
 BENCHMARK_MAIN ()
 

Macro Definition Documentation

◆ NOINLINE

#define NOINLINE

Copyright (c) 2016-present, Facebook, Inc.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Definition at line 24 of file core_overhead_benchmark.cc.

Function Documentation

◆ BENCHMARK() [1/2]

BENCHMARK ( BM_APILogging  )

◆ BENCHMARK() [2/2]

BENCHMARK ( BM_NoAPILogging  )

◆ BENCHMARK_MAIN()

BENCHMARK_MAIN ( )

◆ BM_APILogging()

static void BM_APILogging ( benchmark::State &  state)
static

Definition at line 36 of file core_overhead_benchmark.cc.

References call() and state.

◆ BM_NoAPILogging()

static void BM_NoAPILogging ( benchmark::State &  state)
static

Definition at line 47 of file core_overhead_benchmark.cc.

References call_no_logging() and state.

◆ call()

int call ( int  id)

Definition at line 27 of file core_overhead_benchmark.cc.

References C10_LOG_API_USAGE_ONCE.

Referenced by BM_APILogging(), c10::detail::infer_schema::createReturns< ReturnType, std::enable_if_t<!std::is_same< void, ReturnType >::value &&!guts::is_instantiation_of< std::tuple, ReturnType >::value > >::call(), c10::detail::infer_schema::createReturns< void, void >::call(), c10::guts::detail::extract_arg_by_filtered_index_< Condition, index, std::enable_if_t<!Condition< Head >::value >, Head, Tail... >::call(), c10::guts::detail::extract_arg_by_filtered_index_< Condition, index, std::enable_if_t< Condition< Head >::value &&index !=0 >, Head, Tail... >::call(), caffe2::DispatchHelper< FixedValues< FirstVal, Values... >, ExtraArgs... >::call(), c10::impl::call_functor_with_args_from_stack_(), torch::detail::call_torchbind_method_from_stack(), c10::detail::infer_schema::createArgumentVectorFromTypes(), c10::detail::infer_schema::createFunctionSchemaFromTraitsFlattenedReturns(), c10::detail::infer_schema::createFunctionSchemaFromTraitsSingleReturn(), torch::jit::CompilationUnit::define(), caffe2::CastOp< Context >::DoRunWithDstType(), caffe2::SparseLengthsReductionFakeFp16Op< InputTypes, USE_WEIGHT, USE_MEAN, USE_POSITIONAL_WEIGHT, USE_ACC_FP16, USE_FP16_FOR_EMBEDDING_ONLY >::DoRunWithType(), caffe2::BatchGatherGradientOp< Context >::DoRunWithType(), caffe2::MergeSingleMapFeatureTensorsOp< Context >::DoRunWithType(), caffe2::MergeMultiMapFeatureTensorsOp< Context >::DoRunWithType(), caffe2::CPUSparseLengthsReductionOp< T, InputTypes, USE_WEIGHT, USE_MEAN, USE_POSITIONAL_WEIGHT >::DoRunWithType(), caffe2::CreateMapOp< Context >::DoRunWithType(), caffe2::KeyValueToMapOp< Context >::DoRunWithType(), caffe2::PackSegmentsOp< Context >::DoRunWithType(), caffe2::UnpackSegmentsOp< Context >::DoRunWithType(), caffe2::AbstractSortedSegmentOp< T, SIndex, Context, Reducer, SparseFused, InputAccessor >::DoRunWithType(), caffe2::AbstractUnsortedSegmentOp< T, SIndex, Context, Reducer, SparseFused, InputAccessor >::DoRunWithType(), caffe2::AbstractLengthsOp< TData, TLengths, 
Context, Reducer, SparseFused, InputAccessor >::DoRunWithType(), caffe2::AbstractLengthsWithMainInputGradientOp< Tembedding, T, TLengths, Context, ReducerGradient, SparseFused, GradientNeedIndices >::DoRunWithType(), caffe2::SparseToDenseOp< Context >::DoRunWithType(), caffe2::SquareRootDivideOp< Context >::DoRunWithType(), caffe2::ScatterWeightedSumOp< T, Context >::DoRunWithType(), torch::jit::to_ir::emitBinaryOp(), torch::jit::to_ir::emitUnaryOp(), c10::guts::extract_arg_by_filtered_index(), c10::guts::filter_map(), torch::jit::tensorexpr::analysis::AccessInfo::getIndices(), c10::guts::if_constexpr(), c10::guts::typelist::map_types_to_values(), torch::jit::tensorexpr::CudaCodeGen::operator()(), torch::jit::tensorexpr::SimpleIREvaluator::operator()(), caffe2::FindOp< Context >::RunOnDevice(), caffe2::ONNXWhileOp< Context >::RunOnDevice(), caffe2::PackSegmentsOp< Context >::RunOnDevice(), caffe2::SparseLpRegularizerOp< T, Context >::RunOnDevice(), caffe2::SparseNormalizeOp< T, Context >::RunOnDevice(), caffe2::TileOp< Context >::RunOnDevice(), caffe2::BatchMatMulFP16FakeOp< Context, Engine, USE_ACC_FP16, USE_TMP_ACCUMULATOR, USE_CUSTOM_ACC32 >::RunOnDevice(), caffe2::SparseLengthsFused4BitRowwiseFakeFP16Op< Context, with_weights, use_fp16_for_embedding_only >::RunOnDevice(), caffe2::SparseLengthsFused8BitRowwiseFakeFP16Op< Context, with_weights, is_mean, use_acc_fp16, use_inv_scale, use_nnpi_fma, use_fp16_for_embedding_only, use_acc_fp32 >::RunOnDevice(), caffe2::SparseLengthsReductionFakeFp16Op< InputTypes, USE_WEIGHT, USE_MEAN, USE_POSITIONAL_WEIGHT, USE_ACC_FP16, USE_FP16_FOR_EMBEDDING_ONLY >::RunOnDevice(), caffe2::SpatialBNFakeLoweredFp16Op::RunOnDevice(), caffe2::SpatialBNFakeFp16Op::RunOnDevice(), caffe2::CuDNNActivationOp< kCuDNNActivationMode >::RunOnDevice(), caffe2::CuDNNActivationGradientOp< kCuDNNActivationMode >::RunOnDevice(), caffe2::ArgOp< Context, Reducer >::RunOnDevice(), caffe2::AssertOp< Context >::RunOnDevice(), caffe2::BatchBoxCoxOp< Context 
>::RunOnDevice(), caffe2::BatchGatherOp< Context >::RunOnDevice(), caffe2::BatchGatherGradientOp< Context >::RunOnDevice(), caffe2::BatchMatMulOp< Context, Engine >::RunOnDevice(), caffe2::BooleanMaskOpGradient< Context >::RunOnDevice(), caffe2::BoxWithNMSLimitOp< Context >::RunOnDevice(), caffe2::ChannelStatsOp< Context >::RunOnDevice(), caffe2::CopyRowsToTensorOp< Context >::RunOnDevice(), caffe2::CopyRowsToTensorGradientOp< Context >::RunOnDevice(), caffe2::BinaryElementwiseWithArgsGradientOp< NumericTypes, CPUContext, BinaryFunctorWithDefaultCtor< DivFunctor< CPUContext > >, SameTypeAsInput, SameTypeAsInput >::RunOnDevice(), caffe2::WhereOp< Context >::RunOnDevice(), caffe2::IsMemberOfOp< Context >::RunOnDevice(), caffe2::UnaryElementwiseWithArgsOp< InputTypes, Context, Functor, OutputTypeMap >::RunOnDevice(), caffe2::BinaryElementwiseWithArgsOp< InputTypes, Context, Functor, OutputTypeMap >::RunOnDevice(), caffe2::BinaryElementwiseWithArgsGradientOp< InputTypes, Context, Functor, OutputTypeMap, GradientTypeMap >::RunOnDevice(), caffe2::SumReduceLikeOp< Context >::RunOnDevice(), caffe2::CuDNNActivationOp< CUDNN_ACTIVATION_ELU >::RunOnDevice(), caffe2::CuDNNActivationGradientOp< CUDNN_ACTIVATION_ELU >::RunOnDevice(), caffe2::EnforceFiniteOp< Context >::RunOnDevice(), caffe2::EnsureClippedOp< T, Context >::RunOnDevice(), caffe2::ExpandOp< InputTypes, Context >::RunOnDevice(), caffe2::ExpandGradientOp< InputTypes, Context >::RunOnDevice(), caffe2::MergeDenseFeatureTensorsOp< Context >::RunOnDevice(), caffe2::MergeSingleScalarFeatureTensorsOp< Context >::RunOnDevice(), caffe2::MergeSingleScalarFeatureTensorsGradientOp< Context >::RunOnDevice(), caffe2::MergeSingleListFeatureTensorsOp< Context >::RunOnDevice(), caffe2::MergeSingleListOrMapFeatureTensorsGradientOp< Context >::RunOnDevice(), caffe2::MergeSingleMapFeatureTensorsOp< Context >::RunOnDevice(), caffe2::MergeMultiScalarFeatureTensorsOp< Context >::RunOnDevice(), 
caffe2::MergeMultiScalarFeatureTensorsGradientOp< Context >::RunOnDevice(), caffe2::MergeMultiListFeatureTensorsOp< Context >::RunOnDevice(), caffe2::MergeMultiMapFeatureTensorsOp< Context >::RunOnDevice(), caffe2::MergeMultiListOrMapFeatureTensorsGradientOp< Context >::RunOnDevice(), caffe2::FindDuplicateElementsOp< Context >::RunOnDevice(), caffe2::GatherFused8BitRowwiseOp< Context >::RunOnDevice(), caffe2::GatherOp< Context >::RunOnDevice(), caffe2::GatherRangesToDenseOp< Context >::RunOnDevice(), caffe2::MIOPENActivationOp< kMIOPENActivationMode >::RunOnDevice(), caffe2::MIOPENActivationGradientOp< kMIOPENActivationMode >::RunOnDevice(), caffe2::HistogramOp< Context >::RunOnDevice(), caffe2::IndexHashOp< Context >::RunOnDevice(), caffe2::IndexGetOp::RunOnDevice(), caffe2::IndexLoadOp::RunOnDevice(), caffe2::IndexStoreOp::RunOnDevice(), caffe2::LayerNormOp< Context >::RunOnDevice(), caffe2::LayerNormGradientOp< Context >::RunOnDevice(), caffe2::LengthsPadOp< Context >::RunOnDevice(), caffe2::SparseLengthsFused8BitRowwiseOp< Context, with_weights, is_mean >::RunOnDevice(), caffe2::SparseLengthsFusedNBitRowwiseOp< BIT_RATE, Context, with_weights, is_mean >::RunOnDevice(), caffe2::SparseLengthsSumSparseLookupOp::RunOnDevice(), caffe2::SparseLengthsNBitRowwiseSparseOp< BIT_RATE, with_weights, is_mean >::RunOnDevice(), caffe2::CPUSparseLengthsReductionOp< T, InputTypes, USE_WEIGHT, USE_MEAN, USE_POSITIONAL_WEIGHT >::RunOnDevice(), caffe2::SparseLengths8BitsRowwiseOp< Context, USE_WEIGHTS, USE_MEAN, OutDataT >::RunOnDevice(), caffe2::CreateMapOp< Context >::RunOnDevice(), caffe2::KeyValueToMapOp< Context >::RunOnDevice(), caffe2::MapToKeyValueOp< Context >::RunOnDevice(), caffe2::MergeIdListsOp< Context >::RunOnDevice(), caffe2::MishGradientOp< Context >::RunOnDevice(), caffe2::ModOp< Context >::RunOnDevice(), caffe2::BatchOneHotOp< Context >::RunOnDevice(), caffe2::PackRNNSequenceOpBase< Context, Forward >::RunOnDevice(), caffe2::UnpackSegmentsOp< Context 
>::RunOnDevice(), caffe2::GatherByKeyOp::RunOnDevice(), caffe2::PartitionOp::RunOnDevice(), caffe2::LengthsPartitionOp::RunOnDevice(), caffe2::PowOp< InputTypes, Context, Functor, TypeMap >::RunOnDevice(), caffe2::QuantileOp< Context >::RunOnDevice(), caffe2::int8::Int8ReshapeOp::RunOnDevice(), caffe2::int8::Int8SliceOp::RunOnDevice(), caffe2::SumReduceDimsOp< Context, FIRSTDIMS, NORMALIZE >::RunOnDevice(), caffe2::SumReduceDimsGradientOp< Context, FIRSTDIMS, NORMALIZE >::RunOnDevice(), caffe2::ReduceOp< InputTypes, Context, Reducer >::RunOnDevice(), caffe2::ReduceGradientOp< InputTypes, Context, Reducer >::RunOnDevice(), caffe2::SumSqrElementsOp< Context >::RunOnDevice(), caffe2::RemoveDataBlocksOp< Context >::RunOnDevice(), caffe2::ReplaceNaNOp< Context >::RunOnDevice(), caffe2::ReshapeOp< F, Context >::RunOnDevice(), caffe2::ReversePackedSegsOp< Context >::RunOnDevice(), caffe2::RMSNormOp< Context >::RunOnDevice(), caffe2::RMSNormGradientOp< Context >::RunOnDevice(), caffe2::AccumulateInputGradientOp< Context >::RunOnDevice(), caffe2::ScaleBlobsOp< Context >::RunOnDevice(), caffe2::ScaleOp< Context >::RunOnDevice(), caffe2::AbstractReduceFrontOrBackOp< T, Context, Reducer, FirstDim, InputAccessor >::RunOnDevice(), caffe2::AbstractReduceFrontOrBackGradientOp< T, Context, ReducerGradient, FirstDim >::RunOnDevice(), caffe2::AbstractSortedSegmentOp< T, SIndex, Context, Reducer, SparseFused, InputAccessor >::RunOnDevice(), caffe2::AbstractSortedSegmentGradientOp< T, SIndex, Context, ReducerGradient >::RunOnDevice(), caffe2::AbstractUnsortedSegmentOp< T, SIndex, Context, Reducer, SparseFused, InputAccessor >::RunOnDevice(), caffe2::AbstractUnsortedSegmentGradientOp< T, SIndex, Context, ReducerGradient >::RunOnDevice(), caffe2::AbstractLengthsOp< TData, TLengths, Context, Reducer, SparseFused, InputAccessor >::RunOnDevice(), caffe2::AbstractLengthsGradientOp< T, TLengths, Context, ReducerGradient, GradientNeedIndices >::RunOnDevice(), 
caffe2::AbstractLengthsWithMainInputGradientOp< Tembedding, T, TLengths, Context, ReducerGradient, SparseFused, GradientNeedIndices >::RunOnDevice(), caffe2::AbstractLengthsWithMainInputAndForwardOutputGradientOp< T, TLengths, Context, ReducerGradient >::RunOnDevice(), caffe2::SelfBinningHistogramOp< Context >::RunOnDevice(), caffe2::GatherPaddingOp< Context >::RunOnDevice(), caffe2::RemovePaddingOp< Context >::RunOnDevice(), caffe2::AddPaddingOp< Context >::RunOnDevice(), caffe2::SinusoidPositionEncodingOp< Context >::RunOnDevice(), caffe2::SliceOp< Context >::RunOnDevice(), caffe2::SliceGradientOp< Context >::RunOnDevice(), caffe2::CuDNNSoftmaxOp::RunOnDevice(), caffe2::CuDNNSoftmaxGradientOp::RunOnDevice(), caffe2::SparseToDenseMaskOp< Context >::RunOnDevice(), caffe2::SparseToDenseMaskGradientOp< Context >::RunOnDevice(), caffe2::SparseToDenseOp< Context >::RunOnDevice(), caffe2::SpatialBNOp< Context >::RunOnDevice(), caffe2::SpatialBNGradientOp< Context >::RunOnDevice(), caffe2::SquareRootDivideOp< Context >::RunOnDevice(), caffe2::TemplatePutOp< T >::RunOnDevice(), caffe2::StringJoinOp< Context >::RunOnDevice(), caffe2::SwishGradientOp< Context >::RunOnDevice(), caffe2::TileGradientOp< Context >::RunOnDevice(), caffe2::TransposeOp< Context >::RunOnDevice(), caffe2::UniqueOp< Context >::RunOnDevice(), caffe2::IsNanOp< Context >::RunOnDevice(), caffe2::PrintOp< Context >::RunOnDevice(), caffe2::SumOp< Context >::RunOnDevice(), caffe2::ScatterWeightedSumOp< T, Context >::RunOnDevice(), caffe2::ScatterOp< Context >::RunOnDevice(), caffe2::SegmentIdsToLengthsOp< Context >::RunOnDevice(), caffe2::SegmentIdsToRangesOp< Context >::RunOnDevice(), caffe2::LengthsToWeightsOp< Context >::RunOnDevice(), caffe2::GatherRangesOp< Context >::RunOnDevice(), caffe2::LengthsGatherOp< Context >::RunOnDevice(), caffe2::RangeOp< Context >::RunOnDevice(), caffe2::GatherDNNLowPOp< T >::RunOnDevice(), caffe2::SparseAdadeltaOp< Context >::RunOnDevice(), 
caffe2::SparseAdagradOp::RunOnDevice(), caffe2::RowWiseSparseAdagradOp< Context >::RunOnDevice(), caffe2::SparseAdamOp< T, Context >::RunOnDevice(), caffe2::RowWiseSparseAdamOp< T, Context >::RunOnDevice(), caffe2::SparseMomentumSGDUpdateOp< T, Context >::RunOnDevice(), caffe2::RowWiseSparseAdagradFusedWithSparseLengthsSumGradientOp< Tdata, T, TLengths, rowWiseAdagradT, is_mean >::RunOnDevice(), caffe2::RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp< Tdata, T, TLengths, rowWiseAdagradT >::RunOnDevice(), caffe2::RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp< Tdata, T, TLengths, rowWiseAdagradT >::RunOnDevice(), caffe2::RowWiseCounterOp::RunOnDevice(), caffe2::SparseStormOp< Context >::RunOnDevice(), caffe2::WeightScaleOp< Context >::RunOnDevice(), caffe2::SparseWngradOp< T, Context >::RunOnDevice(), c10::str(), torch::jit::tensorexpr::analysis::MemDependencyChecker::visit(), c10::impl::detail::with_out_arguments_reordered(), and c10::impl::detail::with_explicit_optional_tensors_< Return(TargetSignatureArgs...), Return(KernelSignatureArgs...), TORCH_FN_TYPE(KernelFunc)>::wrapper().

◆ call_no_logging()

int call_no_logging ( int  id)

Definition at line 32 of file core_overhead_benchmark.cc.

Referenced by BM_NoAPILogging().