pytorch  1.8.2
About: PyTorch provides tensor computation (like NumPy) with strong GPU acceleration, and deep neural networks in Python built on a tape-based autograd system. This is an LTS (Long Term Support) release.

caffe2 Namespace Reference

Copyright (c) 2016-present, Facebook, Inc. More...

Namespaces

namespace  BlobStat
 
namespace  cast
 
namespace  contrib
 
namespace  convert
 
namespace  core
 
namespace  dag_utils
 
namespace  dataset_ops
 
namespace  db
 
namespace  detail
 To make a c10 operator "C10Add" callable from caffe2 as "C2MyAddOpName", just write the registration shown in the sketch below. More...
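 
 The registration that this brief refers to, per the comment in caffe2/core/operator_c10wrapper.h. A minimal sketch; the macro names should be treated as assumptions if your source tree differs:

    // Expose the (example) c10 operator "C10Add" to caffe2 under the
    // caffe2 operator name "C2MyAddOpName".
    #include "caffe2/core/operator_c10wrapper.h"

    // Export the CPU kernel:
    C10_EXPORT_C10_OP_TO_CAFFE2_CPU(C10Add, C2MyAddOpName)
    // Export the CUDA kernel:
    C10_EXPORT_C10_OP_TO_CAFFE2_CUDA(C10Add, C2MyAddOpName)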
 
namespace  details
 
namespace  distributed
 
namespace  elementwise_ops_utils
 
namespace  emulator
 
namespace  experiments
 
namespace  fb
 
namespace  FeedBlob
 
namespace  gather_helper
 
namespace  gelu_utils
 
namespace  gloo
 
namespace  glow
 
namespace  int8
 
namespace  internal
 
namespace  lc_op_util
 
namespace  load_save_op_util
 
namespace  math
 
namespace  memonger
 
namespace  nccl
 
namespace  nnapi
 
namespace  onnx
 
namespace  opt
 
namespace  perfkernels
 
namespace  pool_op_util
 
namespace  predictor_utils
 
namespace  proto
 
namespace  python
 
namespace  quantization
 
namespace  serialize
 
namespace  softmax_utils
 
namespace  SRLHelper
 
namespace  TensorInferenceFunction
 
namespace  tensorrt
 
namespace  TextFormat
 
namespace  tracing
 
namespace  transform
 
namespace  utils
 

Classes

struct  AbsFunctor
 
struct  AbsGradientFunctor
 
struct  AbstractLengthsDef
 
class  AbstractLengthsGradientOp
 
class  AbstractLengthsOp
 Segment reduction op with optional fused embedding lookup. More...
 
class  AbstractLengthsWithMainInputAndForwardOutputGradientOp
 
class  AbstractLengthsWithMainInputGradientOp
 
struct  AbstractReduceBackDef
 
struct  AbstractReduceFrontDef
 
class  AbstractReduceFrontOrBackGradientOp
 
class  AbstractReduceFrontOrBackOp
 Simple non-segmented reduction over the first few dimensions of the tensor. More...
 
struct  AbstractSortedSegmentDef
 
class  AbstractSortedSegmentGradientOp
 
class  AbstractSortedSegmentOp
 Segment reduction op with optional fused embedding lookup. More...
 
struct  AbstractSortedSegmentRangeDef
 
class  AbstractSortedSegmentRangeGradientOp
 
class  AbstractSortedSegmentRangeOp
 Base implementation for segment reduction op that leverages continuity of the data. More...
 
struct  AbstractSparseLengthsDef
 
struct  AbstractSparseSortedSegmentDef
 
struct  AbstractSparseUnsortedSegmentDef
 
struct  AbstractUnsortedSegmentDef
 
class  AbstractUnsortedSegmentGradientOp
 
class  AbstractUnsortedSegmentOp
 Unsorted segment reduction op with optional fused embedding lookup. More...
 
class  AccumulateHistogramOp
 
class  AccumulateInputGradientOp
 
class  AccumulateOp
 
class  AccuracyOp
 
struct  AcosFunctor
 
struct  AcosGradientFunctor
 
class  AdadeltaOp
 
struct  adagrad_update_prefetch_inlined
 
class  AdagradOp
 
class  AdamOp
 
class  AddDNNLowPOp
 
struct  AddFunctor
 
class  AddPaddingOp
 
class  AffineChannelGradientOp
 
class  AffineChannelOp
 
class  AlgorithmsCache
 
class  AliasOp
 Alias op makes the output and the input share the same underlying storage. More...
 
class  AliasWithNameOp
 
struct  AlignedDeleter
 
struct  AllocAligned
 
class  AllThreadLocalHelperVector
 A thread-safe vector of all ThreadLocalHelper instances, used to encapsulate the locking in the APIs for changes to the global AllThreadLocalHelperVector instance. More...
 
class  AlternateLearningRate
 
struct  Analysis
 
struct  AndFunctor
 
class  APMeterOp
 
struct  ArgMaxReducer
 
struct  ArgMinReducer
 
class  ArgOp
 
class  ArgumentHelper
 A helper class to index into arguments. More...
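 
 A minimal sketch of typical ArgumentHelper use inside an operator; the argument names "axis" and "pads" are hypothetical, and the class lives in caffe2/utils/proto_utils.h:

    #include <vector>
    #include "caffe2/utils/proto_utils.h"

    void ReadArgs(const caffe2::OperatorDef& def) {
      caffe2::ArgumentHelper helper(def);
      // Scalar argument with a default value.
      const int axis = helper.GetSingleArgument<int>("axis", 0);
      // Repeated (list) argument.
      const std::vector<int> pads = helper.GetRepeatedArgument<int>("pads");
      // Presence check before reading.
      if (helper.HasArgument("axis")) {
        // ...
      }
    }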
 
struct  AsinFunctor
 
struct  AsinGradientFunctor
 
class  AssertOp
 
class  AsyncErrorOp
 
class  AsyncNetBarrierOp
 
class  AsyncNetBase
 
struct  AsyncNetCancelled
 
class  AsyncNetExecutorHelper
 
class  AsyncSchedulingNet
 
class  AsyncTask
 
class  AsyncTaskFuture
 
class  AsyncTaskGraph
 
class  AsyncTaskGraphBase
 
struct  AtanFunctor
 
struct  AtanGradientFunctor
 
class  ATenOp
 
class  AtomicIterOp
 
class  AveragedLoss
 
class  AveragedLossGradient
 
struct  AveragePoolFunctor
 
struct  AveragePutStat
 
class  AvgExportedStat
 
class  BackendTransformerBase
 
struct  BackendTransformOptions
 
class  BaseInputAccessor
 
class  BaseReducer
 
class  BaseReducerGradient
 
class  BatchBoxCoxOp
 
class  BatchBucketizeOp
 
class  BatchBucketOneHotOp
 
class  BatchDenseToSparseOp
 
class  BatchGatherGradientOp
 
class  BatchGatherOp
 
class  BatchMatMulDNNLowPOp
 
class  BatchMatMulFP16FakeOp
 
class  BatchMatMulOp
 
class  BatchMomentsGradientOp
 
class  BatchMomentsOp
 
class  BatchOneHotOp
 
class  BatchPermutationDNNLowPOp
 
class  BatchPermutationGradientOp
 
class  BatchPermutationOp
 
class  BatchSparseToDenseOp
 
class  BatchToSpaceOp
 
class  BBoxTransformOp
 
class  BernoulliJSDGradientOp
 
class  BernoulliJSDOp
 
class  BinaryElementwiseDNNLowPOp
 
class  BinaryElementwiseWithArgsGradientOp
 
class  BinaryElementwiseWithArgsGradientOp< NumericTypes, CPUContext, BinaryFunctorWithDefaultCtor< DivFunctor< CPUContext > >, SameTypeAsInput, SameTypeAsInput >
 
class  BinaryElementwiseWithArgsOp
 
struct  BinaryFunctorWithDefaultCtor
 
class  BisectPercentileOp
 
struct  BitwiseAndFunctor
 
struct  BitwiseOrFunctor
 
struct  BitwiseXorFunctor
 
class  Blob
 Blob is a general container that hosts a typed pointer. More...
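 
 A minimal sketch of the Blob API, assuming a caffe2 build; Get/GetMutable are checked against the stored type at runtime:

    #include <vector>
    #include "caffe2/core/blob.h"

    void BlobDemo() {
      caffe2::Blob blob;
      // Creates (or replaces) the contained value with the given type.
      auto* vec = blob.GetMutable<std::vector<float>>();
      vec->push_back(1.0f);
      if (blob.IsType<std::vector<float>>()) {
        const auto& v = blob.Get<std::vector<float>>();  // read-only access
        (void)v;
      }
    }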
 
class  BlobDeserializerBase
 BlobDeserializerBase is an abstract class that deserializes a blob from a BlobProto or a TensorProto. More...
 
class  BlobSerializerBase
 BlobSerializerBase is an abstract class that serializes a blob to a string. More...
 
class  BlobsQueue
 
struct  BlobStatGetter
 
struct  BlobStatRegistry
 
class  BlobTestFooDeserializer
 
class  BlobTestFooSerializer
 
class  BlockingCounter
 
class  BlockingErrorOp
 
class  BooleanMaskOp
 
class  BooleanMaskOpGradient
 
class  BooleanUnmaskOp
 
class  BoundShapeInferencer
 
class  BoundShapeInferencerBase
 
struct  BoundShapeSpec
 
class  BoxWithNMSLimitOp
 
class  BRGNCHWCToPackedInt8BGRAStylizerDeprocessOp
 
class  BucketizeOp
 
class  BufferedTokenizer
 
class  ByteWeightDequantOp
 
class  Caffe2Annotation
 
class  Caffe2ModuleTestStaticDummyOp
 
class  Callback
 
class  CallbackImpl
 
struct  CastHelper
 
struct  CastHelper< std::string, SrcType >
 
class  CastOp
 
struct  CbrtFunctor
 
struct  CbrtGradientFunctor
 
class  CeilOp
 
class  ChannelBackpropStatsOp
 
class  ChannelShuffleDNNLowPOp
 
class  ChannelShuffleGradientOp
 
class  ChannelShuffleOp
 
class  ChannelStatsOp
 
struct  CharRange
 
class  CheckCounterDoneOp
 
class  CheckpointOp
 
class  ClipGradientOp
 
class  ClipOp
 
class  ClipTensorByScalingOp
 
class  CloseBlobsQueueOp
 
class  CloseRebatchingQueueOp
 
class  Col2ImOp
 
class  CollectAndDistributeFpnRpnProposalsOp
 
class  CollectRpnProposalsOp
 
class  CommonSubexpressionEliminationTransform
 Common Subexpression Elimination. More...
 
class  CompositeCosineLearningRate
 
class  CompositeCyclicalLearningRate
 
class  CompositeLearningRate
 
class  CompositeLearningRateItem
 
class  ComputeEqualizationScaleOp
 
class  ConcatAddMulReplaceNaNClipOp
 
class  ConcatBatchMatMulBatchGatherOp
 
class  ConcatDNNLowPOp
 
class  ConcatOp
 
class  ConditionalOp
 
class  ConstantFillOp
 
class  ConstantThenLinearWarmupLearningRate
 
class  ConstantWarmupLearningRate
 
struct  ConvArgs
 
class  ConvDNNLowPAcc16Op
 Quantized Conv operator with 16-bit accumulation. More...
 
class  ConvDNNLowPOp
 
class  ConvDNNLowPPackWeightOp
 Pack a weight matrix that can be used by DNNLOWP Int8Conv operators. More...
 
class  Converter
 
class  ConvGradientOp
 
class  ConvOp
 
class  ConvPoolDNNLowPOpBase
 
class  ConvPoolOpBase
 
class  ConvReluOp
 
class  ConvToNNPackTransform
 
class  ConvTransposeGradientOp
 
class  ConvTransposeOp
 
class  ConvTransposeUnpoolBase
 
class  CopyOnDeviceLikeOp
 
class  CopyOp
 
class  CopyRowsToTensorGradientOp
 
class  CopyRowsToTensorOp
 
struct  CosFunctor
 
struct  CosGradientFunctor
 
struct  CoshFunctor
 
struct  CoshGradientFunctor
 
class  CosineEmbeddingCriterionGradientOp
 
class  CosineEmbeddingCriterionOp
 
class  CosineLearningRate
 
class  CosineSimilarityGradientOp
 
class  CosineSimilarityOp
 
class  CountDownOp
 
class  Counter
 
class  CountUpOp
 
class  CPUContext
 The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement. More...
 
struct  CPUEventWrapper
 
class  CpuId
 Identification of an Intel CPU. More...
 
class  CPUSparseLengthsReductionOp
 
class  CreateBlobsQueueOp
 
class  CreateCounterOp
 
class  CreateDBOp
 
class  CreateMapOp
 
class  CreateRebatchingQueueOp
 
class  CreateScopeOp
 
class  CreateTextFileReaderOp
 
class  CrossEntropyGradientOp
 
class  CrossEntropyOp
 
class  CTCBeamSearchDecoderOp
 
class  CTCGreedyDecoderOp
 
class  CTCOp
 
struct  CubeFunctor
 
struct  CubeGradientFunctor
 
class  CUDAContext
 
struct  CudaDevicePropWrapper
 
struct  CudaEventWrapper
 
class  CudaProfileInitializeOp
 
class  CudaProfileStartOp
 
class  CudaProfileStopOp
 
class  CUDARecurrentNetworkExecutor
 
class  CudaRTCFunction
 
class  CuDNNActivationGradientOp
 
class  CuDNNActivationGradientOp< CUDNN_ACTIVATION_ELU >
 
class  CuDNNActivationOp
 
class  CuDNNActivationOp< CUDNN_ACTIVATION_ELU >
 
class  CuDNNActivationOpBase
 
class  CudnnConvGradientOp
 
class  CudnnConvOp
 
class  CudnnConvOpBase
 
class  cudnnFilterDescWrapper
 
class  CuDNNLRNGradientOp
 
class  CuDNNLRNOp
 
class  CuDNNSoftmaxGradientOp
 
class  CuDNNSoftmaxOp
 
class  CuDNNState
 
class  cudnnTensorDescWrapper
 cudnnTensorDescWrapper is the placeholder that wraps around a cudnnTensorDescriptor_t, allowing us to do descriptor change as-needed during runtime. More...
 
class  cudnnTypeWrapper
 cudnnTypeWrapper is a wrapper class that allows us to refer to the cudnn type in a template function. More...
 
class  cudnnTypeWrapper< at::Half >
 
class  cudnnTypeWrapper< double >
 
class  cudnnTypeWrapper< float >
 
struct  CuDNNWorkspace
 CuDNNWorkspace is a wrapper around a raw cuda pointer that holds the cudnn scratch space. More...
 
class  CuDNNWrapper
 CuDNNWrapper is a class that wraps the cudnn handles and cudnn workspaces. More...
 
class  CyclicalLearningRate
 
class  DataCoupleOp
 
class  DBExistsOp
 
struct  DecodedAudio
 
class  DecodedFrame
 
class  DefaultEngine
 
class  DeformConvGradientOp
 
class  DeformConvOp
 
class  DeformConvOpBase
 
class  DenseVectorToIdListOp
 
class  DequantizeDNNLowPOp
 
class  DequeueBlobsOp
 
class  DequeueRebatchingQueueOp
 
class  DetailedExportedStat
 
struct  DeviceTypeRegisterer
 
class  DiagonalFillOp
 
struct  DispatchHelper
 
struct  DispatchHelper< FixedValues< FirstVal, Values... >, ExtraArgs... >
 
struct  DispatchHelper< FixedValues<>, ExtraArgs... >
 
struct  DispatchHelper< TensorTypes2< FirstType, Types... >, ExtraArgs... >
 
struct  DispatchHelper< TensorTypes2< GenericTensorImplementation >, ExtraArgs... >
 
struct  DispatchHelper< TensorTypes2<>, ExtraArgs... >
 
struct  DispatchHelper< TensorTypes< FirstType, Types... >, ExtraArgs... >
 
struct  DispatchHelper< TensorTypes< GenericTensorImplementation >, ExtraArgs... >
 
struct  DispatchHelper< TensorTypes<>, ExtraArgs... >
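 
 DispatchHelper, together with the TensorTypes lists, implements caffe2's runtime-dtype-to-template dispatch: RunOnDevice inspects the dtype of an input tensor and forwards to the matching DoRunWithType<T> instantiation. A minimal sketch of the idiom (the operator itself is illustrative):

    #include "caffe2/core/operator.h"

    class MyOp final : public caffe2::Operator<caffe2::CPUContext> {
     public:
      USE_OPERATOR_FUNCTIONS(caffe2::CPUContext);
      using caffe2::Operator<caffe2::CPUContext>::Operator;

      bool RunOnDevice() override {
        // Calls DoRunWithType<float> or DoRunWithType<int32_t>,
        // depending on the runtime dtype of Input(0).
        return caffe2::DispatchHelper<caffe2::TensorTypes<float, int32_t>>::
            call(this, Input(0));
      }

      template <typename T>
      bool DoRunWithType() {
        // ... T-specific kernel ...
        return true;
      }
    };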
 
class  DistributeFpnProposalsOp
 
struct  DivFunctor
 
class  DNNLowPOp
 A convenient base class for C2 operators with DNNLOWP engine. More...
 
class  DoOp
 
class  DotProductGradientOp
 
class  DotProductOp
 
class  DotProductWithPaddingGradientOp
 
class  DotProductWithPaddingOp
 
class  DropoutGradientOp
 
class  DropoutOp
 
struct  EigenPowFunctor
 
class  ElementwiseLinearDNNLowPOp
 
class  ElementwiseLinearGradientOp
 
class  ElementwiseLinearOp
 
class  ElementwiseRTCOp
 A GPU operator that can generate limited elementwise operations. More...
 
struct  EluFunctor
 
struct  EluGradientFunctor
 
class  EnforceFiniteOp
 
class  EnqueueBlobsOp
 
class  EnqueueRebatchingQueueOp
 
class  EnsureClippedOp
 
class  EnsureCPUOutputOp
 
class  EnsureDenseOp
 Pass inputs to outputs. More...
 
struct  EQFunctor
 
struct  ErfFunctor
 
struct  ErfGradientFunctor
 
class  ErrorOp
 
class  Event
 
struct  EventCreateFunctionRegisterer
 
struct  EventErrorMessageFunctionRegisterer
 
struct  EventFinishFunctionRegisterer
 
struct  EventQueryFunctionRegisterer
 
struct  EventRecordFunctionRegisterer
 
struct  EventResetFunctionRegisterer
 
struct  EventSetCallbackFunctionRegisterer
 
struct  EventSetFinishedFunctionRegisterer
 
struct  EventWaitFunctionRegisterer
 
struct  ExecutionOptions
 
class  ExecutorHelper
 
class  ExecutorHelperDummyOp
 
class  ExpandDimsOp
 
class  ExpandGradientOp
 
class  ExpandOp
 
struct  ExpFunctor
 
class  ExpLearningRate
 
class  ExportedStat
 
struct  ExportedStatValue
 
struct  ExternalTensorDescriptor
 
class  ExternalTensorFunctionsBase
 
class  FailOp
 
class  FbFCPackedOperator
 C2 wrapper for fp16 gemm. More...
 
class  FbGemmPackOp
 
class  FeedBlobOp
 
class  FileReader
 
class  FileStoreHandler
 
class  FileStoreHandlerCreateOp
 
class  FillerOp
 
class  FindDuplicateElementsOp
 
class  FindOp
 
class  FixedDivisor
 
class  FixedDivisor< std::int32_t >
 
class  FixedLearningRate
 
struct  FixedType
 
struct  FixedValues
 
class  FlattenOp
 
class  FlattenToVecOp
 
class  FlexibleTopKGradientOp
 
class  FlexibleTopKOp
 
class  Float16ConstantFillOp
 
class  Float16UniformFillOp
 
class  FloatToFused8BitRowwiseQuantizedOp
 
class  FloatToFusedNBitFakeRowwiseQuantizedOp
 
class  FloatToFusedNBitRowwiseQuantizedOp
 
class  FloatToFusedRandRowwiseQuantizedOp
 
class  FloatToHalfOp
 
class  FloatToRowwiseQuantized8BitsOp
 
class  FloorOp
 
class  FooGradientDummyEngineOp
 
class  FooGradientOp
 
struct  ForEach
 ForEach is a unary functor that forwards each element of the input array into the elementwise Functor provided, and gathers the results of each call into the resulting array. More...
 
class  Fp16FCAccOp
 
class  FP16MomentumSGDUpdateOp
 
class  FP32MomentumSGDUpdateOp
 
class  FreeOp
 
class  FtrlOp
 
struct  FtrlParams
 
class  FullyConnectedDecompGradientOp
 
class  FullyConnectedDNNLowPAcc16Op
 Quantized FC operator with 16-bit accumulation. More...
 
class  FullyConnectedDNNLowPOp
 
class  FullyConnectedDNNLowPPackWeightOp
 
class  FullyConnectedFakeLowpFPOp
 
class  FullyConnectedGradientFakeLowpFPOp
 
class  FullyConnectedGradientOp
 
class  FullyConnectedOp
 
class  FullyConnectedOp_SPARSE
 
class  FullyConnectedOpDecomp
 
class  FullyConnectedOpPrune
 
class  FullyConnectedPruneGradientOp
 
class  FunHashGradientOp
 
class  FunHashOp
 
class  Fused8BitRowwiseQuantizedToFloatOp
 
class  FusedNBitRowwiseQuantizedToFloatOp
 
class  FusedRandRowwiseQuantizedToFloatOp
 
class  GateLearningRate
 
class  GatherByKeyOp
 
class  GatherDNNLowPOp
 
class  GatherFused8BitRowwiseOp
 
class  GatherOp
 
class  GatherPaddingOp
 
class  GatherRangesOp
 
class  GatherRangesToDenseOp
 
class  GaussianFillOp
 
struct  GEFunctor
 
struct  GeluFunctor
 
struct  GeluGradientFunctor
 
class  GenerateProposalsOp
 
struct  GenericTensorImplementation
 
struct  GetAddPaddingGradient
 
class  GetAveragedLossGradient
 
class  GetBatchGatherGradient
 
class  GetBatchToSpaceGradient
 
class  GetBernoulliJSDGradient
 
class  GetCastGradient
 
class  GetCol2ImGradient
 
class  GetConvGradient
 
class  GetConvTransposeGradient
 
struct  GetCopyGradient
 
class  GetCosineSimilarityGradient
 
struct  GetCPUToGPUGradient
 
class  GetCrossEntropyGradient
 
class  GetDotProductGradient
 
class  GetDotProductWithPaddingGradient
 
class  GetDropoutGradient
 
struct  GetElementwiseLinearGradient
 
class  GetExpandDimsGradient
 
class  GetFCDecompGradient
 
class  GetFloatToHalfGradient
 
class  GetFooGradient
 
struct  GetGPUToCPUGradient
 
class  GetGroupSpatialSoftmaxGradient
 
class  GetGRUUnitGradient
 
class  GetHalfToFloatGradient
 
class  GetIm2ColGradient
 
class  GetIntegralImageGradient
 
class  GetL1DistanceGradient
 
class  GetLabelCrossEntropyGradient
 
class  GetLeakyReluGradient
 
class  GetLRNGradient
 
class  GetLSTMUnitGradient
 
struct  GetMakeTwoClassGradient
 
class  GetMatMulGradient
 
class  GetMeanGradient
 
struct  GetNanCheckGradient
 
struct  GetNegateGradientGradient
 
class  GetNormalizeGradient
 
class  GetPackSegmentsGradient
 
class  GetPadImageGradient
 
class  GetPrependDimGradient
 
class  GetProfDagStatsOp
 
struct  GetRecurrentGradient
 
struct  GetRecurrentNetworkGradient
 
class  GetReduceBackMaxGradient
 
class  GetReduceBackSumGradient
 
class  GetReduceFrontMaxGradient
 
class  GetReduceFrontMeanGradient
 
class  GetReduceFrontSumGradient
 
struct  GetRemovePaddingGradient
 
class  GetResizeNearest3DGradient
 
class  GetResizeNearestGradient
 
class  GetReversePackedSegsGradient
 
class  GetSampleAsGradient
 
class  GetScaleGradient
 
class  GetSelectSmoothL1LossGradient
 
class  GetSeluGradient
 
class  GetSigmoidCrossEntropyLossGradient
 
struct  GetSigmoidCrossEntropyWithLogitsGradient
 
class  GetSigmoidFocalLossGradient
 
class  GetSmoothL1LossGradient
 
class  GetSoftmaxFocalLossGradient
 
class  GetSoftplusGradient
 
class  GetSpaceToBatchGradient
 
class  GetSquaredL2DistanceGradient
 
class  GetSquareRootDivideGradient
 
class  GetSqueezeGradient
 
class  GetSumElementsGradient
 
class  GetTopKGradient
 
class  GetTransposeGradient
 
class  GetTTSparseLengthsGradient
 
class  GetUnpackSegmentsGradient
 
class  GetUpsampleBilinearGradient
 
class  GetUpsampleNearestGradient
 
struct  GetWeightedSigmoidCrossEntropyWithLogitsGradient
 
struct  GetZeroGradientOpGradient
 
class  GFtrlOp
 
struct  GFtrlParams
 
class  GivenTensorByteStringToUInt8FillOp
 
class  GivenTensorFillOp
 
class  GlobalInitIsCalledGuard
 
class  GluOp
 
class  GPUFallbackOpEx
 A templated class to allow one to wrap a CPU operator as a CUDA operator. More...
 
class  GradientMakerBase
 
struct  GradientNotImplementedYet
 A helper class to indicate that the gradient mechanism is not ready. More...
 
struct  GradientOpsMeta
 A struct that holds the gradient operators and related gradient maps. More...
 
struct  GradientWrapper
 
class  GroupNormDNNLowPOp
 
class  GroupNormGradientOp
 
class  GroupNormOp
 
class  GroupSpatialSoftmaxGradientOp
 
class  GroupSpatialSoftmaxOp
 
class  GRUUnitGradientOp
 
class  GRUUnitOp
 
struct  GTFunctor
 
class  HalfToFloatOp
 
struct  HandleExecutorThreadExceptionsGuard
 
struct  HardSigmoidFunctor
 
struct  HardSigmoidGradientFunctor
 
class  HasElementsOp
 
class  HasScopeOp
 
class  HeatmapMaxKeypointOp
 
class  HillLearningRate
 
class  HistogramNetObserver
 
class  HistogramObserver
 Given min/max, collect histogram. More...
 
class  HistogramOp
 
class  HSoftmaxGradientOp
 
class  HSoftmaxOp
 
class  HSoftmaxOpBase
 
class  HSoftmaxSearchOp
 
class  HuffmanTreeHierarchyOp
 
class  IDEEPContext
 
class  IDEEPConvPoolOpBase
 
class  IDEEPFallbackOp
 A templated class to allow one to wrap a CPU operator as an IDEEP operator. More...
 
class  IDEEPOperator
 
class  IfOp
 
class  Im2ColOp
 
class  ImageInputOp
 
class  IncrementByOneOp
 
struct  IncrementPutStat
 
struct  Index
 
struct  IndexBase
 
class  IndexCreateOp
 
class  IndexDeserializer
 
class  IndexFreezeOp
 
class  IndexGetOp
 
class  IndexHashOp
 
class  IndexLoadOp
 
class  IndexSerializer
 
class  IndexSizeOp
 
class  IndexStoreOp
 
struct  InferenceGraph
 This struct stores information about the inference graph which defines the underlying math of BlackBoxPredictor. More...
 
class  InitRegisterer
 
class  InstanceNormGradientOp
 
class  InstanceNormOp
 
struct  Int8ConvDNNLowPPackedWeightBlob
 Packed weight matrix for DNNLOWP Int8Conv operator. More...
 
class  Int8ConvDNNLowpPackedWeightBlobShapeFunctions
 
struct  Int8FCDNNLowPPackedWeightBlob
 Packed weight matrix for DNNLOWP Int8FC operator. More...
 
class  Int8FCDNNLowpPackedWeightBlobShapeFunctions
 
class  Int8GenQuantParamsMinMaxOp
 
class  Int8GenQuantParamsOp
 
struct  Int8QuantParamsBlob
 
struct  Int8QuantSchemeBlob
 
class  Int8QuantSchemeBlobFillOp
 
class  IntegralImageGradientOp
 
class  IntegralImageOp
 
class  InvLearningRate
 
class  IsEmptyOp
 
class  IsMemberOfOp
 
class  IsMemberOfValueHolder
 
class  IsNanOp
 
class  IterOp
 
class  JustTest
 
class  JustTestAndDoesConstruct
 
class  JustTestAndNeverConstructs
 
class  JustTestCUDA
 
class  JustTestCUDNN
 
class  JustTestWithNonStandardIsTestArg
 
class  JustTestWithRequiredArg
 
class  JustTestWithSomeOutput
 
class  JustTestWithStandardIsTestArg
 
class  KeySplitOp
 
class  KeyValueToMapOp
 
class  L1DistanceGradientOp
 
class  L1DistanceOp
 
struct  L1Reducer
 
struct  L2Reducer
 
class  LabelCrossEntropyGradientOp
 
class  LabelCrossEntropyOp
 
class  LambdaRankNdcgGradientOp
 
class  LambdaRankNdcgOp
 
class  LarsOp
 
class  LayerNormFakeFp16Op
 
class  LayerNormGradientOp
 
class  LayerNormOp
 
class  LayerNormUtils
 
class  LeakyReluGradientOp
 
class  LeakyReluOp
 
class  LearningRateAdaptionOp
 
class  LearningRateFunctor
 
class  LearningRateOp
 
struct  LEFunctor
 
class  LengthsGatherOp
 
struct  LengthsOpGetGradient
 
class  LengthsPadOp
 
class  LengthsPartitionOp
 
class  LengthsRangeFillOp
 
class  LengthsSplitOp
 
class  LengthsTileOp
 
class  LengthsToOffsetsOp
 
class  LengthsTopKGradientOp
 
class  LengthsTopKOp
 
class  LengthsToRangesOp
 
class  LengthsToSegmentIdsOp
 
class  LengthsToShapeOp
 
class  LengthsToWeightsOp
 
class  LinearWarmupLearningRate
 
class  LoadOp
 
class  LocallyConnectedGradientOp
 
class  LocallyConnectedOp
 
class  LogFatalOp
 
struct  LogFunctor
 
struct  LogitFunctor
 
class  LogitGradientOp
 
class  LogMeanExpRangeReducer
 
class  LogMeanExpRangeReducer< T, CPUContext >
 
struct  LogMeanExpRangeReducerDef
 
class  LogMeanExpRangeReducerGradient
 
class  LogSumExpRangeReducer
 
class  LogSumExpRangeReducer< T, CPUContext >
 
struct  LogSumExpRangeReducerDef
 
class  LogSumExpRangeReducerGradient
 
class  LpNormGradientOp
 
class  LpNormOp
 
struct  LpPoolFunctor
 
class  LRNGradientOp
 
class  LRNOp
 
class  LRNOpBase
 
class  LSTMUnitDNNLowPOp
 
class  LSTMUnitGradientOp
 
class  LSTMUnitOp
 
struct  LTFunctor
 
struct  MakeAligned
 
class  MakeTwoClassGradientOp
 
class  MakeTwoClassOp
 
class  MapDeserializer
 
class  MapSerializer
 
class  MapToKeyValueOp
 
struct  MapTypeTraits
 
class  MarginRankingCriterionGradientOp
 
class  MarginRankingCriterionOp
 
class  MatMulOp
 
class  MaxGradientOp
 
class  MaxOp
 
struct  MaxPoolFunctor
 
class  MaxPoolGradientRTCOp
 
class  MaxPoolRTCOp
 
class  MaxPoolWithIndexGradientOp
 
class  MaxPoolWithIndexOp
 
class  MaxRangeReducer
 
class  MaxRangeReducer< T, CPUContext >
 
struct  MaxRangeReducerDef
 
class  MaxRangeReducerGradient
 
class  MaxReduceDimsGradientOp
 
class  MaxReduceDimsOp
 
class  MaxReducer
 
class  MaxReducer< T, CPUContext >
 
struct  MaxReducerDef
 
class  MaxReducerGradient
 
class  MaxReductionGradientOp
 
class  MaxReductionOp
 
class  MeanGradientOp
 
class  MeanOp
 
class  MeanRangeReducer
 
class  MeanRangeReducer< T, CPUContext >
 
struct  MeanRangeReducerDef
 
class  MeanRangeReducerGradient
 
class  MeanReducer
 
class  MeanReducer< T, CPUContext >
 
struct  MeanReducerDef
 
class  MeanReducerGradient
 
class  MergeDenseFeatureTensorsOp
 
class  MergeDimOp
 
class  MergeIdListsOp
 
class  MergeMultiListFeatureTensorsOp
 
class  MergeMultiListOrMapFeatureTensorsGradientOp
 
class  MergeMultiMapFeatureTensorsOp
 
class  MergeMultiScalarFeatureTensorsGradientOp
 
class  MergeMultiScalarFeatureTensorsOp
 
class  MergeSingleListFeatureTensorsOp
 
class  MergeSingleListOrMapFeatureTensorsGradientOp
 
class  MergeSingleMapFeatureTensorsOp
 
class  MergeSingleScalarFeatureTensorsGradientOp
 
class  MergeSingleScalarFeatureTensorsOp
 
class  MinGradientOp
 
class  MinOp
 
struct  MinReducer
 
class  MIOPENActivationGradientOp
 
class  MIOPENActivationOp
 
class  MIOPENActivationOpBase
 
class  MIOPENState
 
class  miopenTensorDescWrapper
 miopenTensorDescWrapper is the placeholder that wraps around a miopenTensorDescriptor_t, allowing us to do descriptor change as-needed during runtime. More...
 
class  miopenTypeWrapper
 miopenTypeWrapper is a wrapper class that allows us to refer to the miopen type in a template function. More...
 
class  miopenTypeWrapper< at::Half >
 
class  miopenTypeWrapper< float >
 
struct  MIOPENWorkspace
 MIOPENWorkspace is a wrapper around a raw cuda pointer that holds the miopen scratch space. More...
 
class  MIOPENWrapper
 MIOPENWrapper is a class that wraps the miopen handles and miopen workspaces. More...
 
struct  MishFunctor
 
class  MishGradientOp
 
class  ModOp
 
class  ModuleSchema
 A module schema that can be used to store specific information about different modules. More...
 
class  MomentsGradientOp
 
class  MomentsOp
 
class  MomentumSGDOp
 
class  MomentumSGDUpdateOp
 
class  MPIAllgatherOp
 
class  MPIAllreduceOp
 
class  MPIBroadcastOp
 
class  MPICommonWorldWrapper
 A simple wrapper over an MPI common world. More...
 
class  MPICreateCommonWorldOp
 
class  MPIDataTypeWrapper
 
class  MPIDataTypeWrapper< char >
 
class  MPIDataTypeWrapper< double >
 
class  MPIDataTypeWrapper< float >
 
class  MPIReceiveTensorOp
 
class  MPIReduceOp
 
class  MPISendTensorOp
 
struct  MPSCNNContext
 
class  MSRAFillOp
 
class  MulDNNLowPOp
 
struct  MulFunctor
 
class  MultiClassAccuracyOp
 
class  MutexDeserializer
 
class  MutexSerializer
 
class  NanCheckOp
 
class  NCCLAllGatherOp
 
class  NCCLAllreduceOp
 
class  NCCLBaseOp
 
class  NCCLBroadcastOp
 
class  NCCLReduceOp
 
class  NCCLReduceScatterOp
 
class  NCHW2NHWCOp
 
struct  NEFunctor
 
class  NegateGradientOp
 
struct  NegativeFunctor
 
class  NetBase
 
class  NetObserverReporter
 
class  NetObserverReporterPrint
 
class  NGramFromCategoricalOp
 
class  NHWC2NCHWOp
 
class  NNApi
 
class  NNPACKConvOp
 
class  NNPACKLeakyReluOp
 
class  NNPACKMaxPoolOp
 
class  NNPACKReluOp
 
class  NoDefaultEngineOp
 A helper class to denote that an op does not have a default engine. More...
 
class  NoGradient
 A helper class to indicate that the operator does not need gradient computation. More...
 
class  NoopOp
 
class  NormalizeGradientOp
 
class  NormalizeL1Op
 
class  NormalizeOp
 
class  NotFinishingOp
 
struct  NotFunctor
 
class  NumpyTileOp
 
class  Observable
 Inherit to make your class observable. More...
 
class  ObserverBase
 Use this to implement an Observer using the Observer Pattern template. More...
 
class  ObserverConfig
 
struct  OfflineTensor
 
class  OfflineTensorShapeFunctions
 
class  OneHotOp
 
class  OnnxifiOp
 
class  OnnxifiTransformer
 
struct  OnnxifiTransformerOptions
 
class  ONNXWhileOp
 
class  OpenCLContext
 
struct  OpenCLContextSingleton
 
class  Operator
 
class  OperatorAttachingNetObserver
 
class  OperatorBase
 
class  OpSchema
 A class to record the schema of an op. More...
 
class  OpSchemaRegistry
 A registry to hold all the operator schemas. More...
 
struct  OpTask
 Data structure for a scheduled task in the task queue. More...
 
class  OptimizationPass
 
class  OpWrapper
 Wrap a floating-point operator whose quantized inputs have type T. More...
 
struct  OrFunctor
 
class  OutputColumnMaxHistogramNetObserver
 
class  OutputColumnMaxHistogramObserver
 Given min/max, collect a histogram of the max value of each column of the tensor. More...
 
class  OutputMinMaxNetObserver
 
class  OutputMinMaxObserver
 
class  PackedGemmMatrixFP16ShapeFunctions
 
class  PackedInt8BGRANHWCToNCHWCStylizerPreprocessOp
 
class  PackRNNSequenceOpBase
 
class  PackSegmentsOp
 
class  PadEmptySamplesOp
 
class  PadImageGradientOp
 
class  PadImageOp
 
class  PairWiseLossGradientOp
 
class  PairWiseLossOp
 
class  ParallelNet
 
class  ParallelNetExecutorHelper
 
class  Params
 
class  PartitionOp
 
class  PartitionOpBase
 
class  PatternNetTransform
 PatternNetTransform allows you to create transforms using a simple interface. More...
 
class  PercentileOp
 
class  PerfNetObserver
 
class  PerfOperatorObserver
 
struct  PerformanceInformation
 
class  PerplexityOp
 
class  PieceWarmupLearningRate
 
class  PiecewiseLinearTransformOp
 
class  PolyLearningRate
 
class  PoolGradientOp
 
class  PoolOp
 
class  PowOp
 
class  Predictor
 
struct  PredictorConfig
 Stores parameters necessary for creating a PredictorInterface object. More...
 
class  PredictorTest
 
class  PrefetchOperator
 
class  PReluGradientOp
 
class  PReluOp
 
class  PrependDimOp
 
class  PrintOp
 
class  ProfDAGCounters
 A simple wrapper around prof_dag's counters. More...
 
class  ProfDAGReport
 
class  ProfDAGStats
 
class  ProfileCounter
 
class  ProfileObserver
 
class  ProfileOperatorObserver
 
class  PSRoIPoolGradientOp
 
class  PSRoIPoolOp
 
class  QConvOp
 
struct  QConvState
 
struct  QShapeInfo
 
class  QTensor
 
class  QTensorDeserializer
 
class  QTensorSerializer
 
class  QuantDecodeGradientOp
 
class  QuantDecodeOp
 
class  QuantDecompZstdOp
 
class  QuantileOp
 
class  QuantizeDNNLowPOp
 
class  RangeFillOp
 
class  RangeOp
 
class  RebatchingQueue
 
struct  ReciprocalFunctor
 
struct  ReciprocalGradientFunctor
 
class  RecurrentBaseOp
 
class  RecurrentGradientOp
 
class  RecurrentNetworkBlobFetcherOp
 
class  RecurrentNetworkExecutorBase
 RecurrentNetworkExecutor is a specialized runtime for recurrent neural networks (RNNs). More...
 
class  RecurrentNetworkGradientOp
 
class  RecurrentNetworkOp
 
class  RecurrentOp
 
class  RecurrentParamAccessOp
 
class  RedisStoreHandler
 
class  RedisStoreHandlerCreateOp
 
class  ReduceGradientOp
 
class  ReduceOp
 
class  ReduceTailSumOp
 
class  RegisterQuantizationParamsNetObserver
 Set quantization parameters of operators based on min/max collected from OutputMinMaxObserver. More...
 
class  RegisterQuantizationParamsWithHistogramNetObserver
 Set quantization parameters of operators based on min/max collected from OutputMinMaxObserver. More...
 
class  ReluDNNLowPOp
 
struct  ReluFakeFp16Functor
 
struct  ReluFunctor
 
struct  ReluGradientFunctor
 
struct  ReluNFunctor
 
struct  ReluNGradientFunctor
 
class  RemoveDataBlocksOp
 
class  RemovePaddingOp
 
class  ReplaceNaNOp
 
class  ResetCounterOp
 
class  ReshapeOp
 
class  ResizeLikeOp
 
class  ResizeNearest3DDNNLowPOp
 
class  ResizeNearest3DGradientOp
 
class  ResizeNearest3DOp
 
class  ResizeNearestDNNLowPOp
 
class  ResizeNearestGradientOp
 
class  ResizeNearestOp
 
class  RetrieveCountOp
 
class  ReversePackedSegsOp
 
class  RMACRegionsOp
 
class  RMSNormGradientOp
 
class  RMSNormOp
 
class  RmsPropOp
 
class  RNNApplyLinkOp
 
struct  RNNNetOperator
 Struct for an operator in a timestep and its dependencies. More...
 
class  RoIAlignGradientOp
 
class  RoIAlignOp
 
class  RoIAlignRotatedGradientOp
 
class  RoIAlignRotatedOp
 
class  RoIPoolFGradientOp
 
class  RoIPoolFOp
 
class  RoIPoolGradientOp
 
class  RoIPoolOp
 
class  RowMulOp
 
class  Rowwise8BitQuantizedToFloatOp
 
struct  rowwise_adagrad_update_inlined
 
class  RowWiseCounterOp
 
class  RowWiseSparseAdagradFusedWithSparseLengthsSumGradientOp
 Fused operator of SparseLengthsIndicesInGradientSumGradient (gradient of SparseLengthsSum) + RowWiseSparseAdagrad. More...
 
class  RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp
 
class  RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp
 
class  RowWiseSparseAdagradOp
 
class  RowWiseSparseAdamOp
 
struct  RsqrtFunctor
 
struct  RsqrtGradientFunctor
 
class  RunCountNetObserver
 
class  RunCountOperatorObserver
 
class  SafeDequeueBlobsOp
 
class  SafeEnqueueBlobsOp
 
struct  SameTypeAsInput
 
class  SampleAsGradientOp
 
class  SampleAsOp
 
struct  SampleInterval
 
class  SaveOp
 
class  ScaleBlobsOp
 
class  ScaleOp
 
class  ScatterAssignOp
 Update slices of the tensor in-place by overriding. More...
 
class  ScatterOp
 
class  ScatterWeightedSumOp
 Update slices of the tensor in-place with weighted sum. More...
 
class  SegmentIdsToLengthsOp
 
class  SegmentIdsToRangesOp
 
class  SegmentOneHotOp
 
struct  SegmentOpGetGradient
 
class  SelectGradientOpBase
 
class  SelectSmoothL1LossGradientOp
 
class  SelectSmoothL1LossOp
 
class  SelfBinningHistogramOp
 
class  SeluGradientOp
 
class  SeluOp
 
class  SequenceMaskOp
 
struct  ShapeInfo
 
class  ShapeOp
 
class  SigmoidCrossEntropyLossGradientOp
 
class  SigmoidCrossEntropyLossOp
 
class  SigmoidCrossEntropyWithLogitsGradientOp
 
class  SigmoidCrossEntropyWithLogitsOp
 
struct  SigmoidEmulatorFunctor
 
struct  SigmoidFakeIdealFp16Functor
 
class  SigmoidFocalLossGradientOp
 
class  SigmoidFocalLossOp
 
class  SigmoidFunctor
 
struct  SigmoidGradientFunctor
 
class  SignalHandler
 
struct  SignFunctor
 
struct  SimpleArray
 
class  SimpleNet
 
class  SimpleQueue
 
class  SimpleRefCountNet
 
struct  SinFunctor
 
class  SingleOpTransform
 Single Op Transform Base class. More...
 
struct  SinGradientFunctor
 
struct  SinhFunctor
 
struct  SinhGradientFunctor
 
class  SinusoidPositionEncodingOp
 
class  SizeOp
 
class  SkipIndices
 
class  SkipIndices<>
 
class  SleepOp
 
class  SliceGradientOp
 
class  SliceOp
 
class  SlopeLearningRate
 
class  SmartTensorPrinter
 
class  SmoothL1LossGradientOp
 
class  SmoothL1LossOp
 
class  SNPEOp
 
class  SoftmaxFocalLossGradientOp
 
class  SoftmaxFocalLossOp
 
class  SoftmaxGradientOp
 
class  SoftmaxOp
 
class  SoftmaxWithLossGradientOp
 
class  SoftmaxWithLossOp
 
class  SoftplusGradientOp
 
class  SoftplusOp
 
struct  SoftsignFunctor
 
struct  SoftsignGradientFunctor
 
class  SpaceBatchOpBase
 
class  SpaceToBatchOp
 
class  SparseAdadeltaOp
 
class  SparseAdagradOp
 
class  SparseAdamOp
 
class  SparseDropoutWithReplacementOp
 
class  SparseFtrlOp
 
class  SparseFunHashGradientOp
 
class  SparseFunHashOp
 
class  SparseLengths8BitsRowwiseOp
 
class  SparseLengthsFused4BitRowwiseFakeFP16Op
 
class  SparseLengthsFused8BitRowwiseFakeFP16Op
 
class  SparseLengthsFused8BitRowwiseOp
 
class  SparseLengthsFusedNBitRowwiseOp
 
class  SparseLengthsNBitRowwiseSparseOp
 
class  SparseLengthsReductionFakeFp16Op
 
class  SparseLengthsSumSparseLookupOp
 
class  SparseLpRegularizerOp
 
class  SparseMatrixReshapeOp
 
class  SparseMomentumSGDUpdateOp
 
class  SparseNormalizeOp
 
class  SparseStormOp
 
class  SparseToDenseMaskBase
 
class  SparseToDenseMaskGradientOp
 
class  SparseToDenseMaskOp
 
class  SparseToDenseOp
 
class  SparseWngradOp
 
class  SpatialBNDNNLowPOp
 Note this implementation assumes the SCALE, BIAS, EST_MEAN, and EST_VAR inputs are still in fp32, as is the epsilon argument. More...
 
class  SpatialBNFakeFp16Op
 
class  SpatialBNFakeLoweredFp16Op
 
class  SpatialBNGradientOp
 
class  SpatialBNOp
 
class  SpatialNarrowAsGradient
 
class  SpatialNarrowAsGradientOp
 
class  SpatialNarrowAsOp
 
class  SpatialSoftmaxWithLossGradientOp
 
class  SpatialSoftmaxWithLossOp
 
class  SplitByLengthsOp
 
class  SplitOp
 
struct  SqrFakeFp16Functor
 
struct  SqrFunctor
 
struct  SqrtFunctor
 
class  SquaredL2DistanceGradientOp
 
class  SquaredL2DistanceOp
 
class  SquareRootDivideOp
 
class  SqueezeOp
 
struct  Stat
 
struct  StaticLinkingProtector
 
class  StaticStat
 
class  StatRegistry
 Holds a map of atomic counters keyed by name. More...
 
class  StatRegistryCreateOp
 
class  StatRegistryExportOp
 
class  StatRegistryUpdateOp
 
class  StatValue
 
class  StdDevExportedStat
 
struct  StdDevPutStat
 
class  StepLearningRate
 
class  StopGradientOp
 
struct  StopOnSignal
 
class  StoreAddOp
 
class  StoreGetOp
 
class  StoreHandler
 
struct  StoreHandlerNotAvailableException
 
struct  StoreHandlerTimeoutException
 
class  StoreSetOp
 
class  StoreWaitOp
 
class  StormOp
 
class  StringDeserializer
 StringDeserializer is the deserializer for Strings. More...
 
class  StringJoinOp
 
class  StringJoinOpTest
 
struct  StringProvider
 
class  StringSerializer
 StringSerializer is the serializer for String. More...
 
class  StuckAsyncOp
 
class  StuckBlockingOp
 
class  StumpFuncIndexOp
 
class  StumpFuncOp
 
struct  SubFunctor
 
class  SumDNNLowPOp
 
class  SumElementsGradientOp
 
class  SumElementsIntOp
 
class  SumElementsOp
 
class  SumFP16FP16AccOp
 
class  SummarizeOp
 
class  SumOp
 
class  SumRangeReducer
 
class  SumRangeReducer< T, CPUContext >
 
struct  SumRangeReducerDef
 
class  SumRangeReducerGradient
 
class  SumReduceDimsGradientOp
 
class  SumReduceDimsOp
 
class  SumReduceLikeOp
 
class  SumReducer
 
class  SumReducer< T, CPUContext >
 
struct  SumReducerDef
 
class  SumReducerGradient
 
class  SumReluOp
 
class  SumSqrElementsOp
 
struct  SwishFunctor
 
class  SwishGradientOp
 
class  SyncErrorOp
 
struct  TanFunctor
 
struct  TanGradientFunctor
 
struct  TanhEmulatorFunctor
 
struct  TanhFakeIdealFp16Functor
 
class  TanhFunctor
 
struct  TanhGradientFunctor
 
struct  Task
 
struct  TemplatePutOp
 
class  Tensor
 Tensor class holds a shared pointer to the implementation TensorImpl, redirects API calls to TensorImpl; Copying of Tensor results in sharing the same underlying implementation object. More...
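 
 A minimal sketch of constructing and filling a CPU tensor; constructor and accessor names follow caffe2/core/tensor.h and should be treated as assumptions for other versions:

    #include <vector>
    #include "caffe2/core/tensor.h"

    void TensorDemo() {
      // 2x3 tensor on CPU; storage is allocated lazily by mutable_data.
      caffe2::Tensor t(std::vector<int64_t>{2, 3}, caffe2::CPU);
      float* data = t.mutable_data<float>();  // allocates as float
      for (int64_t i = 0; i < t.numel(); ++i) {
        data[i] = 0.0f;
      }
      // Alias() returns a tensor sharing the same underlying TensorImpl.
      caffe2::Tensor alias = t.Alias();
    }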
 
class  TensorCoreEngine
 Empty class to identify TensorCore-based math. More...
 
class  TensorDeserializer
 TensorDeserializer is the deserializer for Tensors. More...
 
class  TensorFiller
 
class  TensorPrinter
 
class  TensorProtosDBInput
 
class  TensorRTOp
 
class  TensorRTTransformer
 
class  TensorSerializer
 TensorSerializer is the serializer for Tensors. More...
 
struct  TensorTypes
 
struct  TensorTypes2
 
class  TestError
 
struct  TextFileReaderInstance
 
class  TextFileReaderReadOp
 
class  ThreadedRecurrentNetworkExecutor
 
class  ThreadLocalCUDAObjects
 A struct to host thread-local cuda objects. More...
 
class  ThreadLocalHelper
 ThreadLocalHelper is per thread. More...
 
class  ThreadLocalPtr
 
class  ThreadLocalPtrImpl
 ThreadLocalPtrImpl is per object. More...
 
class  ThreadPool
 
class  ThresholdedReluGradientOp
 
class  ThresholdedReluOp
 
class  ThrowChildThreadExceptionOp
 
class  ThrowExceptionOp
 
struct  ThrowInTheTowelIfGradientIsCalled
 A helper class to indicate that the operator should have no gradient. More...
 
class  TileGradientOp
 
class  TileOp
 
class  TimeCounter
 
class  TimeObserver
 
class  TimeOperatorObserver
 
class  Timer
 A simple timer object for measuring time. More...
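 
 A minimal usage sketch; the timer starts on construction, and Start() restarts it:

    #include "caffe2/core/timer.h"

    void TimeSomething() {
      caffe2::Timer timer;            // begins timing immediately
      // ... work to be measured ...
      const float ms = timer.MilliSeconds();
      timer.Start();                  // restart for the next measurement
      const float s = timer.Seconds();
      (void)ms; (void)s;
    }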
 
struct  TimerBeginOp
 
struct  TimerEndOp
 
struct  TimerGetAndEndOp
 
struct  TimerGetOp
 
class  TimerInstance
 
struct  Token
 
class  TokenizedString
 
class  Tokenizer
 
class  TopKGradientOp
 
class  TopKOp
 
class  Transform
 The Transform Base Object. More...
 
class  TransposeOp
 
class  TTContractionGradientOp
 
class  TTContractionOp
 
class  TTLinearGradientOp
 
class  TTLinearOp
 
class  TTPadGradientOp
 
class  TTPadOp
 
class  TTSparseLengthsSumGradientOp
 
class  TTSparseLengthsSumOp
 
class  TvmTransformer
 
struct  TvmTransformOptions
 
class  TypeIdentifier
 A type id is a unique id for a given C++ type. More...
 
class  TypeMeta
 TypeMeta is a thin class that allows us to store the type of a container such as a blob, or the data type of a tensor, with a unique run-time id. More...
 
struct  TypeNameTraits
 
struct  TypeNameTraits< int32_t >
 
struct  TypeNameTraits< int64_t >
 
class  UnaryElementwiseWithArgsDNNLowPOp
 
class  UnaryElementwiseWithArgsOp
 
struct  UnaryFunctorWithDefaultCtor
 
class  UniformFillOp
 
class  UniqueOp
 Deduplicates input indices vector and optionally produces reverse remapping. More...
 
class  UniqueUniformFillOp
 
class  UnpackSegmentsOp
 
class  UnsafeCoalesceOp
 
class  UnsupportedOperatorFeature
 
class  UpsampleBilinearGradientOp
 
class  UpsampleBilinearOp
 
class  UpsampleNearestGradientOp
 
class  UpsampleNearestOp
 
class  VariableLengthSequencePaddingOp
 
class  VideoDecoder
 
class  VideoInputOp
 
class  VideoIOContext
 
struct  VideoMeta
 
class  WallClockTimeOp
 
class  WeightedMultiSamplingOp
 
class  WeightedSampleDequeueBlobsOp
 
class  WeightedSampleOp
 
class  WeightedSigmoidCrossEntropyWithLogitsGradientOp
 
class  WeightedSigmoidCrossEntropyWithLogitsOp
 
class  WeightedSumGradientOp
 
class  WeightedSumOp
 
class  WeightedSumReducer
 
class  WeightedSumReducer< T, CPUContext >
 
struct  WeightedSumReducerDef
 
class  WeightedSumReducerGradient
 
class  WeightScaleOp
 
class  WhereOp
 
class  WhileOp
 
class  WngradOp
 
class  Worker
 
class  WorkersPool
 
class  Workspace
 Workspace is a class that holds all the related objects created during runtime: (1) all blobs, and (2) all instantiated networks. More...
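 
 A minimal sketch of driving a Workspace, assuming net_def holds a valid caffe2::NetDef (e.g. parsed from a serialized protobuf; the blob names are hypothetical):

    #include "caffe2/core/workspace.h"

    void RunOnce(const caffe2::NetDef& net_def) {
      caffe2::Workspace ws;
      // Create (or look up) a named input blob before running.
      caffe2::Blob* input = ws.CreateBlob("data");
      (void)input;
      // Instantiate the net, run it once, then discard it.
      const bool ok = ws.RunNetOnce(net_def);
      (void)ok;
      // GetBlob returns nullptr if the name is absent.
      caffe2::Blob* output = ws.GetBlob("output");
      (void)output;
    }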
 
class  WorkspaceOptimizationPass
 
class  WorkspaceTestFoo
 
class  XavierFillOp
 
struct  XorFunctor
 
class  YellowFinOp
 
class  ZeroGradientOp
 
class  ZmqContext
 
class  ZmqMessage
 
class  ZmqSocket
 

Typedefs

using SparseLengthsSumOp = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 0, 0 >
 
using SparseLengthsWeightedSumOp = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 1, 0 >
 
using SparseLengthsMeanOp = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 0, 1 >
 
using SparseLengthsSumAccFP16Op = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 0, 0, 0, 1 >
 
using SparseLengthsWeightedSumAccFP16Op = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 1, 0, 0, 1 >
 
using SparseLengthsMeanAccFP16Op = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 0, 1, 0, 1 >
 
using SparseLengthsSumFakeFP16EmbeddingOnlyOp = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 0, 0, 0, 0, 1 >
 
using SparseLengthsWeightedSumFakeFP16EmbeddingOnlyOp = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 1, 0, 0, 0, 1 >
 
using SparseLengthsMeanFakeFP16EmbeddingOnlyOp = SparseLengthsReductionFakeFp16Op< TensorTypes< float, at::Half >, 0, 1, 0, 0, 1 >
 
using SparseLengthsSumDef = AbstractSparseLengthsDef< float, int, CPUContext, SumReducerDef, true >
 
using SparseLengthsWeightedSumDef = AbstractSparseLengthsDef< float, int, CPUContext, WeightedSumReducerDef, true >
 
using SparseLengthsMeanDef = AbstractSparseLengthsDef< float, int, CPUContext, MeanReducerDef, true >
 
template<typename Key , typename Value >
using CaffeMap = std::map< Key, Value >
 
using CUDAGuard = c10::cuda::CUDAGuard
 
using TensorCUDA = Tensor
 
typedef void(* EventCreateFunction) (const DeviceOption &option, Event *)
 
typedef void(* EventRecordFunction) (Event *, const void *, const char *)
 
typedef void(* EventWaitFunction) (const Event *, void *)
 
typedef void(* EventFinishFunction) (const Event *)
 
typedef EventStatus(* EventQueryFunction) (const Event *)
 
typedef const std::string &(* EventErrorMessageFunction) (const Event *)
 
typedef void(* EventSetFinishedFunction) (const Event *, const char *)
 
typedef void(* EventResetFunction) (Event *)
 
typedef std::function< void()> EventCallbackFunction
 
typedef void(* EventSetCallbackFunction) (Event *, EventCallbackFunction)
 
typedef ObserverBase< NetBase > NetObserver
 
typedef std::function< std::unique_ptr< NetObserver >(NetBase *)> NetObserverCreator
 
typedef ObserverBase< OperatorBase > OperatorObserver
 
typedef c10::Registry< std::string, std::unique_ptr< OperatorBase >, const OperatorDef &, Workspace * > OperatorRegistry
 
typedef c10::Registry< std::string, std::unique_ptr< OperatorBase >, const OperatorDef &, Workspace * > *(* RegistryFunction) ()
 
using EnginePrefType = std::vector< std::string >
 
using PerOpEnginePrefType = CaffeMap< DeviceType, CaffeMap< std::string, EnginePrefType > >
 
using GlobalEnginePrefType = CaffeMap< DeviceType, EnginePrefType >
 
typedef std::function< bool(int)> ShouldContinue
 
using ExportedStatList = std::vector< ExportedStatValue >
 Holds names and values of counters exported from a StatRegistry. More...
 
using ExportedStatMap = std::unordered_map< std::string, int64_t >
 
using StorageImpl = at::StorageImpl
 
using Storage = at::Storage
 
using TensorCPU = Tensor
 
typedef TypeMeta(* TypeCall) (const void *)
 
typedef vector< int64_t >(* TensorInfoCall) (const void *, size_t *capacity, DeviceOption *device)
 
template<typename T >
using deleted_unique_ptr = std::unique_ptr< T, std::function< void(T *)> >
 
using ParallelFor = std::function< void(size_t, std::function< void(size_t)>)>
 
using T = float
 
using TInd = int
 
using Engine = DefaultEngine
 
using NumericTypes = TensorTypes< int32_t, int64_t, float, double >
 
using IntTypes = TensorTypes< int32_t, int64_t >
 
using BoolTypes = TensorTypes< bool >
 
using IntBoolTypes = TensorTypes< int32_t, int64_t, bool >
 
template<typename InputTypes , class Context , class Functor , class OutputTypeMap = SameTypeAsInput>
using UnaryElementwiseOp = UnaryElementwiseWithArgsOp< InputTypes, Context, UnaryFunctorWithDefaultCtor< Functor >, OutputTypeMap >
 
template<typename InputTypes , class Context , class Functor , class TypeMap = SameTypeAsInput>
using BinaryElementwiseOp = BinaryElementwiseWithArgsOp< InputTypes, Context, BinaryFunctorWithDefaultCtor< Functor >, TypeMap >
 
template<typename InputTypes , class Context , class Functor , class OutputTypeMap = SameTypeAsInput, class GradientTypeMap = SameTypeAsInput>
using BinaryElementwiseGradientOp = BinaryElementwiseWithArgsGradientOp< InputTypes, Context, BinaryFunctorWithDefaultCtor< Functor >, OutputTypeMap, GradientTypeMap >
 
template<class Context >
using GeluOp = UnaryElementwiseWithArgsOp< TensorTypes< float >, Context, GeluFunctor< Context > >
 
template<class Context >
using GeluGradientOp = BinaryElementwiseWithArgsOp< TensorTypes< float >, Context, GeluGradientFunctor< Context > >
 
using MapType64To64 = MapTypeTraits< int64_t, int64_t >::MapType
 
using MapType64To32 = MapTypeTraits< int64_t, int32_t >::MapType
 
using MapType32To32 = MapTypeTraits< int32_t, int32_t >::MapType
 
using MapType32To64 = MapTypeTraits< int32_t, int64_t >::MapType
 
using GPUFallbackOp = GPUFallbackOpEx< SkipIndices<> >
 
template<typename T >
using RoIAlignGradientCPUOp = RoIAlignGradientOp< T, CPUContext >
 
template<typename ScalarFunctor , typename TypeMap = FixedType<std::string>>
using StringElementwiseOp = UnaryElementwiseWithArgsOp< TensorTypes< std::string >, CPUContext, ForEach< ScalarFunctor >, TypeMap >
 
using ShapeInfoMap = std::unordered_map< std::string, ShapeInfo >
 
using PredictorParameters = std::map< std::string, std::shared_ptr< Blob > >
 
typedef std::unordered_map< ThreadLocalPtrImpl *, std::shared_ptr< void > > UnsafeThreadLocalMap
 Map of object pointer to instance in each thread, to achieve a per-thread (using thread_local), per-object (using the map) thread-local pointer. More...
 
typedef std::vector< ThreadLocalHelper * > UnsafeAllThreadLocalHelperVector
 
using DeviceType = at::DeviceType
 
using BatchPermutationFP32Op = CopyOp< CPUContext, CPUContext, CPUContext >
 
using ConvFp32Op = ConvOp< float, CPUContext >
 
using AddFp32Op = BinaryElementwiseOp< NumericTypes, CPUContext, AddFunctor< CPUContext > >
 
using ElementwiseLinearFp32Op = ElementwiseLinearOp< float, CPUContext >
 
using MulFp32Op = BinaryElementwiseOp< NumericTypes, CPUContext, MulFunctor< CPUContext > >
 
using FCFp32Op = FullyConnectedOp< CPUContext >
 
using GroupNormFP32Op = GroupNormOp< float, CPUContext >
 
using ResizeNearest3DFP32Op = ResizeNearest3DOp< float, CPUContext >
 
using ResizeNearestFP32Op = ResizeNearestOp< float, CPUContext >
 
using RebatchingQueuePtr = std::unique_ptr< RebatchingQueue >
 
template<typename T >
using EigenMatrixMap = Eigen::Map< Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using EigenArrayMap = Eigen::Map< Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using EigenVectorMap = Eigen::Map< Eigen::Matrix< T, Eigen::Dynamic, 1 > >
 
template<typename T >
using EigenVectorArrayMap = Eigen::Map< Eigen::Array< T, Eigen::Dynamic, 1 > >
 
template<typename T >
using ConstEigenMatrixMap = Eigen::Map< const Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using ConstEigenArrayMap = Eigen::Map< const Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using ConstEigenVectorMap = Eigen::Map< const Eigen::Matrix< T, Eigen::Dynamic, 1 > >
 
template<typename T >
using ConstEigenVectorArrayMap = Eigen::Map< const Eigen::Array< T, Eigen::Dynamic, 1 > >
 
using EigenOuterStride = Eigen::OuterStride< Eigen::Dynamic >
 
using EigenInnerStride = Eigen::InnerStride< Eigen::Dynamic >
 
using EigenStride = Eigen::Stride< Eigen::Dynamic, Eigen::Dynamic >
 
template<typename T >
using EigenOuterStridedMatrixMap = Eigen::Map< Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenOuterStride >
 
template<typename T >
using EigenOuterStridedArrayMap = Eigen::Map< Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenOuterStride >
 
template<typename T >
using ConstEigenOuterStridedMatrixMap = Eigen::Map< const Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenOuterStride >
 
template<typename T >
using ConstEigenOuterStridedArrayMap = Eigen::Map< const Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenOuterStride >
 
template<typename T >
using EigenStridedMatrixMap = Eigen::Map< Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenStride >
 
template<typename T >
using EigenStridedArrayMap = Eigen::Map< Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenStride >
 
template<typename T >
using ConstEigenStridedMatrixMap = Eigen::Map< const Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenStride >
 
template<typename T >
using ConstEigenStridedArrayMap = Eigen::Map< const Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic >, 0, EigenStride >
 
template<typename T >
using EArrXt = Eigen::Array< T, Eigen::Dynamic, 1 >
 
using EArrXf = Eigen::ArrayXf
 
using EArrXd = Eigen::ArrayXd
 
using EArrXi = Eigen::ArrayXi
 
using EArrXb = EArrXt< bool >
 
using EArrXI32 = EArrXt< int32_t >
 
using EArrXU16 = EArrXt< uint16_t >
 
using EArrXU8 = EArrXt< uint8_t >
 
using EArr3U8 = Eigen::Array< uint8_t, 3, 1 >
 
template<typename T >
using EArrXXt = Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic >
 
using EArrXXf = Eigen::ArrayXXf
 
using EArrXXI32 = EArrXXt< int32_t >
 
using EArrXXU16 = EArrXXt< uint16_t >
 
using EArrXXU8 = EArrXXt< uint8_t >
 
using EArrXXi = EArrXXt< int >
 
template<typename T >
using ERArrXXt = Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor >
 
using ERArrXXf = ERArrXXt< float >
 
using ERArrXXI32t = ERArrXXt< int32_t >
 
using ERArrXXU16t = ERArrXXt< uint16_t >
 
using ERArrXXU8t = ERArrXXt< uint8_t >
 
using ERArrXXi = ERArrXXt< int >
 
using ERArrXXi64t = ERArrXXt< int64_t >
 
using ERArrXXi32t = ERArrXXt< int32_t >
 
template<typename T >
using EVecXt = Eigen::Matrix< T, Eigen::Dynamic, 1 >
 
using EVecXd = Eigen::VectorXd
 
using EVecXf = Eigen::VectorXf
 
using ERVecXd = Eigen::RowVectorXd
 
using ERVecXf = Eigen::RowVectorXf
 
template<typename T >
using EMatXt = Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >
 
using EMatXd = Eigen::MatrixXd
 
using EMatXf = Eigen::MatrixXf
 
using EMatXU8 = EMatXt< uint8_t >
 
using EMatXU16 = EMatXt< uint16_t >
 
template<typename T >
using ERMatXt = Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor >
 
using ERMatXd = ERMatXt< double >
 
using ERMatXf = ERMatXt< float >
 
using ERMatXU8 = ERMatXt< uint8_t >
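 
The Eigen aliases above are thin Eigen::Map views over raw caffe2 buffers: they add vectorized matrix/array semantics without copying. EigenMatrixMap views a column-major matrix; the ERArrXXt/ERMatXt variants are row-major. A standalone sketch using equivalent local aliases:

    #include <Eigen/Core>

    template <typename T>
    using ConstEigenVectorArrayMap =
        Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
    template <typename T>
    using EigenVectorArrayMap =
        Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;

    // y[i] = alpha * x[i], computed through Eigen expressions over the
    // raw pointers; no intermediate allocation takes place.
    void Scale(const float* x, float* y, int n, float alpha) {
      ConstEigenVectorArrayMap<float> x_arr(x, n);
      EigenVectorArrayMap<float> y_arr(y, n);
      y_arr = x_arr * alpha;
    }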
 

Enumerations

enum class  CudaMemoryPoolType { NONE = 0 , CUB = 1 , THC = 2 }
 
enum  EventStatus { EVENT_INITIALIZED = 0 , EVENT_SCHEDULED = 1 , EVENT_SUCCESS = 2 , EVENT_FAILED = 3 }
 
enum  StorageOrder { UNKNOWN = 0 , NHWC = 1 , NCHW = 2 }
 
enum  ConvAlgorithm { CONV_ALGORITHM_AUTO = 0 , CONV_ALGORITHM_WINOGRAD = 1 , CONV_ALGORITHM_MAX }
 
enum  FusionType {
  FUSION_UNKNOWN = 0 , FUSION_CONV_RELU = 1 , FUSION_CONV_SUM = 2 , FUSION_CONV_SUM_RELU = 3 ,
  FUSION_MAX
}
 
enum  { ALGO_FWD = 0 , ALGO_WGRAD = 1 , ALGO_DGRAD = 2 }
 
enum class  PadMode { CONSTANT = 0 , REFLECT = 1 , EDGE = 2 }
 
enum class  QuantDecodeRunTy { RUN_ALWAYS , RUN_ONCE }
 
enum  RecurrentParamOpMode { SET_PARAM , GET_PARAM }
 
enum  FillerDistribution { FD_UNIFORM , FD_FIXEDSUM , FD_SYNTHETIC }
 
enum  FLowAlgType { FarnebackOpticalFlow = 0 , DensePyrLKOpticalFlow = 1 , BroxOpticalFlow = 2 , OpticalFlowDual_TVL1 = 3 }
 
enum  FlowDataType { Flow2C = 0 , Flow3C = 1 , FlowWithGray = 2 , FlowWithRGB = 3 }
 
enum  SpecialFps { SAMPLE_NO_FRAME = 0 , SAMPLE_ALL_FRAMES = -1 , SAMPLE_TIMESTAMP_ONLY = -2 }
 
enum  VideoResType { USE_WIDTH_HEIGHT = 0 , USE_SHORT_EDGE = 1 , ORIGINAL_RES = 2 }
 
enum  DecodeType { DO_TMP_JITTER = 0 , DO_UNIFORM_SMP = 1 , USE_START_FRM = 2 }
 

Functions

void swap (Blob &lhs, Blob &rhs)
 
std::ostream & operator<< (std::ostream &out, const Blob &v)
 
 CAFFE_KNOWN_TYPE (c10::intrusive_ptr< LinearPackedParamsBase >)
 
void reportTime (std::string type, double ts, std::string metric, std::string unit)
 
void splitSizes (const std::string &arg, int *ptr0, int *ptr1)
 
cv::Mat resizeImage (cv::Mat &img)
 
cv::Mat cropToRec (cv::Mat &img, int *height_ptr, int *width_ptr)
 
std::vector< float > convertToVector (cv::Mat &img)
 
std::vector< float > convertOneImage (std::string &filename, int *height_ptr, int *width_ptr)
 
int getBatchSize (int num_items)
 
TensorProtos writeValues (std::vector< std::vector< std::vector< float > > > &values, std::vector< std::vector< int > > &dims)
 
TensorProtos convertImages (std::string &image_file)
 
template<class TYPE >
vector< TYPE > splitString (std::string &line)
 
TensorProtos convertValues (std::string &file_name)
 
void ConvertToRawDataset (const string &input_db_name, const string &output_db_name)
 
void writeValues (std::vector< std::vector< std::vector< float > > > &values, std::vector< std::vector< int > > &dims, std::string output_file)
 
void convertImages ()
 
void convertValues ()
 
void ReadImage (std::ifstream *file, int *label, char *buffer)
 
void WriteToDB (const string &filename, const int num_items, const int &offset, db::DB *db)
 
void ConvertCIFAR ()
 
void ConvertImageDataset (const string &input_folder, const string &list_filename, const string &output_db_name, const bool)
 
uint32_t swap_endian (uint32_t val)
 
void convert_dataset (const char *image_filename, const char *label_filename, const char *db_path, const int data_limit)
 
void run ()
 
static int Split (int argc, char **argv)
 
 CAFFE_KNOWN_TYPE (detail::_guard_long_unique< long >)
 
constexpr bool operator< (TypeIdentifier lhs, TypeIdentifier rhs)
 
std::ostream & operator<< (std::ostream &stream, caffe2::TypeIdentifier typeId)
 
template<>
constexpr C10_EXPORT uint16_t TypeMeta::_typeMetaData< detail::_Uninitialized > () noexcept
 
bool operator== (const TypeMeta lhs, const TypeMeta rhs) noexcept
 
bool operator!= (const TypeMeta lhs, const TypeMeta rhs) noexcept
 
std::ostream & operator<< (std::ostream &stream, caffe2::TypeMeta typeMeta)
 
 REGISTER_CPU_OPERATOR (ATen, ATenOp< CPUContext >)
 
 OPERATOR_SCHEMA (ATen)
 
 REGISTER_CUDA_OPERATOR (ATen, ATenOp< CUDAContext >)
 
vector< TensorShape > TensorInferenceForBatchMatMul (const OperatorDef &def, const vector< TensorShape > &in)
 
OpSchema::Cost CostInferenceForBatchMatMul (const OperatorDef &def, const vector< TensorShape > &in)
 
 REGISTER_CPU_OPERATOR (BatchMatMulFP16Fake, BatchMatMulFP16FakeOp< CPUContext >)
 
 NumInputs (2) .NumOutputs(1) .SetDoc(R"DOC( Batch Matrix multiplication: Yi = Ai * Bi, where A has shape (dim0, dim1, ..., M, K) and B has shape (dim0, dim1, ..., K, N). )DOC")
 
 REGISTER_CPU_OPERATOR (SumFakeFp16, SumFP16FP16AccOp< CPUContext >)
 
 OPERATOR_SCHEMA (SumFakeFp16).NumInputs(1, INT_MAX).NumOutputs(1, INT_MAX)
 
 REGISTER_CPU_OPERATOR (AddFakeFp16, BinaryElementwiseOp< TensorTypes< float, int >, CPUContext, FP16PairWiseCPUFunctor< AddFunctor< CPUContext > > >)
 
 OPERATOR_SCHEMA (AddFakeFp16).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (DivFakeFp16, BinaryElementwiseOp< TensorTypes< float >, CPUContext, FP16PairWiseCPUFunctor< DivFunctor< CPUContext > > >)
 
 OPERATOR_SCHEMA (DivFakeFp16).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (MulFakeFp16, BinaryElementwiseOp< TensorTypes< float >, CPUContext, FP16PairWiseCPUFunctor< MulFunctor< CPUContext > > >)
 
 OPERATOR_SCHEMA (MulFakeFp16).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (SubFakeFp16, BinaryElementwiseOp< TensorTypes< float >, CPUContext, FP16PairWiseCPUFunctor< SubFunctor< CPUContext > > >)
 
 OPERATOR_SCHEMA (SubFakeFp16).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (Fp16FCAcc32, Fp16FCAccOp< CPUContext, DefaultEngine, false, true, false >)
 
NumInputs(3) .NumOutputs(1) .TensorInferenceFunction(std::…)
 
 REGISTER_CPU_OPERATOR (Fp16FCAcc16, Fp16FCAccOp< CPUContext, DefaultEngine, true, true, false >)
 
NumInputs(3) .NumOutputs(1) .TensorInferenceFunction(std::…)
 
 REGISTER_CPU_OPERATOR (Fp16FCAcc32NNPI, Fp16FCAccOp< CPUContext, DefaultEngine, false, false, true >)
 
NumInputs(3) .NumOutputs(1) .TensorInferenceFunction(std::…)
 
 REGISTER_CPU_OPERATOR (Fp16FCAcc16NNPI, Fp16FCAccOp< CPUContext, DefaultEngine, true, false, true >)
 
void transpose (const float *A, std::vector< float > &A_trans, int M, int N)
 
void custom_fp16_gemm_with_trans (const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int m, const int k, const int n, const float *A, const float *B, const float beta, float *C, const bool use_acc_fp16, const bool use_temp_accumulator)
 
static __m256 clamp_subnormals (__m256 input, const float epsilon_)
 
void custom_fp16_gemm (const int m, const int k, const int n, const float *A_fp16, const float *B_fp16, const float beta, float *C, const bool use_acc_fp16, const bool use_temp_accumulator)
 
void custom_fp16_gemv (const bool use_acc_fp16, const bool use_custom_acc32, const bool use_temp_accumulator, const CBLAS_TRANSPOSE trans_A, const int M, const int N, const float alpha, const float *A, const float *x, const float beta, float *y, CPUContext *context)
 
void custom_fp16_gemm_batched (const bool use_acc_fp16, const bool use_custom_acc32, const bool use_temp_accumulator, const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int batch_size, const int M, const int N, const int K, const float alpha, const float **A, const float **B, const float beta, float **C, CPUContext *context)
 
void custom_fp16_gemm_strided_batched (const bool use_acc_fp16, const bool use_custom_acc32, const bool use_temp_accumulator, const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int batch_size, const int M, const int N, const int K, const float alpha, const float *A, const int A_stride, const float *B, const int B_stride, const float beta, float *C, const int C_stride, CPUContext *context)
 
void transpose (const float *A, float *A_trans, int M, int N)
 
 REGISTER_CPU_OPERATOR (Int8DequantizeNNPI, int8::Int8DequantizeNNPIOp)
 
 IdenticalTypeAndShape () .NumInputs(1) .NumOutputs(1) .Input(0, …, "Int8 Tensor qX") .Output(0, "Y", "FP32 Tensor that represents mapped real value of qX.")
 
 REGISTER_CPU_OPERATOR (Int8QuantizeNNPI, int8::Int8QuantizeNNPIOp)
 
 Arg ("Y_scale", "Output tensor quantization scale") .Arg("Y_zero_point", "Output tensor quantization offset") .NumInputs(1) .NumOutputs(1) .Input(0, …, "FP32 Tensor X") .Output(0, "Y", "Int8 Tensor qX representing X with linear quantization.")
 
 REGISTER_CPU_OPERATOR (SwishFakeInt8NNPI, int8::SwishInt8NNPIOp)
 
 Arg ("X_scale", "Input tensor quantization scale") .Arg("X_zero_point", "Input tensor quantization offset") .Arg("Y_scale", …)
 
 REGISTER_CPU_OPERATOR (LayerNormFakeFP16NNPI, LayerNormFakeFp16Op< false >)
 
 OPERATOR_SCHEMA (LayerNormFakeFP16NNPI).NumInputs(…).NumOutputs(3)
 
 REGISTER_CPU_OPERATOR (LayerNormInt8QuantizeFakeNNPI, LayerNormFakeFp16Op< true >)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused4BitRowwiseFakeFP16NNPI, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, false >)
 
 NumInputs (3) .NumOutputs(1) .ValueKeyLengthInputFillers(…) .SetDoc(R"DOC( Performs the same operation as SparseLengthsSum, but operating on 4-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 2-byte scale and 2-byte bias). )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused4BitRowwiseQuantized") .Input(1, "INDICES", "Integer vector containing indices of the first dimension of DATA for the slices that are being aggregated") .Input(2, "LENGTHS", "Vector with the same sum of elements as the first dimension of DATA")
 
 NO_GRADIENT (SparseLengthsSumFused4BitRowwiseFakeFP16NNPI)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused4BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, false, true >)
 
 SetDoc (R"DOC( Performs the same operation as SparseLengthsSum, but operating on 4-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 2-byte scale and 2-byte bias). Convert only embedding entries using fake fp16. )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused4BitRowwiseQuantized")
 
 NO_GRADIENT (SparseLengthsSumFused4BitRowwiseFakeFP16EmbeddingOnly)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16NNPI, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true >)
 
 NumInputs (4) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(…) .SetDoc(R"DOC( Performs the same operation as SparseLengthsWeightedSum, but operating on 4-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 2-byte scale and 2-byte bias). )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused4BitRowwiseQuantized") .Input(1, "INDICES", "Integer vector containing indices of the first dimension of DATA for the slices that are being aggregated") .Input(2, "LENGTHS", "Vector with the same sum of elements as the first dimension of DATA") .Input(3, "WEIGHTS", "Vector of weights to scale rows of DATA with before reduction") .Output(0, …)
 
 NO_GRADIENT (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16NNPI)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true, true >)
 
 SetDoc (R"DOC( Performs the same operation as SparseLengthsWeightedSum, but operating on 4-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 2-byte scale and 2-byte bias). Convert only embedding entries using fake fp16. )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused4BitRowwiseQuantized")
 
 NO_GRADIENT (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16EmbeddingOnly)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext >)
 
NumInputs(3) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext >::…)
 
 NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, false, false, false, true >)
 
 SetDoc (R"DOC( Performs the same operation as SparseLengthsSum, but operating on 8-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 4-byte scale and 4-byte bias). Convert only embedding entries using fake fp16. )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused8BitRowwiseQuantized") .Input(1, "INDICES", "Integer vector containing indices of the first dimension of DATA for the slices that are being aggregated") .Input(2, "LENGTHS", "Vector with the same sum of elements as the first dimension of DATA")
 
 NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16EmbeddingOnly)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, true, false, true >)
 
 SetDoc (R"DOC( Performs the same operation as SparseLengthsSum, but operating on 8-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 4-byte scale and 4-byte bias). )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused8BitRowwiseQuantized")
 
 NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16NNPI)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP32NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, false, false, false, false, true >)
 
 NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP32NNPI)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16AccFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, true >)
 
 NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16AccFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16AccInvScaleFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, 0, true, true >)
 
 NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16AccInvScaleFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true >)
 
 SetDoc (R"DOC( Performs the same operation as SparseLengthsWeightedSum, but operating on 8-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 4-byte scale and 4-byte bias). )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused8BitRowwiseQuantized")
 
 NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, false, false, false, true >)
 
 SetDoc (R"DOC( Performs the same operation as SparseLengthsWeightedSum, but operating on 8-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 4-byte scale and 4-byte bias). Convert only embedding entries using fake fp16. )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused8BitRowwiseQuantized")
 
 NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16EmbeddingOnly)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, true >)
 
 NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, true, false, true >)
 
 NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16NNPI)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, false, false, false, false, true >)
 
 NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccInvScaleFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, true, true >)
 
 NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccInvScaleFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsMeanFused8BitRowwiseFakeFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, true >)
 
 SetDoc (R"DOC( Performs the same operation as SparseLengthsMean, but operating on 8-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 4-byte scale and 4-byte bias). )DOC") .Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused8BitRowwiseQuantized")
 
 NO_GRADIENT (SparseLengthsMeanFused8BitRowwiseFakeFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsMeanFused8BitRowwiseFakeFP16AccFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, true, true >)
 
 NO_GRADIENT (SparseLengthsMeanFused8BitRowwiseFakeFP16AccFP16)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFakeFP16, SparseLengthsSumOp)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFakeFP16, SparseLengthsWeightedSumOp)
 
 REGISTER_CPU_OPERATOR (SparseLengthsMeanFakeFP16, SparseLengthsMeanOp)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFakeFP16AccFP16, SparseLengthsSumAccFP16Op)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFakeFP16AccFP16, SparseLengthsWeightedSumAccFP16Op)
 
 REGISTER_CPU_OPERATOR (SparseLengthsMeanFakeFP16AccFP16, SparseLengthsMeanAccFP16Op)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFakeFP16EmbeddingOnly, SparseLengthsSumFakeFP16EmbeddingOnlyOp)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFakeFP16EmbeddingOnly, SparseLengthsWeightedSumFakeFP16EmbeddingOnlyOp)
 
 REGISTER_CPU_OPERATOR (SparseLengthsMeanFakeFP16EmbeddingOnly, SparseLengthsMeanFakeFP16EmbeddingOnlyOp)
 
template<typename Def >
string FormatDoc ()
 
NumInputs(SparseLengthsSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsSumOp::…)
 
 NO_GRADIENT (SparseLengthsSumFakeFP16)
 
NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(SparseLengthsWeightedSumOp::…)
 
 NO_GRADIENT (SparseLengthsWeightedSumFakeFP16)
 
NumInputs(SparseLengthsMeanDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsMeanOp::…)
 
 NO_GRADIENT (SparseLengthsMeanFakeFP16)
 
NumInputs(SparseLengthsSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsSumOp::…)
 
 NO_GRADIENT (SparseLengthsSumFakeFP16AccFP16)
 
NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(SparseLengthsWeightedSumOp::…)
 
 NO_GRADIENT (SparseLengthsWeightedSumFakeFP16AccFP16)
 
NumInputs(SparseLengthsMeanDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsMeanOp::…)
 
 NO_GRADIENT (SparseLengthsMeanFakeFP16AccFP16)
 
NumInputs(SparseLengthsSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsSumFakeFP16EmbeddingOnlyOp::…)
 
 NO_GRADIENT (SparseLengthsSumFakeFP16EmbeddingOnly)
 
NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(SparseLengthsWeightedSumFakeFP16EmbeddingOnlyOp::…)
 
 NO_GRADIENT (SparseLengthsWeightedSumFakeFP16EmbeddingOnly)
 
NumInputs(SparseLengthsMeanDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsMeanFakeFP16EmbeddingOnlyOp::…)
 
 NO_GRADIENT (SparseLengthsMeanFakeFP16EmbeddingOnly)
 
 REGISTER_CPU_OPERATOR (TanhQuantFakeFp16NNPI, TanhInt8QuantizeNNPIOp)
 
 Arg ("Y_scale", "Output tensor quantization scale") .Arg("Y_zero_point"
 
Output tensor quantization offset NumInputs (1) .NumOutputs(1) .SetDoc(R"DOC( Apply TanH and convert the result to Int8. <details> </details> )DOC") .Input(0
 
Output tensor quantization offset Float Tensor X Output (0, "Y", "Int8 Tensor Y.")
 
 REGISTER_CPU_OPERATOR (SpatialBNFakeLoweredFp16NNPI, SpatialBNFakeLoweredFp16Op)
 
 OPERATOR_SCHEMA (SpatialBNFakeLoweredFp16NNPI).NumInputs(…).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (SpatialBNFakeFp16NNPI, SpatialBNFakeFp16Op)
 
 OPERATOR_SCHEMA (SpatialBNFakeFp16NNPI).NumInputs(…)
 
OpSchema::Cost CostInferenceForRelu (const OperatorDef &def, const vector< TensorShape > &in)
 
 REGISTER_CPU_OPERATOR (ReluFakeFp16, UnaryElementwiseOp< TensorTypes< float >, CPUContext, ReluFakeFp16Functor< CPUContext > >)
 
 CostInferenceFunction (CostInferenceForRelu) .IdenticalTypeAndShape() .SetDoc(R"DOC( Applies rectified linear unit operation to the input data element-wise. The Relu operation takes one input $X$ and produces one output $Y$, where $Y = max(0, X)$ is applied elementwise. )DOC")
 
 CAFFE_KNOWN_TYPE (::gloo::float16)
 
 CAFFE_KNOWN_TYPE (std::shared_ptr<::gloo::Context >)
 
nccl::NCCLExecution getNCCLElements (OperatorBase *op, const CUDAContext &context)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (Conv, NNPACK, NNPACKConvOp)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (MaxPool, NNPACK, NNPACKMaxPoolOp)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (Relu, NNPACK, NNPACKReluOp)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (LeakyRelu, NNPACK, NNPACKLeakyReluOp)
 
 CAFFE_KNOWN_TYPE (Tensor< OpenCLContext >)
 
void EventCreateOPENCL (const DeviceOption &, Event *)
 
void EventRecordOPENCL (Event *, const void *, const char *)
 
void EventWaitOPENCL (const Event *, void *)
 
void EventFinishOPENCL (const Event *)
 
void EventResetOPENCL (Event *)
 
 REGISTER_EVENT_CREATE_FUNCTION (OPENCL, EventCreateOPENCL)
 
 REGISTER_EVENT_RECORD_FUNCTION (OPENCL, EventRecordOPENCL)
 
 REGISTER_EVENT_WAIT_FUNCTION (OPENCL, OPENCL, EventWaitOPENCL)
 
 REGISTER_EVENT_FINISH_FUNCTION (OPENCL, EventFinishOPENCL)
 
 REGISTER_EVENT_RESET_FUNCTION (OPENCL, EventResetOPENCL)
 
 OPERATOR_SCHEMA (CudaProfileInitialize)
 
 OPERATOR_SCHEMA (CudaProfileStart)
 
 OPERATOR_SCHEMA (CudaProfileStop)
 
 REGISTER_CPU_OPERATOR (CudaProfileInitialize, CudaProfileInitializeOp)
 
 REGISTER_CPU_OPERATOR (CudaProfileStart, CudaProfileStartOp)
 
 REGISTER_CPU_OPERATOR (CudaProfileStop, CudaProfileStopOp)
 
 REGISTER_CUDA_OPERATOR (CudaProfileInitialize, CudaProfileInitializeOp)
 
 REGISTER_CUDA_OPERATOR (CudaProfileStart, CudaProfileStartOp)
 
 REGISTER_CUDA_OPERATOR (CudaProfileStop, CudaProfileStopOp)
 
 CAFFE_KNOWN_TYPE (std::unique_ptr< Module >)
 
 REGISTER_BLOB_SERIALIZER ((TypeMeta::Id< std::unique_ptr< Module > >()), ScriptModuleSerializer)
 
 REGISTER_BLOB_DESERIALIZER (torch::jit::Module, ScriptModuleDeserializer)
 
 NumInputs (1, INT_MAX) .NumOutputs(0, INT_MAX) .Input(0, "script_module_instance", "Instance of shared_ptr<Module>")
 
 REGISTER_CPU_OPERATOR (ScriptModule, ScriptModuleOp< CPUContext >)
 
 SHOULD_NOT_DO_GRADIENT (ScriptModule)
 
 NumInputs (0) .NumOutputs(1) .DisallowInputFillers() .Output(0, …, "New instance of shared_ptr<Module>") .Arg("serialized_binary", "Binary string representing contents of .pt file (zip container)")
 
 REGISTER_CPU_OPERATOR (ScriptModuleLoad, ScriptModuleLoadOp)
 
 NO_GRADIENT (ScriptModuleLoad)
 
 NumInputs (0, INT_MAX) .NumOutputs(0, INT_MAX) .SetDoc(R"DOC( The TensorRT operator is a black-box operator serialized from a prebuilt TensorRT engine string. It takes the inputs, does the computation by calling the TensorRT inference engine, and generates the outputs. This is a GPU-only operator. )DOC") .Arg("log_verbosity", "(int) default verbosity of the TensorRT engine log") .Arg("backend_buffer", "(string default=\"\") blob for serialized TensorRT engine. Note that a serialized engine is not compatible across platforms and different TensorRT versions.") .Arg("max_batch_size", …)
 
 REGISTER_CUDA_OPERATOR (TensorRT, TensorRTOp)
 
void BuildInitializationList (Workspace *ws, ::ONNX_NAMESPACE::GraphProto *g, std::unordered_set< std::string > *initialization_list)
 
 REGISTER_CPU_OPERATOR (CTC, CTCOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (CTC).NumInputs(3).NumOutputs(2, 3)
 
 REGISTER_GRADIENT (CTC, GetCTCGradient)
 
 REGISTER_CUDA_OPERATOR (CTC, CTCOp< float, CUDAContext >)
 
bool BlobIsInt8TensorCPUType (const Blob &blob)
 
bool BlobIsTensorType (const Blob &blob, DeviceType device_type)
 
Tensor * BlobSetTensor (Blob *blob, Tensor &&tensor)
 
Tensor GetSizedTensorWithOptions (Tensor &&previous_tensor, at::IntArrayRef dims, at::TensorOptions options)
 
Tensor * BlobGetMutableTensor (Blob *blob, at::IntArrayRef dims, at::TensorOptions options)
 
Tensor XBlobGetMutableTensor (Blob *blob, at::IntArrayRef dims, at::TensorOptions options)
 
Tensor * BlobGetMutableTensor (Blob *blob, DeviceType device_type)
 
const Tensor & BlobGetTensor (const Blob &blob, DeviceType device_type)
 
Tensor BlobGetTensorOrUndefined (const Blob &blob)
 
void SerializeBlob (const Blob &blob, const string &name, BlobSerializerBase::SerializationAcceptor acceptor, int chunk_size=kDefaultChunkSize)
 Serializes the given blob, if possible. More...
 
std::string SerializeBlob (const Blob &blob, const string &name)
 Convenience function to serialize a blob to a string. More...
 
static bool EnableByteEncoding (const TensorProto::DataType &dataType, const size_t &typeSize)
 
template<typename T , typename S = T>
static void SerializeUsingBytesOrInt32 (const Tensor &input, const TensorProto::DataType &dataType, size_t chunkBegin, int32_t chunkSize, BaseContext *context, TensorProto &proto)
 
int GetGPUIDForPointer (const void *ptr)
 Gets the GPU id that the current pointer is located at. More...
 
 C10_DEFINE_TYPED_REGISTRY (BlobSerializerRegistry, TypeIdentifier, BlobSerializerBase, std::unique_ptr)
 
 C10_DEFINE_REGISTRY (BlobDeserializerRegistry, BlobDeserializerBase)
 
void DeserializeBlob (const string &content, Blob *result)
 Deserializes from a string containing either BlobProto or TensorProto. More...
 
void DeserializeBlob (const BlobProto &blob_proto, Blob *result)
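 
A minimal round-trip sketch for the serialization helpers above; the blob name "my_blob" and the tensor shape are illustrative only:

#include "caffe2/core/blob.h"
#include "caffe2/core/blob_serialization.h"

void BlobRoundTrip() {
  caffe2::Blob blob;
  // Give the blob a small float CPU tensor to serialize.
  caffe2::BlobGetMutableTensor(
      &blob, {2, 2}, at::dtype<float>().device(caffe2::CPU));
  // Serialize to a BlobProto string, then restore into a fresh blob.
  std::string serialized = caffe2::SerializeBlob(blob, "my_blob");
  caffe2::Blob restored;
  caffe2::DeserializeBlob(serialized, &restored);
}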
 
std::vector< int64_t > DimsFromTensorProto (const TensorProto &proto)
 
int64_t NumelFromTensorProto (const TensorProto &tensor_proto)
 
TypeMeta GetDataType (const TensorProto &tensor_proto)
 
static at::TensorOptions TensorOptionsFromProto (const TensorProto &tensor_proto)
 
std::unique_ptr< BaseContext > ContextFromProto (const TensorProto &tensor_proto)
 
Tensor EmptyTensorFromProto (const TensorProto &tensor_proto)
 
template<typename T , typename D = T>
void DeserializeFromBytesOrInt32 (const TensorProto &tensor_proto, size_t chunkBegin, int32_t chunkSize, BaseContext *context, Tensor *tensor)
 
std::string SerializeAsString_EnforceCheck (const google::protobuf::MessageLite &msg, const char *error_location)
 
std::string SerializeBlobProtoAsString_EnforceCheck (const BlobProto &blob)
 
 C10_DECLARE_TYPED_REGISTRY (BlobSerializerRegistry, TypeIdentifier, BlobSerializerBase, std::unique_ptr)
 
unique_ptr< BlobSerializerBase > CreateSerializer (TypeIdentifier id)
 
 C10_DECLARE_REGISTRY (BlobDeserializerRegistry, BlobDeserializerBase)
 
unique_ptr< BlobDeserializerBase > CreateDeserializer (const string &type)
 
 CAFFE_KNOWN_TYPE (BlobTestFoo)
 
 CAFFE_KNOWN_TYPE (BlobTestBar)
 
 CAFFE_KNOWN_TYPE (BlobTestNonDefaultConstructible)
 
 REGISTER_BLOB_SERIALIZER ((TypeMeta::Id< BlobTestFoo >()), BlobTestFooSerializer)
 
 REGISTER_BLOB_DESERIALIZER (BlobTestFoo, BlobTestFooDeserializer)
 
 CAFFE_KNOWN_TYPE (DummyType)
 
bool HasCudaRuntime ()
 
bool HasHipRuntime ()
 
const std::map< string, string > & GetBuildOptions ()
 
template<typename Dst , typename Src >
Dst dynamic_cast_if_rtti (Src ptr)
 
size_t cudnnCompiledVersion ()
 
size_t cudnnRuntimeVersion ()
 
void CheckCuDNNVersions ()
 
cudnnTensorFormat_t GetCudnnTensorFormat (const StorageOrder &order)
 A wrapper function to convert the Caffe storage order to cudnn storage order enum values. More...
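 
Illustrative use of the wrapper, assuming the conventional mapping of caffe2 storage orders onto cuDNN formats:

// NCHW is expected to map to CUDNN_TENSOR_NCHW, NHWC to CUDNN_TENSOR_NHWC.
cudnnTensorFormat_t fmt = caffe2::GetCudnnTensorFormat(caffe2::StorageOrder::NCHW);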
 
int NumCudaDevices ()
 Returns the number of devices. More...
 
void SetDefaultGPUID (const int deviceid)
 
int GetDefaultGPUID ()
 
int CaffeCudaGetDevice ()
 Gets the current GPU id. More...
 
void CaffeCudaSetDevice (const int id)
 Gets the current GPU id. More...
 
const cudaDeviceProp & GetDeviceProperty (const int device)
 Gets the device property for the given device. More...
 
void DeviceQuery (const int deviceid)
 Runs a device query function and prints out the results to LOG(INFO). More...
 
bool GetCudaPeerAccessPattern (vector< vector< bool > > *pattern)
 Return a peer access pattern by returning a matrix (in the format of a nested vector) of boolean values specifying whether peer access is possible. More...
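 
A short sketch of how the peer-access matrix can be consumed (the meaning of pattern[i][j] follows the description above):

std::vector<std::vector<bool>> pattern;
if (caffe2::GetCudaPeerAccessPattern(&pattern)) {
  // pattern[i][j] is true when GPU i can directly access GPU j's memory.
  bool gpu0_can_reach_gpu1 = pattern.size() > 1 && pattern[0][1];
}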
 
bool TensorCoreAvailable ()
 Return the availability of TensorCores for math. More...
 
const char * cublasGetErrorString (cublasStatus_t error)
 Return a human readable cublas error string. More...
 
const char * curandGetErrorString (curandStatus_t error)
 Return a human readable curand error string. More...
 
int CudaVersion ()
 A runtime function to report the cuda version that Caffe2 is built with. More...
 
bool HasCudaGPU ()
 Check if the current running session has a cuda gpu present. More...
 
int CAFFE_GET_BLOCKS (const int N)
 Compute the number of blocks needed to run N threads. More...
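 
The block count is a ceiling division of N by the per-block thread count; a sketch of the arithmetic, assuming caffe2's default of 128 threads per block (CAFFE_CUDA_NUM_THREADS):

// (1000 + 128 - 1) / 128 = 8 blocks are enough to cover 1000 threads.
int num_blocks = caffe2::CAFFE_GET_BLOCKS(1000);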
 
dim3 CAFFE_GET_BLOCKS_2D (const int N, const int)
 Compute the number of blocks needed to run N threads for a 2D grid. More...
 
 TEST (CommonTest, TestStoi)
 
 TEST (CommonTest, TestStod)
 
uint32_t RandomNumberSeed ()
 A function to generate a random number seed that is unique in a best-effort basis, using an ever-incrementing seed and the current time. More...
 
CAFFE2_CUDA_API CudaMemoryPoolType GetCudaMemoryPoolType ()
 Gets the current memory pool type used by Caffe2. More...
 
 TEST (CUDATest, HasCudaRuntime)
 
 TEST (CUDAContextTest, TestAllocDealloc)
 
 TEST (CUDAContextTest, TestSetGetDeviceWithoutCaffeMode)
 
 TEST (CUDAContextTest, MemoryPoolAllocateDealloc)
 
cudaStream_t getStreamForHandle (cublasHandle_t handle)
 
 TEST (CUDAContextTest, TestSameThreadSameObject)
 
 TEST (CUDAContextTest, TestSameThreadTempObject)
 
 TEST (CUDAContextTest, TestSameThreadDifferntObjectIfDifferentDevices)
 
 TEST (CUDAContextTest, TestDifferntThreadDifferentobject)
 
 TEST (CPUContextTest, TestAllocAlignment)
 
 TEST (CPUContextTest, TestAllocDealloc)
 
 CAFFE_KNOWN_TYPE (db::DBReader)
 
 CAFFE_KNOWN_TYPE (db::Cursor)
 
void EventCreateCPU (const DeviceOption &option, Event *event)
 
void EventRecordCPU (Event *event, const void *, const char *err_msg)
 
void EventFinishCPU (const Event *event)
 
void EventWaitCPUCPU (const Event *event, void *)
 
EventStatus EventQueryCPU (const Event *event)
 
const std::string & EventErrorMessageCPU (const Event *event)
 
void EventSetFinishedCPU (const Event *event, const char *err_msg)
 
void EventSetCallbackCPU (Event *event, EventCallbackFunction callback)
 
void EventResetCPU (Event *event)
 
 REGISTER_EVENT_CREATE_FUNCTION (CPU, EventCreateCPU)
 
 REGISTER_EVENT_RECORD_FUNCTION (CPU, EventRecordCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (CPU, CPU, EventWaitCPUCPU)
 
 REGISTER_EVENT_FINISH_FUNCTION (CPU, EventFinishCPU)
 
 REGISTER_EVENT_QUERY_FUNCTION (CPU, EventQueryCPU)
 
 REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (CPU, EventErrorMessageCPU)
 
 REGISTER_EVENT_SET_FINISHED_FUNCTION (CPU, EventSetFinishedCPU)
 
 REGISTER_EVENT_RESET_FUNCTION (CPU, EventResetCPU)
 
 REGISTER_EVENT_SET_CALLBACK_FUNCTION (CPU, EventSetCallbackCPU)
 
bool EventCanScheduleCPU (const Event *, const Event *)
 
void EventCreateCUDA (const DeviceOption &option, Event *event)
 
void EventRecordCUDA (Event *event, const void *context, const char *err_msg)
 
void EventFinishCUDA (const Event *event)
 
void EventWaitCUDACUDA (const Event *event, void *context)
 
void EventWaitCPUCUDA (const Event *event, void *context)
 
void EventWaitCUDACPU (const Event *event, void *context)
 
EventStatus EventQueryCUDA (const Event *event)
 
const std::string & EventErrorMessageCUDA (const Event *event)
 
void EventSetFinishedCUDA (const Event *event, const char *err_msg)
 
void EventResetCUDA (Event *event)
 
 REGISTER_EVENT_CREATE_FUNCTION (CUDA, EventCreateCUDA)
 
 REGISTER_EVENT_RECORD_FUNCTION (CUDA, EventRecordCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CUDA, CUDA, EventWaitCUDACUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CPU, CUDA, EventWaitCPUCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CUDA, CPU, EventWaitCUDACPU)
 
 REGISTER_EVENT_FINISH_FUNCTION (CUDA, EventFinishCUDA)
 
 REGISTER_EVENT_QUERY_FUNCTION (CUDA, EventQueryCUDA)
 
 REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (CUDA, EventErrorMessageCUDA)
 
 REGISTER_EVENT_SET_FINISHED_FUNCTION (CUDA, EventSetFinishedCUDA)
 
 REGISTER_EVENT_RESET_FUNCTION (CUDA, EventResetCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (MKLDNN, CUDA, EventWaitCPUCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CUDA, MKLDNN, EventWaitCUDACPU)
 
 TEST (EventCUDATest, EventBasics)
 
 TEST (EventCPUTest, EventBasics)
 
 TEST (EventCPUTest, EventErrors)
 
 C10_DEFINE_REGISTRY (C10OperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
OperatorDef * AddOp (NetDef *netdef_ptr, string op_type, std::vector< string > inputs, std::vector< string > outputs)
 
bool MatchStrings (string p, string s)
 This allows for the use of * and | to match operator types, engines, or any other property that is represented by strings. More...
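 
A sketch of the matching semantics described above, assuming "*" is a wildcard and "|" separates alternatives:

bool a = caffe2::MatchStrings("*", "AnyEngine");   // true: wildcard matches anything
bool b = caffe2::MatchStrings("Conv|FC", "FC");    // true: one alternative matches
bool c = caffe2::MatchStrings("Conv|FC", "Relu");  // false: no alternative matches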
 
bool MatchArguments (const OperatorDef &p_op, const OperatorDef &g_op)
 This ensures that each named arg that exists in the pattern exists in g_op, is equal in value. More...
 
size_t miopenCompiledVersion ()
 
size_t miopenRuntimeVersion ()
 
void CheckMIOPENVersions ()
 
bool GlobalInitAlreadyRun ()
 Determine whether GlobalInit has already been run. More...
 
bool GlobalInit (int *pargc, char ***argv)
 Initialize the global environment of caffe2. More...
 
bool GlobalInit ()
 Initialize the global environment without command line arguments. More...
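 
Typical call site for GlobalInit, passing through main()'s arguments so caffe2 flags are parsed (a minimal sketch):

#include "caffe2/core/init.h"

int main(int argc, char** argv) {
  // Parses command-line flags and runs all registered init functions once.
  caffe2::GlobalInit(&argc, &argv);
  // ... use caffe2 ...
  return 0;
}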
 
bool unsafeRunCaffe2InitFunction (const char *name, int *pargc, char ***pargv)
 
void QuitIfFeatureUnsupported (const bool cpu_has_feature, const string &feature)
 
static void WarnIfFeatureUnused (const bool cpu_has_feature, const string &feature)
 
bool Caffe2CheckIntrinsicsFeatures (int *, char ***)
 
 REGISTER_CAFFE2_INIT_FUNCTION (Caffe2CheckIntrinsicsFeatures, &Caffe2CheckIntrinsicsFeatures, "Check intrinsics compatibility between the CPU feature and the binary.")
 
 TEST (InitTest, TestInitFunctionHasRun)
 
 TEST (InitTest, CanRerunGlobalInit)
 
void LateRegisterInitFunction ()
 
void LateRegisterEarlyInitFunction ()
 
void LateRegisterFailInitFunction ()
 
 TEST (InitTest, FailLateRegisterInitFunction)
 
void run_schema_check (const NetDef &net)
 
static std::mutex & gModuleChangeMutex ()
 
static CaffeMap< string, const ModuleSchema * > & MutableCurrentModules ()
 
static CaffeMap< string, void * > CurrentModuleHandles ()
 
const CaffeMap< string, const ModuleSchema * > & CurrentModules ()
 Current Modules present in the Caffe2 runtime. More...
 
bool HasModule (const string &name)
 Checks whether a module is already present in the current binary. More...
 
void LoadModule (const string &name, const string &filename="")
 Load a module. More...
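 
A guarded loading sketch combining the two entries above; the module name "caffe2_rocksdb" is illustrative:

if (!caffe2::HasModule("caffe2_rocksdb")) {
  // With the default empty filename, LoadModule resolves the module by name;
  // a filename can point at an explicit shared library instead.
  caffe2::LoadModule("caffe2_rocksdb");
}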
 
 REGISTER_CPU_OPERATOR (Caffe2ModuleTestStaticDummy, Caffe2ModuleTestStaticDummyOp)
 
 OPERATOR_SCHEMA (Caffe2ModuleTestStaticDummy)
 
 TEST (ModuleTest, StaticModule)
 
 C10_DEFINE_REGISTRY (NetRegistry, NetBase, const std::shared_ptr< const NetDef > &, Workspace *)
 
void AddGlobalNetObserverCreator (NetObserverCreator creator)
 
void ClearGlobalNetObservers ()
 
unique_ptr< NetBase > CreateNet (const NetDef &net_def, Workspace *ws)
 Creates a network, accessing / creating blobs in the given workspace. More...
 
unique_ptr< NetBase > CreateNet (const std::shared_ptr< const NetDef > &net_def, Workspace *ws)
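 
A minimal sketch of constructing and running a net through CreateNet; net_def is assumed to be a valid NetDef:

#include "caffe2/core/net.h"
#include "caffe2/core/workspace.h"

void RunOnce(const caffe2::NetDef& net_def) {
  caffe2::Workspace ws;
  // Blobs named by the net are created or looked up in ws.
  std::unique_ptr<caffe2::NetBase> net = caffe2::CreateNet(net_def, &ws);
  if (net) {
    net->Run();
  }
}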
 
 C10_DECLARE_REGISTRY (NetRegistry, NetBase, const std::shared_ptr< const NetDef > &, Workspace *)
 
template<class TaskThreadPoolImpl , int device_type>
std::shared_ptr< TaskThreadPoolBase > GetAsyncNetThreadPool (int device_id, int pool_size, bool create_new)
 
 REGISTER_NET (async_scheduling, AsyncSchedulingNet)
 
 TEST (DagUtilTest, Empty)
 
 TEST (DagUtilTest, AllSync)
 
 TEST (DagUtilTest, AllAsync)
 
 TEST (DagUtilTest, Mixed0)
 
 TEST (DagUtilTest, Mixed1)
 
 TEST (DagUtilTest, Mixed2)
 
void testExecution (std::unique_ptr< NetBase > &net, int num_ops)
 
void checkChainingAndRun (const char *spec, const dag_utils::ExecutionChains &expected)
 
 TEST (NetTest, DISABLED_ChainingForDifferentDevices)
 
std::shared_ptr< AsyncTaskGraphBase > GetAsyncTaskGraph (ExecutorHelper *helper, const ExecutionOptions &options)
 
 C10_DEFINE_SHARED_REGISTRY (TaskGraphRegistry, AsyncTaskGraphBase, ExecutorHelper *, const ExecutionOptions &)
 
 C10_REGISTER_CREATOR (TaskGraphRegistry, futures, GetAsyncTaskGraph)
 
 REGISTER_NET (parallel, ParallelNet)
 
 C10_DECLARE_SHARED_REGISTRY (TaskGraphRegistry, AsyncTaskGraphBase, ExecutorHelper *, const ExecutionOptions &)
 
 REGISTER_NET (simple, SimpleNet)
 
 REGISTER_NET (simple_refcount, SimpleRefCountNet)
 
 TEST (NetTest, ConstructionNoDeclaredInputOutput)
 
 TEST (NetTest, ConstructionDeclaredInput)
 
 TEST (NetTest, ConstructionDeclaredOutput)
 
 TEST (NetTest, DeclaredInputInsufficient)
 
 TEST (NetDeathTest, DeclaredOutputNotMet)
 
void checkNumChainsAndRun (const char *spec, const int expected_num_chains)
 
 TEST (NetTest, DISABLED_ChainingForLinearModel)
 
 TEST (NetTest, DISABLED_ChainingForFork)
 
 TEST (NetTest, DISABLED_ChainingForForkJoin)
 
 TEST (NetTest, DISABLED_ChainingForwardBackward)
 
 TEST (NetTest, DISABLED_ChainingForHogwildModel)
 
 TEST (NetTest, DISABLED_FailingOperator)
 
 REGISTER_CPU_OPERATOR (ExecutorHelperDummy, ExecutorHelperDummyOp)
 
 OPERATOR_SCHEMA (ExecutorHelperDummy)
 
 TEST (NetTest, OperatorWithExecutorHelper)
 
 TEST (NetTest, DISABLED_OperatorWithDisabledEvent)
 
 TEST (NetTest, ExecutorOverride)
 
 TEST (NetTest, AsyncEmptyNet)
 
 TEST (NetTest, DISABLED_RunAsyncFailure)
 
 TEST (NetTest, NoTypeNet)
 
 REGISTER_CPU_OPERATOR (NotFinishingOp, NotFinishingOp)
 
 OPERATOR_SCHEMA (NotFinishingOp)
 
 TEST (NetTest, PendingOpsAndNetFailure)
 
 REGISTER_CPU_OPERATOR (AsyncErrorOp, AsyncErrorOp)
 
 OPERATOR_SCHEMA (AsyncErrorOp)
 
std::unique_ptr< NetBase > AsyncErrorNet (Workspace *ws, const std::string &net_name, bool throw_, bool fail_in_sync)
 
 TEST (NetTest, AsyncErrorOpTest)
 
 TEST (NetTest, AsyncErrorTimingsTest)
 
 REGISTER_CPU_OPERATOR (SyncErrorOp, SyncErrorOp)
 
 OPERATOR_SCHEMA (SyncErrorOp)
 
std::unique_ptr< NetBase > ChainErrorNet (Workspace *ws, const std::string &net_name, bool throw_)
 
 TEST (NetTest, ChainErrorTest)
 
void testProfDAGNetErrorCase (bool test_error)
 
 TEST (NetTest, ProfDAGNetErrorTest)
 
 TEST (ObserverTest, TestNotify)
 
 TEST (ObserverTest, TestUniqueMap)
 
 TEST (ObserverTest, TestNotifyAfterDetach)
 
 TEST (ObserverTest, TestDAGNetBase)
 
const std::string OpRegistryKey (const std::string &op_type, const std::string &engine)
 
void SetPerOpEnginePref (const PerOpEnginePrefType &per_op_engine_pref)
 
void SetGlobalEnginePref (const GlobalEnginePrefType &global_engine_pref)
 
void SetEnginePref (const PerOpEnginePrefType &per_op_engine_pref, const GlobalEnginePrefType &global_engine_pref)
 
void SetOpEnginePref (const std::string &op_type, const CaffeMap< DeviceType, EnginePrefType > &op_pref)
 
unique_ptr< OperatorBase > CreateOperator (const OperatorDef &operator_def, Workspace *ws, int net_position)
 
std::map< DeviceType, OperatorRegistry * > * gDeviceTypeRegistry ()
 
 C10_DEFINE_REGISTRY (CPUOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_REGISTER_DEVICE_TYPE (CPU, CPUOperatorRegistry)
 
 C10_DEFINE_REGISTRY (CUDAOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_REGISTER_DEVICE_TYPE (CUDA, CUDAOperatorRegistry)
 
 C10_DEFINE_REGISTRY (HIPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_REGISTER_DEVICE_TYPE (HIP, HIPOperatorRegistry)
 
 C10_DEFINE_REGISTRY (GradientRegistry, GradientMakerBase, const OperatorDef &, const vector< GradientWrapper > &)
 
GradientOpsMeta GetGradientForOp (const OperatorDef &def, const vector< GradientWrapper > &g_output)
 Gets the GradientOpsMeta for the given operator def. More...
 
TensorShapes InferBlobShapesAndTypes (CaffeMap< string, TensorShape > &blob_desc, const vector< NetDef * > &nets)
 
void LoadInt8TensorInfoOfBlob (std::vector< float > *scale, std::vector< float > *offset, uint32_t *axis, const Blob *b)
 
TensorShape GetTensorShapeOfBlob (const Blob *b)
 
TensorShapes InferBlobShapesAndTypesFromWorkspace (Workspace *ws, const vector< NetDef * > &nets)
 
TensorShapes InferBlobShapesAndTypesFromMap (const CaffeMap< std::string, std::vector< int64_t > > &blob_dimensions, const vector< NetDef * > &nets)
 
TensorShapes InferBlobShapesAndTypesFromMap (const CaffeMap< std::string, std::vector< int64_t > > &blob_dimensions, const CaffeMap< std::string, TensorProto_DataType > &blob_types, const vector< NetDef * > &nets)
 
std::map< string, std::pair< DeviceOption, DeviceOption > > ValidateTensorDevices (OperatorBase &op, const OperatorDef &op_def)
 
std::set< std::string > GetRegisteredOperators ()
 
void SetOperatorLogger (std::function< void(const OperatorDef &)> tracer)
 
std::function< void(const OperatorDef &)> GetOperatorLogger ()
 
 C10_DEFINE_TYPED_REGISTRY (ExternalTensorFunctionsBaseRegistry, TypeIdentifier, ExternalTensorFunctionsBase, std::unique_ptr)
 
 C10_DECLARE_REGISTRY (CPUOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 C10_DECLARE_REGISTRY (CUDAOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 C10_DECLARE_REGISTRY (HIPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 C10_DECLARE_TYPED_REGISTRY (ExternalTensorFunctionsBaseRegistry, TypeIdentifier, ExternalTensorFunctionsBase, std::unique_ptr)
 
unique_ptr< ExternalTensorFunctionsBase > CreateExternalTensorFunctions (TypeIdentifier id)
 
 OPERATOR_SCHEMA (JustTest).NumInputs(0).NumOutputs(0, 1)
 
 REGISTER_CUDA_OPERATOR (JustTest, JustTestCUDA)
 
 REGISTER_CUDNN_OPERATOR (JustTest, JustTestCUDNN)
 
 TEST (EnginePrefTest, GPUDeviceDefaultPreferredEngines)
 
 C10_DECLARE_REGISTRY (GradientRegistry, GradientMakerBase, const OperatorDef &, const vector< GradientWrapper > &)
 
C10_EXPORT std::ostream & operator<< (std::ostream &out, const OpSchema &schema)
 
template<typename T_I = int>
TensorShape CreateTensorShape (vector< T_I > dims, ::caffe2::TensorProto_DataType dt)
 
vector< int64_t > GetDimsVector (const TensorShape &shape)
 
uint64_t nElemFromDim (const TensorShape &X, int dim=0)
 
uint64_t nElemBetweenDim (const TensorShape &X, int start, int stop)
 
std::pair< std::vector< DeviceOption >, std::vector< DeviceOption > > InferOpInputOutputDevice (const OperatorDef &op)
 
template<uint64_t OpsPerPoint>
OpSchema::Cost PointwiseCostInference (const OperatorDef &, const vector< TensorShape > &inputs)
 
 Input (0, …, "dummy input.") .Output(0, "out0", "dummy output.")
 
 TEST (OperatorSchemaTest, BasicSchema)
 
 NumInputs ({2, 4}).NumOutputs(…)
 
 TEST (OperatorSchemaTest, SpecifiedInputOutput)
 
 NumInputsOutputs ([](int in, int out) { return out==in||out==in *2;})
 
 TEST (OperatorSchemaTest, InputOutputRelation)
 
 SameNumberOfOutput ()
 
 TEST (OperatorSchemaTest, SameInputOutput)
 
 NumInputs (1, 5).NumOutputs(2, …)
 
 OutputCalculator ([](int n) { return n+1;})
 
 TEST (OperatorSchemaTest, CalculateOutput)
 
 EnforceInplace ({{1, 1}})
 
 TEST (OperatorSchemaTest, Inplace)
 
 OPERATOR_SCHEMA (OpSchemaSameInputOutputTensorInference).IdenticalTypeAndShape()
 
 TEST (OperatorSchemaTest, TensorInferenceIdentical)
 
 TensorInferenceFunction ([](const OperatorDef &, const vector< TensorShape > &) { vector< TensorShape > shapes(1);shapes[0].set_data_type(TensorProto::FLOAT);shapes[0].add_dims(1701);return shapes;})
 
 TEST (OperatorSchemaTest, TensorInferenceArbitrary)
 
 TEST (OperatorSchemaTest, TestCastSchema)
 
 NumInputs (2) .NumOutputs(2) .CostInferenceFunction(…)
 
 TEST (OperatorSchemaTest, TestCostInference)
 
 OPERATOR_SCHEMA (JustTestCPUOnly).NumInputs(0, …)
 
 OPERATOR_SCHEMA (JustTestWithSomeOutput)
 
 REGISTER_CPU_OPERATOR (JustTest, JustTest)
 
 REGISTER_CPU_OPERATOR (JustTestCPUOnly, JustTest)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (JustTest, FOO, JustTestAndNeverConstructs)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (JustTest, BAR, JustTestAndDoesConstruct)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (JustTest, BAZ, JustTestAndDoesConstruct)
 
 REGISTER_CUDA_OPERATOR (JustTest, JustTest)
 
 REGISTER_CPU_OPERATOR (JustTestWithSomeOutput, JustTestWithSomeOutput)
 
 TEST (OperatorTest, DeviceTypeRegistryWorks)
 
 TEST (OperatorTest, RegistryWorks)
 
 TEST (OperatorTest, RegistryWrongDevice)
 
 TEST (OperatorTest, ExceptionWorks)
 
 TEST (OperatorTest, FallbackIfEngineDoesNotBuild)
 
 TEST (OperatorTest, MultipleEngineChoices)
 
 TEST (OperatorTest, CannotUseUninitializedBlob)
 
 TEST (OperatorTest, TestParameterAccess)
 
 TEST (OperatorTest, CannotAccessParameterWithWrongType)
 
 TEST (OperatorTest, TestDefaultValue)
 
 TEST (OperatorTest, TestSetUp)
 
 TEST (OperatorTest, TestSetUpInputOutputCount)
 
 TEST (OperatorTest, TestOutputValues)
 
NetDef GetNetDefForTest ()
 
 TEST (NetTest, TestScaffoldingSimpleNet)
 
 TEST (NetTest, TestScaffoldingDAGNet)
 
 GRADIENT_OPERATOR_SCHEMA (FooGradient).NumInputs(1).NumOutputs(1)
 
 REGISTER_CPU_GRADIENT_OPERATOR_WITH_ENGINE (FooGradient, DUMMY_ENGINE, FooGradientDummyEngineOp)
 
 REGISTER_GRADIENT (Foo, …)
 
 TEST (OperatorGradientRegistryTest, GradientSimple)
 
 TEST (EnginePrefTest, PerOpEnginePref)
 
 TEST (EnginePrefTest, GlobalEnginePref)
 
 TEST (EnginePrefTest, GlobalEnginePrefAndPerOpEnginePref)
 
 TEST (EnginePrefTest, GlobalEnginePrefAndPerOpEnginePrefAndOpDef)
 
 TEST (EnginePrefTest, SetOpEnginePref)
 
 TEST (EnginePrefTest, SetDefaultEngine)
 
 REGISTER_CPU_OPERATOR (JustTestWithRequiredArg, JustTestWithRequiredArg)
 
 NumInputs (0, 1) .NumOutputs(0, …)
 
 Arg ("test_arg", "this arg is required", true)
 
 TEST (RequiredArg, Basic)
 
 REGISTER_CPU_OPERATOR (JustTestWithStandardIsTestArg, JustTestWithStandardIsTestArg)
 
 ArgIsTest ("this is_test arg is required")
 
 TEST (IsTestArg, standard)
 
 REGISTER_CPU_OPERATOR (JustTestWithNonStandardIsTestArg, JustTestWithNonStandardIsTestArg)
 
 Arg (OpSchema::Arg_IsTest, "this is_test arg is not required")
 
 TEST (IsTestArg, non_standard)
 
 OPERATOR_SCHEMA (Sleep).NumInputs(0, …)
 
 REGISTER_CPU_OPERATOR (Sleep, SleepOp)
 
 REGISTER_CUDA_OPERATOR (Sleep, SleepOp)
 
 TEST (DAGNetTest, TestDAGNetTiming)
 
 TEST (SimpleNetTest, TestSimpleNetTiming)
 
 TEST (DAGNetTest, TestDAGNetTimingReadAfterRead)
 
 TEST (SimpleNetTest, TestSimpleNetTimingReadAfterRead)
 
 TEST (DAGNetTest, TestDAGNetTimingWriteAfterWrite)
 
 TEST (SimpleNetTest, TestSimpleNetTimingWriteAfterWrite)
 
 TEST (DAGNetTest, TestDAGNetTimingWriteAfterRead)
 
 TEST (SimpleNetTest, TestSimpleNetTimingWriteAfterRead)
 
 TEST (DAGNetTest, TestDAGNetTimingControlDependency)
 
 TEST (SimpleNetTest, TestSimpleNetTimingControlDependency)
 
bool RunPlanOnWorkspace (Workspace *ws, const PlanDef &plan, ShouldContinue shouldContinue)
 
 TEST (PlanExecutorTest, EmptyPlan)
 
 REGISTER_CPU_OPERATOR (StuckBlocking, StuckBlockingOp)
 
 OPERATOR_SCHEMA (StuckBlocking).NumInputs(0).NumOutputs(0)
 
 REGISTER_CPU_OPERATOR (Noop, NoopOp)
 
 OPERATOR_SCHEMA (Noop).NumInputs(0).NumOutputs(0)
 
 REGISTER_CPU_OPERATOR (StuckAsync, StuckAsyncOp)
 
 OPERATOR_SCHEMA (StuckAsync).NumInputs(0).NumOutputs(0)
 
 REGISTER_CPU_OPERATOR (Error, ErrorOp)
 
 OPERATOR_SCHEMA (Error).NumInputs(0).NumOutputs(0)
 
 REGISTER_CPU_OPERATOR (BlockingError, BlockingErrorOp)
 
 OPERATOR_SCHEMA (BlockingError).NumInputs(0).NumOutputs(0)
 
PlanDef parallelErrorPlan ()
 
PlanDef parallelErrorPlanWithCancellableStuckNet ()
 
PlanDef reporterErrorPlanWithCancellableStuckNet ()
 
 TEST (PlanExecutorTest, ErrorAsyncPlan)
 
 TEST (PlanExecutorTest, BlockingErrorPlan)
 
 TEST (PlanExecutorTest, ErrorPlanWithCancellableStuckNet)
 
 TEST (PlanExecutorTest, ReporterErrorPlanWithCancellableStuckNet)
 
PlanDef shouldStopWithCancelPlan ()
 
 TEST (PlanExecutorTest, ShouldStopWithCancel)
 
 CAFFE_KNOWN_TYPE (QTensor< CPUContext >)
 
template<typename F >
detail::ScopeGuardImplDecay< F > MakeGuard (F &&f) noexcept(noexcept(detail::ScopeGuardImplDecay< F >(static_cast< F && >(f))))
 ScopeGuard is a general implementation of the "Initialization is Resource Acquisition" idiom. More...
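 
Canonical MakeGuard usage; the file handle is an illustrative resource:

#include <cstdio>

void GuardedWork() {
  std::FILE* f = std::fopen("data.bin", "rb");
  // The lambda runs when guard leaves scope, on both normal and early exits.
  auto guard = caffe2::MakeGuard([&] {
    if (f) {
      std::fclose(f);
    }
  });
  // ... work that may return early or throw ...
}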
 
ExportedStatMap toMap (const ExportedStatList &stats)
 
 CAFFE_KNOWN_TYPE (Tensor)
 
TypeMeta GetTensorType (const void *c)
 
TypeMeta GetInt8TensorType (const void *c)
 
TypeCall GetTypeCallFunction (TypeIdentifier id)
 
void RegisterTypeCallFunction (TypeIdentifier id, TypeCall c)
 
vector< int64_t > GetTensorInfo (const void *c, size_t *capacity, DeviceOption *device)
 
vector< int64_t > GetInt8TensorInfo (const void *c, size_t *capacity, DeviceOption *device)
 
TensorInfoCall GetTensorInfoFunction (TypeIdentifier id)
 
void RegisterTensorInfoFunction (TypeIdentifier id, TensorInfoCall c)
 
void TensorVectorResize (std::vector< Tensor > &tensors, int size, DeviceType type)
 
Tensor empty (at::IntArrayRef dims, at::TensorOptions options)
 
void ReinitializeTensor (Tensor *t, at::IntArrayRef dims, at::TensorOptions options)
 Reinitialize a Tensor to given dims and options if necessary, note that this will not do anything if the Tensor already has correct size and data type. More...
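 
A sketch of the lazy-reallocation behavior described above (shape and dtype are illustrative):

caffe2::Tensor t;  // starts undefined
// First call allocates a 4x3 float CPU tensor; a second call with the same
// dims and options is a no-op, so this is safe to run on every iteration.
caffe2::ReinitializeTensor(&t, {4, 3}, at::dtype<float>().device(caffe2::CPU));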
 
void ReinitializeAndCopyFrom (Tensor *t, at::TensorOptions options, const Tensor &src, bool async)
 
template<typename T >
Tensor TensorCPUFromValues (at::IntArrayRef dims, at::ArrayRef< T > values)
 Creates a CPU tensor, and fills its contents with the given values. More...
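 
Example of filling a small tensor from literal values (row-major order is assumed for the flat value list):

// A 2x2 float tensor holding {{1, 2}, {3, 4}}.
caffe2::Tensor t =
    caffe2::TensorCPUFromValues<float>({2, 2}, {1.0f, 2.0f, 3.0f, 4.0f});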
 
 CAFFE_KNOWN_TYPE (int8::Int8TensorCPU)
 
 C10_DEFINE_REGISTRY (TransformRegistry, Transform)
 
unique_ptr< Transform > CreateTransform (string key)
 
NetDef ApplyTransform (const string &key, const NetDef &netdef)
 
double average_net_run_duration (const NetDef &netdef, const NetDef &init_netdef, const int warmup_runs, const int main_runs)
 
NetDef ApplyTransformIfFaster (const string &key, const NetDef &netdef, const NetDef &init_netdef, const int warmup_runs, const int main_runs, const double improvement_threshold)
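 
A sketch of the guarded-optimization call; the transform key, nets, and thresholds here are illustrative placeholders:

// Returns the transformed net only if it beats the original net's measured
// runtime by at least 1% (improvement_threshold = 1.01); otherwise returns
// the original netdef unchanged.
caffe2::NetDef maybe_faster = caffe2::ApplyTransformIfFaster(
    "CommonSubexpressionElimination", net_def, init_net_def,
    /*warmup_runs=*/5, /*main_runs=*/10, /*improvement_threshold=*/1.01);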
 
 C10_DECLARE_REGISTRY (TransformRegistry, Transform)
 
TensorProto::DataType TypeMetaToDataType (const TypeMeta meta)
 
const TypeMeta DataTypeToTypeMeta (const TensorProto::DataType &dt)
 
StorageOrder StringToStorageOrder (const string &str)
 
int32_t GetDimFromOrderString (const std::string &str)
 
constexpr char NameScopeSeparator ()
 
template<typename T >
bool fp16_type ()
 
template<>
bool fp16_type< at::Half > ()
 
 CAFFE_KNOWN_TYPE (WorkspaceTestFoo)
 
 TEST (WorkspaceTest, BlobAccess)
 
 TEST (WorkspaceTest, RunEmptyPlan)
 
 TEST (WorkspaceTest, Sharing)
 
 TEST (WorkspaceTest, BlobMapping)
 
static void forEachCheck (std::initializer_list< Workspace * > workspaces)
 Checks that Workspace::ForEach(f) applies f on the specified set of workspaces in any order. More...
 
 TEST (WorkspaceTest, ForEach)
 
std::string GetUniqueName ()
 
 REGISTER_CPU_OPERATOR (CreateDB, CreateDBOp< CPUContext >)
 
 OPERATOR_SCHEMA (CreateDB).NumInputs(0).NumOutputs(1)
 
 NO_GRADIENT (CreateDB)
 
 REGISTER_CUDA_OPERATOR (CreateDB, CreateDBOp< CUDAContext >)
 
static std::string encodeName (const std::string &name)
 
 REGISTER_CPU_OPERATOR (FileStoreHandlerCreate, FileStoreHandlerCreateOp< CPUContext >)
 
 SetDoc (R"DOC( … such as NFS. This store handler is not built to be fast. Its recommended use is for integration tests and prototypes where extra dependencies are cumbersome. Use an ephemeral path to ensure multiple processes or runs don't interfere. )DOC") .Arg("path", "base path used by the FileStoreHandler") .Arg("prefix", "prefix for all keys used by this store") .Output(0, "handler", "unique_ptr<StoreHandler>")
 
 NO_GRADIENT (FileStoreHandlerCreateOp)
 
 REGISTER_CUDA_OPERATOR (FileStoreHandlerCreate, FileStoreHandlerCreateOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (RedisStoreHandlerCreate, RedisStoreHandlerCreateOp< CPUContext >)
 
 Arg ("host", "host name of Redis server") .Arg("port", "port number of Redis server") .Arg("prefix", …)
 
 NO_GRADIENT (RedisStoreHandlerCreateOp)
 
 REGISTER_CUDA_OPERATOR (RedisStoreHandlerCreate, RedisStoreHandlerCreateOp< CUDAContext >)
 
 CAFFE_KNOWN_TYPE (std::unique_ptr< StoreHandler >)
 
 REGISTER_CPU_OPERATOR (StoreSet, StoreSetOp)
 
 Arg ("blob_name", "alternative key for the blob (optional)") .Input(0, "handler", "unique_ptr<StoreHandler>") .Input(1, "data", "data blob")
 
 REGISTER_CPU_OPERATOR (StoreGet, StoreGetOp)
 
 Arg ("blob_name", "alternative key for the blob (optional)") .Input(0, "handler", "unique_ptr<StoreHandler>") .Output(0, "data", "data blob")
 
 REGISTER_CPU_OPERATOR (StoreAdd, StoreAddOp)
 
 SetDoc (R"DOC( … the store initializes it to … and then performs the add operation. The operation returns the resulting counter value. )DOC") .Arg("blob_name", "key of the counter (required)") .Arg("add_value", "value that is added (optional, default: 1)") .Input(0, "handler", "unique_ptr<StoreHandler>") .Output(0, "value", "the current value of the counter")
 
 REGISTER_CPU_OPERATOR (StoreWait, StoreWaitOp)
 
 NumInputs (1, 2) .NumOutputs(0) .SetDoc(R"DOC( Wait for the specified blob names to be set. The blob names can be passed either as an input blob with blob names or as an argument. )DOC") .Arg("blob_names", "names of the blobs to wait for (optional)") .Input(0, "handler", "unique_ptr<StoreHandler>") .Input(1, "names", "names of the blobs to wait for (optional)")
 
 REGISTER_CPU_OPERATOR (FC_Decomp, FullyConnectedOpDecomp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (FCGradient_Decomp, FullyConnectedDecompGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (FC_Decomp).NumInputs(4).NumOutputs(1)
 
 OPERATOR_SCHEMA (FCGradient_Decomp).NumInputs(4).NumOutputs(3, …)
 
 REGISTER_GRADIENT (FC_Decomp, GetFCDecompGradient)
 
 REGISTER_CUDA_OPERATOR (FC_Decomp, FullyConnectedOpDecomp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (FCGradient_Decomp, FullyConnectedDecompGradientOp< float, CUDAContext >)
 
 REGISTER_CPU_OPERATOR (TTContraction, TTContractionOp< float, CPUContext >)
 
 REGISTER_CUDA_OPERATOR (TTContraction, TTContractionOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (TTContractionGradient, TTContractionGradientOp< float, CUDAContext >)
 
 REGISTER_IDEEP_COMPARE_OPERATOR (EQ)
 
 REGISTER_IDEEP_COMPARE_OPERATOR (GT)
 
 REGISTER_IDEEP_COMPARE_OPERATOR (GE)
 
 REGISTER_IDEEP_COMPARE_OPERATOR (LT)
 
 REGISTER_IDEEP_COMPARE_OPERATOR (LE)
 
 REGISTER_IDEEP_COMPARE_OPERATOR (NE)
 
 REGISTER_IDEEP_OPERATOR (Softmax, IDEEPFallbackOp< SoftmaxOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (LabelCrossEntropy, IDEEPFallbackOp< LabelCrossEntropyOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (AveragedLoss, IDEEPFallbackOp< AveragedLoss< float, CPUContext >, SkipIndices< 0 > >)
 
 REGISTER_IDEEP_OPERATOR (Flatten, IDEEPFallbackOp< FlattenOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (ResizeLike, IDEEPFallbackOp< ResizeLikeOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Slice, IDEEPFallbackOp< SliceOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Clip, IDEEPFallbackOp< ClipOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (ScatterAssign, IDEEPFallbackOp< ScatterAssignOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Cast, IDEEPFallbackOp< CastOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (XavierFill, IDEEPFallbackOp< XavierFillOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (ConstantFill, IDEEPFallbackOp< ConstantFillOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (GaussianFill, IDEEPFallbackOp< GaussianFillOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (MSRAFill, IDEEPFallbackOp< MSRAFillOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (GivenTensorFill, IDEEPFallbackOp< GivenTensorFillOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (GivenTensorDoubleFill, IDEEPFallbackOp< GivenTensorFillOp< double, CPUContext >, SkipIndices< 0 > >)
 
 REGISTER_IDEEP_OPERATOR (GivenTensorBoolFill, IDEEPFallbackOp< GivenTensorFillOp< bool, CPUContext >, SkipIndices< 0 > >)
 
 REGISTER_IDEEP_OPERATOR (GivenTensorIntFill, IDEEPFallbackOp< GivenTensorFillOp< int, CPUContext >, SkipIndices< 0 > >)
 
 REGISTER_IDEEP_OPERATOR (GivenTensorInt64Fill, IDEEPFallbackOp< GivenTensorFillOp< int64_t, CPUContext >, SkipIndices< 0 > >)
 
 REGISTER_IDEEP_OPERATOR (GivenTensorStringFill, IDEEPFallbackOp< GivenTensorFillOp< std::string, CPUContext >, SkipIndices< 0 > >)
 
 REGISTER_IDEEP_OPERATOR (Load, IDEEPFallbackOp< LoadOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Save, IDEEPFallbackOp< SaveOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (RMACRegions, IDEEPFallbackOp< RMACRegionsOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (RoIPool, IDEEPFallbackOp< RoIPoolOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (RoIAlign, IDEEPFallbackOp< RoIAlignOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (RoIAlignRotated, IDEEPFallbackOp< RoIAlignRotatedOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (GenerateProposals, IDEEPFallbackOp< GenerateProposalsOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (GenerateProposalsCPP, IDEEPFallbackOp< GenerateProposalsOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (CollectAndDistributeFpnRpnProposals, IDEEPFallbackOp< CollectAndDistributeFpnRpnProposalsOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (BoxWithNMSLimit, IDEEPFallbackOp< BoxWithNMSLimitOp< CPUContext >, SkipIndices< 0, 1, 2 > >)
 
 REGISTER_IDEEP_OPERATOR (BBoxTransform, IDEEPFallbackOp< BBoxTransformOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (AffineChannel, IDEEPFallbackOp< AffineChannelOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (StopGradient, IDEEPFallbackOp< StopGradientOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (PadImage, IDEEPFallbackOp< PadImageOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (PRelu, IDEEPFallbackOp< PReluOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (CTCGreedyDecoder, IDEEPFallbackOp< CTCGreedyDecoderOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (CTCBeamSearchDecoder, IDEEPFallbackOp< CTCBeamSearchDecoderOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (AveragedLossGradient, IDEEPFallbackOp< AveragedLossGradient< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (LabelCrossEntropyGradient, IDEEPFallbackOp< LabelCrossEntropyGradientOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (SoftmaxGradient, IDEEPFallbackOp< SoftmaxGradientOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Iter, IDEEPFallbackOp< IterOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (LearningRate, IDEEPFallbackOp< LearningRateOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Abs, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, AbsFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (Atan, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, AtanFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (Sqrt, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, SqrtFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (Sign, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, SignFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (Div, IDEEPFallbackOp< BinaryElementwiseOp< NumericTypes, CPUContext, DivFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (Mul, IDEEPFallbackOp< BinaryElementwiseOp< NumericTypes, CPUContext, MulFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (Sub, IDEEPFallbackOp< BinaryElementwiseOp< NumericTypes, CPUContext, SubFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (Tanh, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, TanhFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (L1Distance, IDEEPFallbackOp< L1DistanceOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Scale, IDEEPFallbackOp< ScaleOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Accuracy, IDEEPFallbackOp< AccuracyOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (AddGradient, IDEEPFallbackOp< BinaryElementwiseGradientOp< NumericTypes, CPUContext, AddFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (TanhGradient, IDEEPFallbackOp< BinaryElementwiseOp< TensorTypes< float >, CPUContext, TanhGradientFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (MulGradient, IDEEPFallbackOp< BinaryElementwiseGradientOp< NumericTypes, CPUContext, MulFunctor< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (TensorProtosDBInput, IDEEPFallbackOp< TensorProtosDBInput< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (CloseBlobsQueue, IDEEPFallbackOp< CloseBlobsQueueOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (SoftmaxWithLoss, IDEEPFallbackOp< SoftmaxWithLossOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (SoftmaxWithLossGradient, IDEEPFallbackOp< SoftmaxWithLossGradientOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Expand, IDEEPFallbackOp< ExpandOp< TensorTypes< std::int32_t, std::int64_t, float, double >, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Gather, IDEEPFallbackOp< GatherOp< CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (Normalize, IDEEPFallbackOp< NormalizeOp< float, CPUContext > >)
 
 REGISTER_IDEEP_OPERATOR (ReduceL2, IDEEPFallbackOp< ReduceOp< TensorTypes< float >, CPUContext, L2Reducer< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (ReduceSum, IDEEPFallbackOp< ReduceOp< TensorTypes< std::int32_t, std::int64_t, float, double >, CPUContext, SumReducer< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (ReduceMean, IDEEPFallbackOp< ReduceOp< TensorTypes< float >, CPUContext, MeanReducer< CPUContext > > >)
 
 REGISTER_IDEEP_OPERATOR (BatchMatMul, IDEEPFallbackOp< BatchMatMulOp< CPUContext > >)
 
 C10_DECLARE_REGISTRY (IDEEPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
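
The registrations above expose CPU-only operators on the IDEEP device through IDEEPFallbackOp, which copies IDEEP tensor inputs into CPU tensors, runs the wrapped CPUContext operator, and copies the outputs back. A minimal sketch of the pattern ("MyOp"/"MyOpOp" are hypothetical placeholders, not symbols from this file):

    // Hypothetical example: route an op with no native IDEEP kernel
    // through its CPU implementation via IDEEPFallbackOp.
    REGISTER_IDEEP_OPERATOR(
        MyOp,
        IDEEPFallbackOp<MyOpOp<float, CPUContext>>);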
 
ideep::scale_t ConvertScales (const std::vector< float > scales_z)
 
ideep::tensor::dims CanonicalDims (ideep::tensor::dims adims, int32_t axis)
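
CanonicalDims is used to fold an N-d shape into the 2-d shape that matrix-style ops expect, multiplying together all dimensions before axis and all dimensions from axis onward. The sketch below shows the assumed computation (error checking and negative-axis handling omitted; not necessarily the exact implementation):

    #include <functional>
    #include <numeric>
    #include <vector>

    // Sketch of the assumed behavior: collapse {d0, ..., d(n-1)} around
    // `axis` into {d0 * ... * d(axis-1), d(axis) * ... * d(n-1)}.
    std::vector<int64_t> Collapse2D(const std::vector<int64_t>& dims, int axis) {
      const auto d0 = std::accumulate(dims.begin(), dims.begin() + axis,
                                      int64_t{1}, std::multiplies<int64_t>());
      const auto d1 = std::accumulate(dims.begin() + axis, dims.end(),
                                      int64_t{1}, std::multiplies<int64_t>());
      return {d0, d1};
    }
    // e.g. Collapse2D({8, 3, 4, 4}, 1) yields {8, 48}.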
 
 CAFFE_KNOWN_TYPE (ideep::tensor)
 
 C10_DEFINE_REGISTRY (IDEEPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_REGISTER_DEVICE_TYPE (DeviceType::IDEEP, IDEEPOperatorRegistry)
 
 REGISTER_EVENT_CREATE_FUNCTION (IDEEP, EventCreateCPU)
 
 REGISTER_EVENT_RECORD_FUNCTION (IDEEP, EventRecordCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (IDEEP, IDEEP, EventWaitCPUCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (IDEEP, CPU, EventWaitCPUCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (CPU, IDEEP, EventWaitCPUCPU)
 
 REGISTER_EVENT_FINISH_FUNCTION (IDEEP, EventFinishCPU)
 
 REGISTER_EVENT_QUERY_FUNCTION (IDEEP, EventQueryCPU)
 
 REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (IDEEP, EventErrorMessageCPU)
 
 REGISTER_EVENT_SET_FINISHED_FUNCTION (IDEEP, EventSetFinishedCPU)
 
 REGISTER_EVENT_RESET_FUNCTION (IDEEP, EventResetCPU)
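
Since IDEEP tensors are host-resident, the IDEEP device type reuses the CPU event implementations wholesale, registering the cross-device waits in both directions. A hypothetical host-memory device type ("MYDEV" is a placeholder, and would need its own DeviceType entry) could wire up the same functions:

    // Sketch: reuse the CPU event machinery for a hypothetical
    // host-memory device type, mirroring the IDEEP registrations above.
    REGISTER_EVENT_CREATE_FUNCTION(MYDEV, EventCreateCPU);
    REGISTER_EVENT_RECORD_FUNCTION(MYDEV, EventRecordCPU);
    REGISTER_EVENT_WAIT_FUNCTION(MYDEV, MYDEV, EventWaitCPUCPU);
    REGISTER_EVENT_WAIT_FUNCTION(MYDEV, CPU, EventWaitCPUCPU);
    REGISTER_EVENT_WAIT_FUNCTION(CPU, MYDEV, EventWaitCPUCPU);
    REGISTER_EVENT_FINISH_FUNCTION(MYDEV, EventFinishCPU);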
 
 REGISTER_CPU_OPERATOR (ImageInput, ImageInputOp< CPUContext >)
 
OPERATOR_SCHEMA (ImageInput)
    .NumInputs(0, 1)
    .NumOutputs(2, INT_MAX)
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& /* unused */) {
      vector<TensorShape> out(2);
      ArgumentHelper helper(def);
      int batch_size = helper.GetSingleArgument<int>("batch_size", 0);
      int crop = helper.GetSingleArgument<int>("crop", -1);
      int color = helper.GetSingleArgument<int>("color", 1);
      CHECK_GT(crop, 0);
      out[0] = CreateTensorShape(
          vector<int>{batch_size, crop, crop, color ? 3 : 1},
          TensorProto::FLOAT);
      out[1] = CreateTensorShape(vector<int>{1, batch_size}, TensorProto::INT32);
      return out;
    })
    .SetDoc(R"DOC(
Imports and processes images from a database. For each run of the operator,
batch_size images will be processed. GPUs can optionally be used for
part of the processing.

The following transformations are applied to the image:
  - A bounding box is applied to the initial image (optional)
  - The image is rescaled either up or down (with the scale argument) or
    just up (with the minsize argument)
  - The image is randomly cropped (crop size is passed as an argument but
    the location of the crop is random, except if is_test is passed in
    which case the image is cropped at the center)
  - The image is normalized. Each of its color channels can have separate
    normalization values.

The dimension of the output image will always be crop x crop.
)DOC")
    .Arg("batch_size", "Number of images to output for each run of the operator. Must be 1 or greater")
    .Arg("color", "Number of color channels (1 or 3). Defaults to 1")
    .Arg("color_jitter", "Whether or not to do color jitter. Defaults to 0")
    .Arg("img_saturation", "Image saturation scale used in color jittering. Defaults to 0.4")
    .Arg("img_brightness", "Image brightness scale used in color jittering. Defaults to 0.4")
    .Arg("img_contrast", "Image contrast scale used in color jittering. Defaults to 0.4")
    .Arg("color_lighting", "Whether or not to do color lighting. Defaults to 0")
    .Arg("color_lighting_std", "Std of normal distribution where color lighting scaling factor is sampled. Defaults to 0.1")
    .Arg("scale_jitter_type", "Type of scale jittering to do (0: no scale jittering, 1: INCEPTION_STYLE). Defaults to 0")
    .Arg("scale", "Scale the size of the smallest dimension of the image to this. Scale and minsize are mutually exclusive. Must be larger than crop")
    .Arg("minsize", "Scale the size of the smallest dimension of the image to this only if the size is initially smaller. Scale and minsize are mutually exclusive. Must be larger than crop.")
    .Arg("warp", "If 1, both dimensions of the image will be set to minsize or scale; otherwise the other dimension is proportionally scaled. Defaults to 0")
    .Arg("crop", "Size to crop the image to. Must be provided")
    .Arg("mirror", "Whether or not to mirror the image. Defaults to 0")
    .Arg("mean", "Mean by which to normalize color channels. Defaults to 0.")
    .Arg("mean_per_channel", "Vector of means per color channel (1 or 3 elements). Defaults to mean argument. Channel order BGR")
    .Arg("std", "Standard deviation by which to normalize color channels. Defaults to 1.")
    .Arg("std_per_channel", "Vector of standard dev. per color channel (1 or 3 elements). Defaults to std argument. Channel order is BGR")
    .Arg("bounding_ymin", "Bounding box coordinate. Defaults to -1 (none)")
    .Arg("bounding_xmin", "Bounding box coordinate. Defaults to -1 (none)")
    .Arg("bounding_height", "Bounding box coordinate. Defaults to -1 (none)")
    .Arg("bounding_width", "Bounding box coordinate. Defaults to -1 (none)")
    .Arg("is_test", "Set to 1 to do deterministic cropping. Defaults to 0")
    .Arg("use_caffe_datum", "1 if the input is in Caffe format. Defaults to 0")
    .Arg("use_gpu_transform", "1 if GPU acceleration should be used. Defaults to 0. Can only be 1 in a CUDAContext")
    .Arg("decode_threads", "Number of CPU decode/transform threads. Defaults to 4")
    .Arg("output_type", "If gpu_transform, can set to FLOAT or FLOAT16.")
    .Arg("db", "Name of the database (if not passed as input)")
    .Arg("db_type", "Type of database (if not passed as input). Defaults to leveldb")
    .Arg("output_sizes", "The sizes of any outputs besides the data and label (should have a number of elements equal to the number of additional outputs)")
    .Arg("random_scale", "[min, max] shortest side desired for image resize. Defaults to [-1, -1] or no random resize desired")
    .Input(0, "reader", "The input reader (a db::DBReader)")
    .Output(0, "data", "Tensor containing the images")
    .Output(1, "label", "Tensor containing the labels")
    .Output(2, "additional outputs", "Any outputs after the first two are Tensors read from the input TensorProtos")
 
 NO_GRADIENT (ImageInput)
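
For illustration, an ImageInput OperatorDef matching the schema above could be assembled by hand roughly as follows (blob names and argument values are placeholders):

    #include "caffe2/proto/caffe2_pb.h"
    #include "caffe2/utils/proto_utils.h"

    // Sketch: build an ImageInput OperatorDef per the schema above.
    caffe2::OperatorDef MakeImageInputOp() {
      caffe2::OperatorDef op;
      op.set_type("ImageInput");
      op.add_input("reader");   // a db::DBReader blob
      op.add_output("data");    // float NHWC batch: {batch_size, crop, crop, 3}
      op.add_output("label");   // int32 labels
      op.add_argument()->CopyFrom(caffe2::MakeArgument("batch_size", 32));
      op.add_argument()->CopyFrom(caffe2::MakeArgument("crop", 224));
      op.add_argument()->CopyFrom(caffe2::MakeArgument("color", 3));
      op.add_argument()->CopyFrom(caffe2::MakeArgument("is_test", 1));
      return op;
    }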
 
template<class Context >
bool RandomSizedCropping (cv::Mat *img, const int crop, std::mt19937 *randgen)
 
template<class Context >
void Saturation (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen)
 
template<class Context >
void Brightness (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen)
 
template<class Context >
void Contrast (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen)
 
template<class Context >
void ColorJitter (float *img, const int img_size, const float saturation, const float brightness, const float contrast, std::mt19937 *randgen)
 
template<class Context >
void ColorLighting (float *img, const int img_size, const float alpha_std, const std::vector< std::vector< float > > &eigvecs, const std::vector< float > &eigvals, std::mt19937 *randgen)
 
template<class Context >
void ColorNormalization (float *img, const int img_size, const int channels, const std::vector< float > &mean, const std::vector< float > &std)
 
template<class Context >
void TransformImage (const cv::Mat &scaled_img, const int channels, float *image_data, const bool color_jitter, const float saturation, const float brightness, const float contrast, const bool color_lighting, const float color_lighting_std, const std::vector< std::vector< float > > &color_lighting_eigvecs, const std::vector< float > &color_lighting_eigvals, const int crop, const bool mirror, const std::vector< float > &mean, const std::vector< float > &std, std::mt19937 *randgen, std::bernoulli_distribution *mirror_this_image, bool is_test=false)
 
template<class Context >
void CropTransposeImage (const cv::Mat &scaled_img, const int channels, uint8_t *cropped_data, const int crop, const bool mirror, std::mt19937 *randgen, std::bernoulli_distribution *mirror_this_image, bool is_test=false)
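
ColorJitter is presumably the composition of the three single-image distortions declared above, applied in a random order; a sketch under that assumption (the real implementation may randomize differently):

    #include <algorithm>
    #include <random>
    #include <vector>

    // Sketch: apply Saturation, Brightness and Contrast in a random
    // order; each helper samples its own strength internally.
    template <class Context>
    void ColorJitterSketch(float* img, const int img_size,
                           const float saturation, const float brightness,
                           const float contrast, std::mt19937* randgen) {
      std::vector<int> order{0, 1, 2};
      std::shuffle(order.begin(), order.end(), *randgen);
      for (const int which : order) {
        if (which == 0) {
          Saturation<Context>(img, img_size, saturation, randgen);
        } else if (which == 1) {
          Brightness<Context>(img, img_size, brightness, randgen);
        } else {
          Contrast<Context>(img, img_size, contrast, randgen);
        }
      }
    }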
 
 REGISTER_CUDA_OPERATOR (ImageInput, ImageInputOp< CUDAContext >)
 
template<typename T_IN , typename T_OUT , class Context >
bool TransformOnGPU (Tensor &X, Tensor *Y, Tensor &mean, Tensor &std, Context *context)
 
bool tryConvertToMPSCNN (const NetDef &initNet, const NetDef &predictNet, NetDef *mpscnnPredictNet)
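
tryConvertToMPSCNN is the entry point for the Metal rewrite: it returns true and fills mpscnnPredictNet only when the predict net can be mapped to MPSCNN kernels, so callers presumably fall back to the original net on failure. A hedged usage sketch:

    // Sketch: prefer the MPSCNN-rewritten net when conversion succeeds,
    // otherwise keep running the original CPU predict net.
    caffe2::NetDef ChoosePredictNet(const caffe2::NetDef& initNet,
                                    const caffe2::NetDef& predictNet) {
      caffe2::NetDef mpscnnNet;
      if (caffe2::tryConvertToMPSCNN(initNet, predictNet, &mpscnnNet)) {
        return mpscnnNet;  // runs via MPSCNN ops on the GPU
      }
      return predictNet;   // unmodified fallback
    }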
 
NetDef annotateDefWithReadCounts (const NetDef &net)
 
NetDef rewriteForMetal (const NetDef &net)
 
NetDef runMPSCNNFusion (const NetDef &net)
 
void dumpDef (const NetDef &d)
 
void mpscnnRecordExecutionFinish ()
 
MPSCNNContext & getMPSCNNContext ()
 
Analysis analyzeNet (const NetDef &net)
 
static void rewriteInput (OperatorDef *op, int i)
 
static void rewriteOutput (OperatorDef *op, int i)
 
static void insertOutputCopyFromMPSCNNOp (NetDef &predictNet, const std::vector< std::string > &cpu_blobs)
 
NetDef insertInputOutputCopyOps (const NetDef &def)
 
bool nextIsOnlyUserOfCurrent (const Analysis &analysis, size_t currentIdx, const OperatorDef &currentOp, const OperatorDef &nextOp)
 
bool tryFuseAdjacentOps (const Analysis &analysis, size_t currentIdx, const OperatorDef &currentOp, const OperatorDef &nextOp, OperatorDef *fusedOp)
 
bool tryConvertToMPSCNNIntermediateCopies (const NetDef &initNet, const NetDef &predictNet, NetDef *mpscnnPredictNet)
 
NetDef setSpecialArgs (const NetDef &def)
 
void testMPSCNN ()
 
void compareModels (const NetDef &initNet, NetDef predictNet)
 
void ver