|
void | swap (Blob &lhs, Blob &rhs) |
|
std::ostream & | operator<< (std::ostream &out, const Blob &v) |
|
| CAFFE_KNOWN_TYPE (c10::intrusive_ptr< LinearPackedParamsBase >) |
|
void | reportTime (std::string type, double ts, std::string metric, std::string unit) |
|
void | splitSizes (const std::string &arg, int *ptr0, int *ptr1) |
|
cv::Mat | resizeImage (cv::Mat &img) |
|
cv::Mat | cropToRec (cv::Mat &img, int *height_ptr, int *width_ptr) |
|
std::vector< float > | convertToVector (cv::Mat &img) |
|
std::vector< float > | convertOneImage (std::string &filename, int *height_ptr, int *width_ptr) |
|
int | getBatchSize (int num_items) |
|
TensorProtos | writeValues (std::vector< std::vector< std::vector< float > > > &values, std::vector< std::vector< int > > &dims) |
|
TensorProtos | convertImages (std::string &image_file) |
|
template<class TYPE > |
vector< TYPE > | splitString (std::string &line) |
|
TensorProtos | convertValues (std::string &file_name) |
|
void | ConvertToRawDataset (const string &input_db_name, const string &output_db_name) |
|
void | writeValues (std::vector< std::vector< std::vector< float > > > &values, std::vector< std::vector< int > > &dims, std::string output_file) |
|
void | convertImages () |
|
void | convertValues () |
|
void | ReadImage (std::ifstream *file, int *label, char *buffer) |
|
void | WriteToDB (const string &filename, const int num_items, const int &offset, db::DB *db) |
|
void | ConvertCIFAR () |
|
void | ConvertImageDataset (const string &input_folder, const string &list_filename, const string &output_db_name, const bool) |
|
uint32_t | swap_endian (uint32_t val) |
|
void | convert_dataset (const char *image_filename, const char *label_filename, const char *db_path, const int data_limit) |
|
void | run () |
|
static int | Split (int argc, char **argv) |
|
| CAFFE_KNOWN_TYPE (detail::_guard_long_unique< long >) |
|
constexpr bool | operator< (TypeIdentifier lhs, TypeIdentifier rhs) |
|
std::ostream & | operator<< (std::ostream &stream, caffe2::TypeIdentifier typeId) |
|
template<> |
constexpr C10_EXPORT uint16_t | TypeMeta::_typeMetaData< detail::_Uninitialized > () noexcept |
|
bool | operator== (const TypeMeta lhs, const TypeMeta rhs) noexcept |
|
bool | operator!= (const TypeMeta lhs, const TypeMeta rhs) noexcept |
|
std::ostream & | operator<< (std::ostream &stream, caffe2::TypeMeta typeMeta) |
|
| REGISTER_CPU_OPERATOR (ATen, ATenOp< CPUContext >) |
|
| OPERATOR_SCHEMA (ATen) |
|
| REGISTER_CUDA_OPERATOR (ATen, ATenOp< CUDAContext >) |
|
vector< TensorShape > | TensorInferenceForBatchMatMul (const OperatorDef &def, const vector< TensorShape > &in) |
|
OpSchema::Cost | CostInferenceForBatchMatMul (const OperatorDef &def, const vector< TensorShape > &in) |
|
| REGISTER_CPU_OPERATOR (BatchMatMulFP16Fake, BatchMatMulFP16FakeOp< CPUContext >) |
|
| NumInputs (2) .NumOutputs(1) .SetDoc(R"DOC( Batch Matrix multiplication Yi = Ai * Bi |
|
where A has | shape (dim0, dim1,... M, K) |
|
where A has B has | shape (dim0, dim1,... K, N) |
|
| REGISTER_CPU_OPERATOR (SumFakeFp16, SumFP16FP16AccOp< CPUContext >) |
|
| OPERATOR_SCHEMA (SumFakeFp16).NumInputs(1 |
|
INT_MAX | NumOutputs (1, INT_MAX) |
|
| REGISTER_CPU_OPERATOR (AddFakeFp16, BinaryElementwiseOp< TensorTypes< float, int >, CPUContext, FP16PairWiseCPUFunctor< AddFunctor< CPUContext > > >) |
|
| OPERATOR_SCHEMA (AddFakeFp16).NumInputs(2).NumOutputs(1) |
|
| REGISTER_CPU_OPERATOR (DivFakeFp16, BinaryElementwiseOp< TensorTypes< float >, CPUContext, FP16PairWiseCPUFunctor< DivFunctor< CPUContext > > >) |
|
| OPERATOR_SCHEMA (DivFakeFp16).NumInputs(2).NumOutputs(1) |
|
| REGISTER_CPU_OPERATOR (MulFakeFp16, BinaryElementwiseOp< TensorTypes< float >, CPUContext, FP16PairWiseCPUFunctor< MulFunctor< CPUContext > > >) |
|
| OPERATOR_SCHEMA (MulFakeFp16).NumInputs(2).NumOutputs(1) |
|
| REGISTER_CPU_OPERATOR (SubFakeFp16, BinaryElementwiseOp< TensorTypes< float >, CPUContext, FP16PairWiseCPUFunctor< SubFunctor< CPUContext > > >) |
|
| OPERATOR_SCHEMA (SubFakeFp16).NumInputs(2).NumOutputs(1) |
|
| REGISTER_CPU_OPERATOR (Fp16FCAcc32, Fp16FCAccOp< CPUContext, DefaultEngine, false, true, false >) |
|
NumInputs(3) .NumOutputs(1) .TensorInferenceFunction(std | REGISTER_CPU_OPERATOR (Fp16FCAcc16, Fp16FCAccOp< CPUContext, DefaultEngine, true, true, false >) |
|
NumInputs(3) .NumOutputs(1) .TensorInferenceFunction(std | REGISTER_CPU_OPERATOR (Fp16FCAcc32NNPI, Fp16FCAccOp< CPUContext, DefaultEngine, false, false, true >) |
|
NumInputs(3) .NumOutputs(1) .TensorInferenceFunction(std | REGISTER_CPU_OPERATOR (Fp16FCAcc16NNPI, Fp16FCAccOp< CPUContext, DefaultEngine, true, false, true >) |
|
void | transpose (const float *A, std::vector< float > &A_trans, int M, int N) |
|
void | custom_fp16_gemm_with_trans (const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int m, const int k, const int n, const float *A, const float *B, const float beta, float *C, const bool use_acc_fp16, const bool use_temp_accumulator) |
|
static __m256 | clamp_subnormals (__m256 input, const float epsilon_) |
|
void | custom_fp16_gemm (const int m, const int k, const int n, const float *A_fp16, const float *B_fp16, const float beta, float *C, const bool use_acc_fp16, const bool use_temp_accumulator) |
|
void | custom_fp16_gemv (const bool use_acc_fp16, const bool use_custom_acc32, const bool use_temp_accumulator, const CBLAS_TRANSPOSE trans_A, const int M, const int N, const float alpha, const float *A, const float *x, const float beta, float *y, CPUContext *context) |
|
void | custom_fp16_gemm_batched (const bool use_acc_fp16, const bool use_custom_acc32, const bool use_temp_accumulator, const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int batch_size, const int M, const int N, const int K, const float alpha, const float **A, const float **B, const float beta, float **C, CPUContext *context) |
|
void | custom_fp16_gemm_strided_batched (const bool use_acc_fp16, const bool use_custom_acc32, const bool use_temp_accumulator, const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int batch_size, const int M, const int N, const int K, const float alpha, const float *A, const int A_stride, const float *B, const int B_stride, const float beta, float *C, const int C_stride, CPUContext *context) |
|
void | transpose (const float *A, float *A_trans, int M, int N) |
|
| REGISTER_CPU_OPERATOR (Int8DequantizeNNPI, int8::Int8DequantizeNNPIOp) |
|
| IdenticalTypeAndShape () .NumInputs(1) .NumOutputs(1) .Input(0 |
|
Int8 Tensor qX | Output (0, "Y", "FP32 Tensor that represents mapped real value of qX.") |
|
| REGISTER_CPU_OPERATOR (Int8QuantizeNNPI, int8::Int8QuantizeNNPIOp) |
|
Output tensor quantization scale | Arg ("Y_zero_point", "Output tensor quantization offset") .NumInputs(1) .NumOutputs(1) .Input(0 |
|
Output tensor quantization scale FP32 Tensor X | Output (0, "Y", "Int8 Tensor qX representing X with linear quantization.") |
|
| REGISTER_CPU_OPERATOR (SwishFakeInt8NNPI, int8::SwishInt8NNPIOp) |
|
Inout tensor quantization scale | Arg ("X_zero_point", "Input tensor quantization offset") .Arg("Y_scale" |
|
| REGISTER_CPU_OPERATOR (LayerNormFakeFP16NNPI, LayerNormFakeFp16Op< false >) |
|
| OPERATOR_SCHEMA (LayerNormFakeFP16NNPI).NumInputs( |
|
| NumOutputs (3) |
|
| REGISTER_CPU_OPERATOR (LayerNormInt8QuantizeFakeNNPI, LayerNormFakeFp16Op< true >) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused4BitRowwiseFakeFP16NNPI, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, false >) |
|
| NumInputs (3) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext |
|
false SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, false >::LENGTHS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsSum, but operating on
4-bit rowwise quantized matrices with fused storage (where each row
stores quantized values, and then 2-byte scale and 2-byte bias).
)DOC") .Input(0 |
|
false SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, false >::LENGTHS uint8 tensor obtained with | operator FloatToFused4BitRowwiseQuantized") .Input (1, "INDICES", "Integer vector containing indices of the first " "dimension of DATA for the slices that are being aggregated") .Input(2 |
|
| NO_GRADIENT (SparseLengthsSumFused4BitRowwiseFakeFP16NNPI) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused4BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, false, true >) |
|
true SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, false, true >::LENGTHS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsSum, but operating on
4-bit rowwise quantized matrices with fused storage (where each row
stores quantized values, and then 2-byte scale and 2-byte bias).
Convert only embedding entries using fake fp16.
)DOC") .Input(0 |
|
| NO_GRADIENT (SparseLengthsSumFused4BitRowwiseFakeFP16EmbeddingOnly) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16NNPI, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true >) |
|
| NumInputs (4) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext |
|
true SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true >::WEIGHTS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsWeightedSum,
but operating on 4-bit rowwise quantized matrices with fused storage
(where each row stores quantized values, and then 2-byte scale and 2-byte bias).
)DOC") .Input(0 |
|
true SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true >::WEIGHTS uint8 tensor obtained with Vector with the same sum of elements as the first dimension of DATA | Input (3, "WEIGHTS", "Vector of weights to scale rows of DATA with before reduction") .Output(0 |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16NNPI) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true, true >) |
|
true SparseLengthsFused4BitRowwiseFakeFP16Op< CPUContext, true, true >::WEIGHTS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsWeightedSum,
but operating on 4-bit rowwise quantized matrices with fused storage
(where each row stores quantized values, and then 2-byte scale and 2-byte bias).
Convert only embedding entries using fake fp16.
)DOC") .Input(0 |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused4BitRowwiseFakeFP16EmbeddingOnly) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext >) |
|
NumInputs(3) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext > | NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, false, false, false, true >) |
|
true SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, false, false, false, true >::LENGTHS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsSum, but operating on
8-bit rowwise quantized matrices with fused storage (where each row
stores quantized values, and then 4-byte scale and 4-byte bias).
Convert only embedding entries using fake fp16.
)DOC") .Input(0 |
|
true SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, false, false, false, true >::LENGTHS uint8 tensor obtained with | operator FloatToFused8BitRowwiseQuantized") .Input (1, "INDICES", "Integer vector containing indices of the first " "dimension of DATA for the slices that are being aggregated") .Input(2 |
|
| NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16EmbeddingOnly) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, true, false, true >) |
|
true SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, true >::LENGTHS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsSum, but operating on
8-bit rowwise quantized matrices with fused storage (where each row
stores quantized values, and then 4-byte scale and 4-byte bias).
)DOC") .Input(0 |
|
| NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16NNPI) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP32NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, false, false, false, false, true >) |
|
| NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP32NNPI) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16AccFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, false, true >) |
|
| NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16AccFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwiseFakeFP16AccInvScaleFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, 0, true, true >) |
|
| NO_GRADIENT (SparseLengthsSumFused8BitRowwiseFakeFP16AccInvScaleFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true >) |
|
true SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true >::WEIGHTS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsWeightedSum,
but operating on 8-bit rowwise quantized matrices with fused storage
(where each row stores quantized values, and then 4-byte scale and 4-byte bias).
)DOC") .Input(0 |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16EmbeddingOnly, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, false, false, false, true >) |
|
true SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, false, false, false, true >::WEIGHTS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsWeightedSum,
but operating on 8-bit rowwise quantized matrices with fused storage
(where each row stores quantized values, and then 4-byte scale and 4-byte bias).
Convert only embedding entries using fake fp16.
)DOC") .Input(0 |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16EmbeddingOnly) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, true >) |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, true, false, true >) |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16NNPI) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, false, false, false, false, true >) |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccInvScaleFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, true, false, true, true >) |
|
| NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwiseFakeFP16AccInvScaleFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsMeanFused8BitRowwiseFakeFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, true >) |
|
true SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, true >::LENGTHS | SetDoc (R"DOC(
Performs the same operation as SparseLengthsMean, but
operating on 8-bit rowwise quantized matrices with fused storage
(where each row stores quantized values, and then 4-byte scale and 4-byte bias).
)DOC") .Input(0 |
|
| NO_GRADIENT (SparseLengthsMeanFused8BitRowwiseFakeFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsMeanFused8BitRowwiseFakeFP16AccFP16, SparseLengthsFused8BitRowwiseFakeFP16Op< CPUContext, false, true, true >) |
|
| NO_GRADIENT (SparseLengthsMeanFused8BitRowwiseFakeFP16AccFP16) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFakeFP16, SparseLengthsSumOp) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFakeFP16, SparseLengthsWeightedSumOp) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsMeanFakeFP16, SparseLengthsMeanOp) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFakeFP16AccFP16, SparseLengthsSumAccFP16Op) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFakeFP16AccFP16, SparseLengthsWeightedSumAccFP16Op) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsMeanFakeFP16AccFP16, SparseLengthsMeanAccFP16Op) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsSumFakeFP16EmbeddingOnly, SparseLengthsSumFakeFP16EmbeddingOnlyOp) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFakeFP16EmbeddingOnly, SparseLengthsWeightedSumFakeFP16EmbeddingOnlyOp) |
|
| REGISTER_CPU_OPERATOR (SparseLengthsMeanFakeFP16EmbeddingOnly, SparseLengthsMeanFakeFP16EmbeddingOnlyOp) |
|
template<typename Def > |
string | FormatDoc () |
|
NumInputs(SparseLengthsSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsSumOp | NO_GRADIENT (SparseLengthsSumFakeFP16) |
|
NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(SparseLengthsWeightedSumOp | NO_GRADIENT (SparseLengthsWeightedSumFakeFP16) |
|
NumInputs(SparseLengthsMeanDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsMeanOp | NO_GRADIENT (SparseLengthsMeanFakeFP16) |
|
NumInputs(SparseLengthsSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsSumOp | NO_GRADIENT (SparseLengthsSumFakeFP16AccFP16) |
|
NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(SparseLengthsWeightedSumOp | NO_GRADIENT (SparseLengthsWeightedSumFakeFP16AccFP16) |
|
NumInputs(SparseLengthsMeanDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsMeanOp | NO_GRADIENT (SparseLengthsMeanFakeFP16AccFP16) |
|
NumInputs(SparseLengthsSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsSumFakeFP16EmbeddingOnlyOp | NO_GRADIENT (SparseLengthsSumFakeFP16EmbeddingOnly) |
|
NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs) .NumOutputs(1) .WeightedValueKeyLengthInputFillers(SparseLengthsWeightedSumFakeFP16EmbeddingOnlyOp | NO_GRADIENT (SparseLengthsWeightedSumFakeFP16EmbeddingOnly) |
|
NumInputs(SparseLengthsMeanDef::ForwardOp::kNumInputs) .NumOutputs(1) .ValueKeyLengthInputFillers(SparseLengthsMeanFakeFP16EmbeddingOnlyOp | NO_GRADIENT (SparseLengthsMeanFakeFP16EmbeddingOnly) |
|
| REGISTER_CPU_OPERATOR (TanhQuantFakeFp16NNPI, TanhInt8QuantizeNNPIOp) |
|
| Arg ("Y_scale", "Output tensor quantization scale") .Arg("Y_zero_point" |
|
Output tensor quantization offset | NumInputs (1) .NumOutputs(1) .SetDoc(R"DOC( Apply TanH and convert the result to Int8. <details> </details> )DOC") .Input(0 |
|
Output tensor quantization offset Float Tensor X | Output (0, "Y", "Int8 Tensor Y.") |
|
| REGISTER_CPU_OPERATOR (SpatialBNFakeLoweredFp16NNPI, SpatialBNFakeLoweredFp16Op) |
|
| OPERATOR_SCHEMA (SpatialBNFakeLoweredFp16NNPI).NumInputs( |
|
| NumOutputs (1) |
|
| REGISTER_CPU_OPERATOR (SpatialBNFakeFp16NNPI, SpatialBNFakeFp16Op) |
|
| OPERATOR_SCHEMA (SpatialBNFakeFp16NNPI).NumInputs( |
|
OpSchema::Cost | CostInferenceForRelu (const OperatorDef &def, const vector< TensorShape > &in) |
|
| REGISTER_CPU_OPERATOR (ReluFakeFp16, UnaryElementwiseOp< TensorTypes< float >, CPUContext, ReluFakeFp16Functor< CPUContext > >) |
|
| CostInferenceFunction (CostInferenceForRelu) .IdenticalTypeAndShape() .SetDoc(R"DOC( Applies rectified linear unit operation to the input data element-wise. The Relu operation takes one input $X$ |
|
| CAFFE_KNOWN_TYPE (::gloo::float16) |
|
| CAFFE_KNOWN_TYPE (std::shared_ptr<::gloo::Context >) |
|
nccl::NCCLExecution | getNCCLElements (OperatorBase *op, const CUDAContext &context) |
|
| REGISTER_CPU_OPERATOR_WITH_ENGINE (Conv, NNPACK, NNPACKConvOp) |
|
| REGISTER_CPU_OPERATOR_WITH_ENGINE (MaxPool, NNPACK, NNPACKMaxPoolOp) |
|
| REGISTER_CPU_OPERATOR_WITH_ENGINE (Relu, NNPACK, NNPACKReluOp) |
|
| REGISTER_CPU_OPERATOR_WITH_ENGINE (LeakyRelu, NNPACK, NNPACKLeakyReluOp) |
|
| CAFFE_KNOWN_TYPE (Tensor< OpenCLContext >) |
|
void | EventCreateOPENCL (const DeviceOption &, Event *) |
|
void | EventRecordOPENCL (Event *, const void *, const char *) |
|
void | EventWaitOPENCL (const Event *, void *) |
|
void | EventFinishOPENCL (const Event *) |
|
void | EventResetOPENCL (Event *) |
|
| REGISTER_EVENT_CREATE_FUNCTION (OPENCL, EventCreateOPENCL) |
|
| REGISTER_EVENT_RECORD_FUNCTION (OPENCL, EventRecordOPENCL) |
|
| REGISTER_EVENT_WAIT_FUNCTION (OPENCL, OPENCL, EventWaitOPENCL) |
|
| REGISTER_EVENT_FINISH_FUNCTION (OPENCL, EventFinishOPENCL) |
|
| REGISTER_EVENT_RESET_FUNCTION (OPENCL, EventResetOPENCL) |
|
| OPERATOR_SCHEMA (CudaProfileInitialize) |
|
| OPERATOR_SCHEMA (CudaProfileStart) |
|
| OPERATOR_SCHEMA (CudaProfileStop) |
|
| REGISTER_CPU_OPERATOR (CudaProfileInitialize, CudaProfileInitializeOp) |
|
| REGISTER_CPU_OPERATOR (CudaProfileStart, CudaProfileStartOp) |
|
| REGISTER_CPU_OPERATOR (CudaProfileStop, CudaProfileStopOp) |
|
| REGISTER_CUDA_OPERATOR (CudaProfileInitialize, CudaProfileInitializeOp) |
|
| REGISTER_CUDA_OPERATOR (CudaProfileStart, CudaProfileStartOp) |
|
| REGISTER_CUDA_OPERATOR (CudaProfileStop, CudaProfileStopOp) |
|
| CAFFE_KNOWN_TYPE (std::unique_ptr< Module >) |
|
| REGISTER_BLOB_SERIALIZER ((TypeMeta::Id< std::unique_ptr< Module > >()), ScriptModuleSerializer) |
|
| REGISTER_BLOB_DESERIALIZER (torch::jit::Module, ScriptModuleDeserializer) |
|
| NumInputs (1, INT_MAX) .NumOutputs(0 |
|
INT_MAX | Input (0, "script_module_instance", "Instance of shared_ptr<Module>") |
|
| REGISTER_CPU_OPERATOR (ScriptModule, ScriptModuleOp< CPUContext >) |
|
| SHOULD_NOT_DO_GRADIENT (ScriptModule) |
|
| NumInputs (0) .NumOutputs(1) .DisallowInputFillers() .Output(0 |
|
New instance of shared_ptr< Module > | Arg ("serialized_binary", "Binary string representing contents of .pt file (zip container)") |
|
| REGISTER_CPU_OPERATOR (ScriptModuleLoad, ScriptModuleLoadOp) |
|
| NO_GRADIENT (ScriptModuleLoad) |
|
| NumInputs (0, INT_MAX) .NumOutputs(0 |
|
INT_MAX | SetDoc (R"DOC(
The TensorRT operator is a black-box operator serialized from prebuilt TensorRT
Engine string. It will take the input, do the computation by calling TensorRT
inference engine and generate the outputs.
This is a GPU only operator.
)DOC") .Arg("log_verbosity" |
|
INT_MAX int default verbosity of the TensorRt engine log | Arg ("backend_buffer", "(string default=\"\" blob for serialized TensorRT engine." "Note that serialized engine is not compatible across platform and " "different TensorRT version.") .Arg("max_batch_size" |
|
| REGISTER_CUDA_OPERATOR (TensorRT, TensorRTOp) |
|
void | BuildInitializationList (Workspace *ws, ::ONNX_NAMESPACE::GraphProto *g, std::unordered_set< std::string > *initialization_list) |
|
| REGISTER_CPU_OPERATOR (CTC, CTCOp< float, CPUContext >) |
|
| OPERATOR_SCHEMA (CTC).NumInputs(3 |
|
| NumOutputs (2, 3) |
|
| REGISTER_GRADIENT (CTC, GetCTCGradient) |
|
| REGISTER_CUDA_OPERATOR (CTC, CTCOp< float, CUDAContext >) |
|
bool | BlobIsInt8TensorCPUType (const Blob &blob) |
|
bool | BlobIsTensorType (const Blob &blob, DeviceType device_type) |
|
Tensor * | BlobSetTensor (Blob *blob, Tensor &&tensor) |
|
Tensor | GetSizedTensorWithOptions (Tensor &&previous_tensor, at::IntArrayRef dims, at::TensorOptions options) |
|
Tensor * | BlobGetMutableTensor (Blob *blob, at::IntArrayRef dims, at::TensorOptions options) |
|
Tensor | XBlobGetMutableTensor (Blob *blob, at::IntArrayRef dims, at::TensorOptions options) |
|
Tensor * | BlobGetMutableTensor (Blob *blob, DeviceType device_type) |
|
const Tensor & | BlobGetTensor (const Blob &blob, DeviceType device_type) |
|
Tensor | BlobGetTensorOrUndefined (const Blob &blob) |
|
void | SerializeBlob (const Blob &blob, const string &name, BlobSerializerBase::SerializationAcceptor acceptor, int chunk_size=kDefaultChunkSize) |
| Serializes the given blob, if possible. More...
|
|
std::string | SerializeBlob (const Blob &blob, const string &name) |
| Convenience function to serialize a blob to a string. More...
|
|
static bool | EnableByteEncoding (const TensorProto::DataType &dataType, const size_t &typeSize) |
|
template<typename T , typename S = T> |
static void | SerializeUsingBytesOrInt32 (const Tensor &input, const TensorProto::DataType &dataType, size_t chunkBegin, int32_t chunkSize, BaseContext *context, TensorProto &proto) |
|
int | GetGPUIDForPointer (const void *ptr) |
| Gets the GPU id that the current pointer is located at. More...
|
|
| C10_DEFINE_TYPED_REGISTRY (BlobSerializerRegistry, TypeIdentifier, BlobSerializerBase, std::unique_ptr) |
|
| C10_DEFINE_REGISTRY (BlobDeserializerRegistry, BlobDeserializerBase) |
|
void | DeserializeBlob (const string &content, Blob *result) |
| Deserializes from a string containing either BlobProto or TensorProto. More...
|
|
void | DeserializeBlob (const BlobProto &blob_proto, Blob *result) |
|
std::vector< int64_t > | DimsFromTensorProto (const TensorProto &proto) |
|
int64_t | NumelFromTensorProto (const TensorProto &tensor_proto) |
|
TypeMeta | GetDataType (const TensorProto &tensor_proto) |
|
static at::TensorOptions | TensorOptionsFromProto (const TensorProto &tensor_proto) |
|
std::unique_ptr< BaseContext > | ContextFromProto (const TensorProto &tensor_proto) |
|
Tensor | EmptyTensorFromProto (const TensorProto &tensor_proto) |
|
template<typename T , typename D = T> |
void | DeserializeFromBytesOrInt32 (const TensorProto &tensor_proto, size_t chunkBegin, int32_t chunkSize, BaseContext *context, Tensor *tensor) |
|
std::string | SerializeAsString_EnforceCheck (const google::protobuf::MessageLite &msg, const char *error_location) |
|
std::string | SerializeBlobProtoAsString_EnforceCheck (const BlobProto &blob) |
|
| C10_DECLARE_TYPED_REGISTRY (BlobSerializerRegistry, TypeIdentifier, BlobSerializerBase, std::unique_ptr) |
|
unique_ptr< BlobSerializerBase > | CreateSerializer (TypeIdentifier id) |
|
| C10_DECLARE_REGISTRY (BlobDeserializerRegistry, BlobDeserializerBase) |
|
unique_ptr< BlobDeserializerBase > | CreateDeserializer (const string &type) |
|
| CAFFE_KNOWN_TYPE (BlobTestFoo) |
|
| CAFFE_KNOWN_TYPE (BlobTestBar) |
|
| CAFFE_KNOWN_TYPE (BlobTestNonDefaultConstructible) |
|
| REGISTER_BLOB_SERIALIZER ((TypeMeta::Id< BlobTestFoo >()), BlobTestFooSerializer) |
|
| REGISTER_BLOB_DESERIALIZER (BlobTestFoo, BlobTestFooDeserializer) |
|
| CAFFE_KNOWN_TYPE (DummyType) |
|
bool | HasCudaRuntime () |
|
bool | HasHipRuntime () |
|
const std::map< string, string > & | GetBuildOptions () |
|
template<typename Dst , typename Src > |
Dst | dynamic_cast_if_rtti (Src ptr) |
|
size_t | cudnnCompiledVersion () |
|
size_t | cudnnRuntimeVersion () |
|
void | CheckCuDNNVersions () |
|
cudnnTensorFormat_t | GetCudnnTensorFormat (const StorageOrder &order) |
| A wrapper function to convert the Caffe storage order to cudnn storage order enum values. More...
|
|
int | NumCudaDevices () |
| Returns the number of devices. More...
|
|
void | SetDefaultGPUID (const int deviceid) |
|
int | GetDefaultGPUID () |
|
int | CaffeCudaGetDevice () |
| Gets the current GPU id. More...
|
|
void | CaffeCudaSetDevice (const int id) |
| Sets the current GPU id. More...
|
|
const cudaDeviceProp & | GetDeviceProperty (const int device) |
| Gets the device property for the given device. More...
|
|
void | DeviceQuery (const int deviceid) |
| Runs a device query function and prints out the results to LOG(INFO). More...
|
|
bool | GetCudaPeerAccessPattern (vector< vector< bool > > *pattern) |
| Return a peer access pattern by returning a matrix (in the format of a nested vector) of boolean values specifying whether peer access is possible. More...
|
|
bool | TensorCoreAvailable () |
| Return the availability of TensorCores for math. More...
|
|
const char * | cublasGetErrorString (cublasStatus_t error) |
| Return a human readable cublas error string. More...
|
|
const char * | curandGetErrorString (curandStatus_t error) |
| Return a human readable curand error string. More...
|
|
int | CudaVersion () |
| A runtime function to report the cuda version that Caffe2 is built with. More...
|
|
bool | HasCudaGPU () |
| Check if the current running session has a cuda gpu present. More...
|
|
int | CAFFE_GET_BLOCKS (const int N) |
| Compute the number of blocks needed to run N threads. More...
|
|
dim3 | CAFFE_GET_BLOCKS_2D (const int N, const int) |
| Compute the number of blocks needed to run N threads for a 2D grid. More...
|
|
| TEST (CommonTest, TestStoi) |
|
| TEST (CommonTest, TestStod) |
|
uint32_t | RandomNumberSeed () |
| A function to generate a random number seed that is unique on a best-effort basis, using an ever-incrementing seed and the current time. More...
|
|
CAFFE2_CUDA_API CudaMemoryPoolType | GetCudaMemoryPoolType () |
| Gets the current memory pool type used by Caffe2. More...
|
|
| TEST (CUDATest, HasCudaRuntime) |
|
| TEST (CUDAContextTest, TestAllocDealloc) |
|
| TEST (CUDAContextTest, TestSetGetDeviceWithoutCaffeMode) |
|
| TEST (CUDAContextTest, MemoryPoolAllocateDealloc) |
|
cudaStream_t | getStreamForHandle (cublasHandle_t handle) |
|
| TEST (CUDAContextTest, TestSameThreadSameObject) |
|
| TEST (CUDAContextTest, TestSameThreadTempObject) |
|
| TEST (CUDAContextTest, TestSameThreadDifferntObjectIfDifferentDevices) |
|
| TEST (CUDAContextTest, TestDifferntThreadDifferentobject) |
|
| TEST (CPUContextTest, TestAllocAlignment) |
|
| TEST (CPUContextTest, TestAllocDealloc) |
|
| CAFFE_KNOWN_TYPE (db::DBReader) |
|
| CAFFE_KNOWN_TYPE (db::Cursor) |
|
void | EventCreateCPU (const DeviceOption &option, Event *event) |
|
void | EventRecordCPU (Event *event, const void *, const char *err_msg) |
|
void | EventFinishCPU (const Event *event) |
|
void | EventWaitCPUCPU (const Event *event, void *) |
|
EventStatus | EventQueryCPU (const Event *event) |
|
const std::string & | EventErrorMessageCPU (const Event *event) |
|
void | EventSetFinishedCPU (const Event *event, const char *err_msg) |
|
void | EventSetCallbackCPU (Event *event, EventCallbackFunction callback) |
|
void | EventResetCPU (Event *event) |
|
| REGISTER_EVENT_CREATE_FUNCTION (CPU, EventCreateCPU) |
|
| REGISTER_EVENT_RECORD_FUNCTION (CPU, EventRecordCPU) |
|
| REGISTER_EVENT_WAIT_FUNCTION (CPU, CPU, EventWaitCPUCPU) |
|
| REGISTER_EVENT_FINISH_FUNCTION (CPU, EventFinishCPU) |
|
| REGISTER_EVENT_QUERY_FUNCTION (CPU, EventQueryCPU) |
|
| REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (CPU, EventErrorMessageCPU) |
|
| REGISTER_EVENT_SET_FINISHED_FUNCTION (CPU, EventSetFinishedCPU) |
|
| REGISTER_EVENT_RESET_FUNCTION (CPU, EventResetCPU) |
|
| REGISTER_EVENT_SET_CALLBACK_FUNCTION (CPU, EventSetCallbackCPU) |
|
bool | EventCanScheduleCPU (const Event *, const Event *) |
|
void | EventCreateCUDA (const DeviceOption &option, Event *event) |
|
void | EventRecordCUDA (Event *event, const void *context, const char *err_msg) |
|
void | EventFinishCUDA (const Event *event) |
|
void | EventWaitCUDACUDA (const Event *event, void *context) |
|
void | EventWaitCPUCUDA (const Event *event, void *context) |
|
void | EventWaitCUDACPU (const Event *event, void *context) |
|
EventStatus | EventQueryCUDA (const Event *event) |
|
const std::string & | EventErrorMessageCUDA (const Event *event) |
|
void | EventSetFinishedCUDA (const Event *event, const char *err_msg) |
|
void | EventResetCUDA (Event *event) |
|
| REGISTER_EVENT_CREATE_FUNCTION (CUDA, EventCreateCUDA) |
|
| REGISTER_EVENT_RECORD_FUNCTION (CUDA, EventRecordCUDA) |
|
| REGISTER_EVENT_WAIT_FUNCTION (CUDA, CUDA, EventWaitCUDACUDA) |
|
| REGISTER_EVENT_WAIT_FUNCTION (CPU, CUDA, EventWaitCPUCUDA) |
|
| REGISTER_EVENT_WAIT_FUNCTION (CUDA, CPU, EventWaitCUDACPU) |
|
| REGISTER_EVENT_FINISH_FUNCTION (CUDA, EventFinishCUDA) |
|
| REGISTER_EVENT_QUERY_FUNCTION (CUDA, EventQueryCUDA) |
|
| REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (CUDA, EventErrorMessageCUDA) |
|
| REGISTER_EVENT_SET_FINISHED_FUNCTION (CUDA, EventSetFinishedCUDA) |
|
| REGISTER_EVENT_RESET_FUNCTION (CUDA, EventResetCUDA) |
|
| REGISTER_EVENT_WAIT_FUNCTION (MKLDNN, CUDA, EventWaitCPUCUDA) |
|
| REGISTER_EVENT_WAIT_FUNCTION (CUDA, MKLDNN, EventWaitCUDACPU) |
|
| TEST (EventCUDATest, EventBasics) |
|
| TEST (EventCPUTest, EventBasics) |
|
| TEST (EventCPUTest, EventErrors) |
|
| C10_DEFINE_REGISTRY (C10OperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
OperatorDef * | AddOp (NetDef *netdef_ptr, string op_type, std::vector< string > inputs, std::vector< string > outputs) |
|
bool | MatchStrings (string p, string s) |
| This allows for the use of * and | to match operator types, engines, or any other property that is represented by strings. More...
|
|
bool | MatchArguments (const OperatorDef &p_op, const OperatorDef &g_op) |
| This ensures that each named arg that exists in the pattern also exists in g_op and is equal in value. More...
|
|
size_t | miopenCompiledVersion () |
|
size_t | miopenRuntimeVersion () |
|
void | CheckMIOPENVersions () |
|
bool | GlobalInitAlreadyRun () |
| Determine whether GlobalInit has already been run. More...
|
|
bool | GlobalInit (int *pargc, char ***argv) |
| Initialize the global environment of caffe2. More...
|
|
bool | GlobalInit () |
| Initialize the global environment without command line arguments. More...
|
|
bool | unsafeRunCaffe2InitFunction (const char *name, int *pargc, char ***pargv) |
|
void | QuitIfFeatureUnsupported (const bool cpu_has_feature, const string &feature) |
|
static void | WarnIfFeatureUnused (const bool cpu_has_feature, const string &feature) |
|
bool | Caffe2CheckIntrinsicsFeatures (int *, char ***) |
|
| REGISTER_CAFFE2_INIT_FUNCTION (Caffe2CheckIntrinsicsFeatures, &Caffe2CheckIntrinsicsFeatures, "Check intrinsics compatibility between the CPU feature and the binary.") |
|
| TEST (InitTest, TestInitFunctionHasRun) |
|
| TEST (InitTest, CanRerunGlobalInit) |
|
void | LateRegisterInitFunction () |
|
void | LateRegisterEarlyInitFunction () |
|
void | LateRegisterFailInitFunction () |
|
| TEST (InitTest, FailLateRegisterInitFunction) |
|
void | run_schema_check (const NetDef &net) |
|
static std::mutex & | gModuleChangeMutex () |
|
static CaffeMap< string, const ModuleSchema * > & | MutableCurrentModules () |
|
static CaffeMap< string, void * > | CurrentModuleHandles () |
|
const CaffeMap< string, const ModuleSchema * > & | CurrentModules () |
| Current Modules present in the Caffe2 runtime. More...
|
|
bool | HasModule (const string &name) |
| Checks whether a module is already present in the current binary. More...
|
|
void | LoadModule (const string &name, const string &filename="") |
| Load a module. More...
|
|
| REGISTER_CPU_OPERATOR (Caffe2ModuleTestStaticDummy, Caffe2ModuleTestStaticDummyOp) |
|
| OPERATOR_SCHEMA (Caffe2ModuleTestStaticDummy) |
|
| TEST (ModuleTest, StaticModule) |
|
| C10_DEFINE_REGISTRY (NetRegistry, NetBase, const std::shared_ptr< const NetDef > &, Workspace *) |
|
void | AddGlobalNetObserverCreator (NetObserverCreator creator) |
|
void | ClearGlobalNetObservers () |
|
unique_ptr< NetBase > | CreateNet (const NetDef &net_def, Workspace *ws) |
| Creates a network, accessing / creating blobs in the given workspace. More...
|
|
unique_ptr< NetBase > | CreateNet (const std::shared_ptr< const NetDef > &net_def, Workspace *ws) |
|
| C10_DECLARE_REGISTRY (NetRegistry, NetBase, const std::shared_ptr< const NetDef > &, Workspace *) |
|
template<class TaskThreadPoolImpl , int device_type> |
std::shared_ptr< TaskThreadPoolBase > | GetAsyncNetThreadPool (int device_id, int pool_size, bool create_new) |
|
| REGISTER_NET (async_scheduling, AsyncSchedulingNet) |
|
| TEST (DagUtilTest, Empty) |
|
| TEST (DagUtilTest, AllSync) |
|
| TEST (DagUtilTest, AllAsync) |
|
| TEST (DagUtilTest, Mixed0) |
|
| TEST (DagUtilTest, Mixed1) |
|
| TEST (DagUtilTest, Mixed2) |
|
void | testExecution (std::unique_ptr< NetBase > &net, int num_ops) |
|
void | checkChainingAndRun (const char *spec, const dag_utils::ExecutionChains &expected) |
|
| TEST (NetTest, DISABLED_ChainingForDifferentDevices) |
|
std::shared_ptr< AsyncTaskGraphBase > | GetAsyncTaskGraph (ExecutorHelper *helper, const ExecutionOptions &options) |
|
| C10_DEFINE_SHARED_REGISTRY (TaskGraphRegistry, AsyncTaskGraphBase, ExecutorHelper *, const ExecutionOptions &) |
|
| C10_REGISTER_CREATOR (TaskGraphRegistry, futures, GetAsyncTaskGraph) |
|
| REGISTER_NET (parallel, ParallelNet) |
|
| C10_DECLARE_SHARED_REGISTRY (TaskGraphRegistry, AsyncTaskGraphBase, ExecutorHelper *, const ExecutionOptions &) |
|
| REGISTER_NET (simple, SimpleNet) |
|
| REGISTER_NET (simple_refcount, SimpleRefCountNet) |
|
| TEST (NetTest, ConstructionNoDeclaredInputOutput) |
|
| TEST (NetTest, ConstructionDeclaredInput) |
|
| TEST (NetTest, ConstructionDeclaredOutput) |
|
| TEST (NetTest, DeclaredInputInsufficient) |
|
| TEST (NetDeathTest, DeclaredOutputNotMet) |
|
void | checkNumChainsAndRun (const char *spec, const int expected_num_chains) |
|
| TEST (NetTest, DISABLED_ChainingForLinearModel) |
|
| TEST (NetTest, DISABLED_ChainingForFork) |
|
| TEST (NetTest, DISABLED_ChainingForForkJoin) |
|
| TEST (NetTest, DISABLED_ChainingForwardBackward) |
|
| TEST (NetTest, DISABLED_ChainingForHogwildModel) |
|
| TEST (NetTest, DISABLED_FailingOperator) |
|
| REGISTER_CPU_OPERATOR (ExecutorHelperDummy, ExecutorHelperDummyOp) |
|
| OPERATOR_SCHEMA (ExecutorHelperDummy) |
|
| TEST (NetTest, OperatorWithExecutorHelper) |
|
| TEST (NetTest, DISABLED_OperatorWithDisabledEvent) |
|
| TEST (NetTest, ExecutorOverride) |
|
| TEST (NetTest, AsyncEmptyNet) |
|
| TEST (NetTest, DISABLED_RunAsyncFailure) |
|
| TEST (NetTest, NoTypeNet) |
|
| REGISTER_CPU_OPERATOR (NotFinishingOp, NotFinishingOp) |
|
| OPERATOR_SCHEMA (NotFinishingOp) |
|
| TEST (NetTest, PendingOpsAndNetFailure) |
|
| REGISTER_CPU_OPERATOR (AsyncErrorOp, AsyncErrorOp) |
|
| OPERATOR_SCHEMA (AsyncErrorOp) |
|
std::unique_ptr< NetBase > | AsyncErrorNet (Workspace *ws, const std::string &net_name, bool throw_, bool fail_in_sync) |
|
| TEST (NetTest, AsyncErrorOpTest) |
|
| TEST (NetTest, AsyncErrorTimingsTest) |
|
| REGISTER_CPU_OPERATOR (SyncErrorOp, SyncErrorOp) |
|
| OPERATOR_SCHEMA (SyncErrorOp) |
|
std::unique_ptr< NetBase > | ChainErrorNet (Workspace *ws, const std::string &net_name, bool throw_) |
|
| TEST (NetTest, ChainErrorTest) |
|
void | testProfDAGNetErrorCase (bool test_error) |
|
| TEST (NetTest, ProfDAGNetErrorTest) |
|
| TEST (ObserverTest, TestNotify) |
|
| TEST (ObserverTest, TestUniqueMap) |
|
| TEST (ObserverTest, TestNotifyAfterDetach) |
|
| TEST (ObserverTest, TestDAGNetBase) |
|
const std::string | OpRegistryKey (const std::string &op_type, const std::string &engine) |
|
void | SetPerOpEnginePref (const PerOpEnginePrefType &per_op_engine_pref) |
|
void | SetGlobalEnginePref (const GlobalEnginePrefType &global_engine_pref) |
|
void | SetEnginePref (const PerOpEnginePrefType &per_op_engine_pref, const GlobalEnginePrefType &global_engine_pref) |
|
void | SetOpEnginePref (const std::string &op_type, const CaffeMap< DeviceType, EnginePrefType > &op_pref) |
|
unique_ptr< OperatorBase > | CreateOperator (const OperatorDef &operator_def, Workspace *ws, int net_position) |
|
std::map< DeviceType, OperatorRegistry * > * | gDeviceTypeRegistry () |
|
| C10_DEFINE_REGISTRY (CPUOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
| CAFFE_REGISTER_DEVICE_TYPE (CPU, CPUOperatorRegistry) |
|
| C10_DEFINE_REGISTRY (CUDAOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
| CAFFE_REGISTER_DEVICE_TYPE (CUDA, CUDAOperatorRegistry) |
|
| C10_DEFINE_REGISTRY (HIPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
| CAFFE_REGISTER_DEVICE_TYPE (HIP, HIPOperatorRegistry) |
|
| C10_DEFINE_REGISTRY (GradientRegistry, GradientMakerBase, const OperatorDef &, const vector< GradientWrapper > &) |
|
GradientOpsMeta | GetGradientForOp (const OperatorDef &def, const vector< GradientWrapper > &g_output) |
| Gets the GradientOpsMeta for the given operator def. More...
|
|
TensorShapes | InferBlobShapesAndTypes (CaffeMap< string, TensorShape > &blob_desc, const vector< NetDef * > &nets) |
|
void | LoadInt8TensorInfoOfBlob (std::vector< float > *scale, std::vector< float > *offset, uint32_t *axis, const Blob *b) |
|
TensorShape | GetTensorShapeOfBlob (const Blob *b) |
|
TensorShapes | InferBlobShapesAndTypesFromWorkspace (Workspace *ws, const vector< NetDef * > &nets) |
|
TensorShapes | InferBlobShapesAndTypesFromMap (const CaffeMap< std::string, std::vector< int64_t > > &blob_dimensions, const vector< NetDef * > &nets) |
|
TensorShapes | InferBlobShapesAndTypesFromMap (const CaffeMap< std::string, std::vector< int64_t > > &blob_dimensions, const CaffeMap< std::string, TensorProto_DataType > &blob_types, const vector< NetDef * > &nets) |
|
std::map< string, std::pair< DeviceOption, DeviceOption > > | ValidateTensorDevices (OperatorBase &op, const OperatorDef &op_def) |
|
std::set< std::string > | GetRegisteredOperators () |
|
void | SetOperatorLogger (std::function< void(const OperatorDef &)> tracer) |
|
std::function< void(const OperatorDef &)> | GetOperatorLogger () |
|
| C10_DEFINE_TYPED_REGISTRY (ExternalTensorFunctionsBaseRegistry, TypeIdentifier, ExternalTensorFunctionsBase, std::unique_ptr) |
|
| C10_DECLARE_REGISTRY (CPUOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
| C10_DECLARE_REGISTRY (CUDAOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
| C10_DECLARE_REGISTRY (HIPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
| C10_DECLARE_TYPED_REGISTRY (ExternalTensorFunctionsBaseRegistry, TypeIdentifier, ExternalTensorFunctionsBase, std::unique_ptr) |
|
unique_ptr< ExternalTensorFunctionsBase > | CreateExternalTensorFunctions (TypeIdentifier id) |
|
| OPERATOR_SCHEMA (JustTest).NumInputs(0 |
|
| NumOutputs (0, 1) |
|
| REGISTER_CUDA_OPERATOR (JustTest, JustTestCUDA) |
|
| REGISTER_CUDNN_OPERATOR (JustTest, JustTestCUDNN) |
|
| TEST (EnginePrefTest, GPUDeviceDefaultPreferredEngines) |
|
| C10_DECLARE_REGISTRY (GradientRegistry, GradientMakerBase, const OperatorDef &, const vector< GradientWrapper > &) |
|
C10_EXPORT std::ostream & | operator<< (std::ostream &out, const OpSchema &schema) |
|
template<typename T_I = int> |
TensorShape | CreateTensorShape (vector< T_I > dims, ::caffe2::TensorProto_DataType dt) |
|
vector< int64_t > | GetDimsVector (const TensorShape &shape) |
|
uint64_t | nElemFromDim (const TensorShape &X, int dim=0) |
|
uint64_t | nElemBetweenDim (const TensorShape &X, int start, int stop) |
|
std::pair< std::vector< DeviceOption >, std::vector< DeviceOption > > | InferOpInputOutputDevice (const OperatorDef &op) |
|
template<uint64_t OpsPerPoint> |
OpSchema::Cost | PointwiseCostInference (const OperatorDef &, const vector< TensorShape > &inputs) |
|
dummy input | Output (0, "out0", "dummy output.") |
|
| TEST (OperatorSchemaTest, BasicSchema) |
|
| NumInputs ({2, 4}).NumOutputs( |
|
| TEST (OperatorSchemaTest, SpecifiedInputOutput) |
|
| NumInputsOutputs ([](int in, int out) { return out==in||out==in *2;}) |
|
| TEST (OperatorSchemaTest, InputOutputRelation) |
|
| SameNumberOfOutput () |
|
| TEST (OperatorSchemaTest, SameInputOutput) |
|
| NumInputs (1, 5).NumOutputs(2 |
|
| OutputCalculator ([](int n) { return n+1;}) |
|
| TEST (OperatorSchemaTest, CalculateOutput) |
|
| EnforceInplace ({{1, 1}}) |
|
| TEST (OperatorSchemaTest, Inplace) |
|
| OPERATOR_SCHEMA (OpSchemaSameInputOutputTensorInference).IdenticalTypeAndShape() |
|
| TEST (OperatorSchemaTest, TensorInferenceIdentical) |
|
| TensorInferenceFunction ([](const OperatorDef &, const vector< TensorShape > &) { vector< TensorShape > shapes(1);shapes[0].set_data_type(TensorProto::FLOAT);shapes[0].add_dims(1701);return shapes;}) |
|
| TEST (OperatorSchemaTest, TensorInferenceArbitrary) |
|
| TEST (OperatorSchemaTest, TestCastSchema) |
|
| NumInputs (2) .NumOutputs(2) .CostInferenceFunction([](const OperatorDef & |
|
| TEST (OperatorSchemaTest, TestCostInference) |
|
| OPERATOR_SCHEMA (JustTestCPUOnly).NumInputs(0 |
|
| OPERATOR_SCHEMA (JustTestWithSomeOutput) |
|
| REGISTER_CPU_OPERATOR (JustTest, JustTest) |
|
| REGISTER_CPU_OPERATOR (JustTestCPUOnly, JustTest) |
|
| REGISTER_CPU_OPERATOR_WITH_ENGINE (JustTest, FOO, JustTestAndNeverConstructs) |
|
| REGISTER_CPU_OPERATOR_WITH_ENGINE (JustTest, BAR, JustTestAndDoesConstruct) |
|
| REGISTER_CPU_OPERATOR_WITH_ENGINE (JustTest, BAZ, JustTestAndDoesConstruct) |
|
| REGISTER_CUDA_OPERATOR (JustTest, JustTest) |
|
| REGISTER_CPU_OPERATOR (JustTestWithSomeOutput, JustTestWithSomeOutput) |
|
| TEST (OperatorTest, DeviceTypeRegistryWorks) |
|
| TEST (OperatorTest, RegistryWorks) |
|
| TEST (OperatorTest, RegistryWrongDevice) |
|
| TEST (OperatorTest, ExceptionWorks) |
|
| TEST (OperatorTest, FallbackIfEngineDoesNotBuild) |
|
| TEST (OperatorTest, MultipleEngineChoices) |
|
| TEST (OperatorTest, CannotUseUninitializedBlob) |
|
| TEST (OperatorTest, TestParameterAccess) |
|
| TEST (OperatorTest, CannotAccessParameterWithWrongType) |
|
| TEST (OperatorTest, TestDefaultValue) |
|
| TEST (OperatorTest, TestSetUp) |
|
| TEST (OperatorTest, TestSetUpInputOutputCount) |
|
| TEST (OperatorTest, TestOutputValues) |
|
NetDef | GetNetDefForTest () |
|
| TEST (NetTest, TestScaffoldingSimpleNet) |
|
| TEST (NetTest, TestScaffoldingDAGNet) |
|
| GRADIENT_OPERATOR_SCHEMA (FooGradient).NumInputs(1).NumOutputs(1) |
|
| REGISTER_CPU_GRADIENT_OPERATOR_WITH_ENGINE (FooGradient, DUMMY_ENGINE, FooGradientDummyEngineOp) REGISTER_GRADIENT(Foo |
|
| TEST (OperatorGradientRegistryTest, GradientSimple) |
|
| TEST (EnginePrefTest, PerOpEnginePref) |
|
| TEST (EnginePrefTest, GlobalEnginePref) |
|
| TEST (EnginePrefTest, GlobalEnginePrefAndPerOpEnginePref) |
|
| TEST (EnginePrefTest, GlobalEnginePrefAndPerOpEnginePrefAndOpDef) |
|
| TEST (EnginePrefTest, SetOpEnginePref) |
|
| TEST (EnginePrefTest, SetDefaultEngine) |
|
| REGISTER_CPU_OPERATOR (JustTestWithRequiredArg, JustTestWithRequiredArg) |
|
| NumInputs (0, 1) .NumOutputs(0 |
|
| Arg ("test_arg", "this arg is required", true) |
|
| TEST (RequiredArg, Basic) |
|
| REGISTER_CPU_OPERATOR (JustTestWithStandardIsTestArg, JustTestWithStandardIsTestArg) |
|
| ArgIsTest ("this is_test arg is required") |
|
| TEST (IsTestArg, standard) |
|
| REGISTER_CPU_OPERATOR (JustTestWithNonStandardIsTestArg, JustTestWithNonStandardIsTestArg) |
|
| Arg (OpSchema::Arg_IsTest, "this is_test arg is not required") |
|
| TEST (IsTestArg, non_standard) |
|
| OPERATOR_SCHEMA (Sleep).NumInputs(0 |
|
| REGISTER_CPU_OPERATOR (Sleep, SleepOp) |
|
| REGISTER_CUDA_OPERATOR (Sleep, SleepOp) |
|
| TEST (DAGNetTest, TestDAGNetTiming) |
|
| TEST (SimpleNetTest, TestSimpleNetTiming) |
|
| TEST (DAGNetTest, TestDAGNetTimingReadAfterRead) |
|
| TEST (SimpleNetTest, TestSimpleNetTimingReadAfterRead) |
|
| TEST (DAGNetTest, TestDAGNetTimingWriteAfterWrite) |
|
| TEST (SimpleNetTest, TestSimpleNetTimingWriteAfterWrite) |
|
| TEST (DAGNetTest, TestDAGNetTimingWriteAfterRead) |
|
| TEST (SimpleNetTest, TestSimpleNetTimingWriteAfterRead) |
|
| TEST (DAGNetTest, TestDAGNetTimingControlDependency) |
|
| TEST (SimpleNetTest, TestSimpleNetTimingControlDependency) |
|
bool | RunPlanOnWorkspace (Workspace *ws, const PlanDef &plan, ShouldContinue shouldContinue) |
|
| TEST (PlanExecutorTest, EmptyPlan) |
|
| REGISTER_CPU_OPERATOR (StuckBlocking, StuckBlockingOp) |
|
| OPERATOR_SCHEMA (StuckBlocking).NumInputs(0).NumOutputs(0) |
|
| REGISTER_CPU_OPERATOR (Noop, NoopOp) |
|
| OPERATOR_SCHEMA (Noop).NumInputs(0).NumOutputs(0) |
|
| REGISTER_CPU_OPERATOR (StuckAsync, StuckAsyncOp) |
|
| OPERATOR_SCHEMA (StuckAsync).NumInputs(0).NumOutputs(0) |
|
| REGISTER_CPU_OPERATOR (Error, ErrorOp) |
|
| OPERATOR_SCHEMA (Error).NumInputs(0).NumOutputs(0) |
|
| REGISTER_CPU_OPERATOR (BlockingError, BlockingErrorOp) |
|
| OPERATOR_SCHEMA (BlockingError).NumInputs(0).NumOutputs(0) |
|
PlanDef | parallelErrorPlan () |
|
PlanDef | parallelErrorPlanWithCancellableStuckNet () |
|
PlanDef | reporterErrorPlanWithCancellableStuckNet () |
|
| TEST (PlanExecutorTest, ErrorAsyncPlan) |
|
| TEST (PlanExecutorTest, BlockingErrorPlan) |
|
| TEST (PlanExecutorTest, ErrorPlanWithCancellableStuckNet) |
|
| TEST (PlanExecutorTest, ReporterErrorPlanWithCancellableStuckNet) |
|
PlanDef | shouldStopWithCancelPlan () |
|
| TEST (PlanExecutorTest, ShouldStopWithCancel) |
|
| CAFFE_KNOWN_TYPE (QTensor< CPUContext >) |
|
template<typename F > |
detail::ScopeGuardImplDecay< F > | MakeGuard (F &&f) noexcept(noexcept(detail::ScopeGuardImplDecay< F >(static_cast< F && >(f)))) |
| ScopeGuard is a general implementation of the "Resource Acquisition Is
Initialization" (RAII) idiom. More...
|
|
ExportedStatMap | toMap (const ExportedStatList &stats) |
|
| CAFFE_KNOWN_TYPE (Tensor) |
|
TypeMeta | GetTensorType (const void *c) |
|
TypeMeta | GetInt8TensorType (const void *c) |
|
TypeCall | GetTypeCallFunction (TypeIdentifier id) |
|
void | RegisterTypeCallFunction (TypeIdentifier id, TypeCall c) |
|
vector< int64_t > | GetTensorInfo (const void *c, size_t *capacity, DeviceOption *device) |
|
vector< int64_t > | GetInt8TensorInfo (const void *c, size_t *capacity, DeviceOption *device) |
|
TensorInfoCall | GetTensorInfoFunction (TypeIdentifier id) |
|
void | RegisterTensorInfoFunction (TypeIdentifier id, TensorInfoCall c) |
|
void | TensorVectorResize (std::vector< Tensor > &tensors, int size, DeviceType type) |
|
Tensor | empty (at::IntArrayRef dims, at::TensorOptions options) |
|
void | ReinitializeTensor (Tensor *t, at::IntArrayRef dims, at::TensorOptions options) |
| Reinitialize a Tensor to the given dims and options if necessary. Note that this does nothing if the Tensor already has the correct size and data type. More...
|
|
void | ReinitializeAndCopyFrom (Tensor *t, at::TensorOptions options, const Tensor &src, bool async) |
|
template<typename T > |
Tensor | TensorCPUFromValues (at::IntArrayRef dims, at::ArrayRef< T > values) |
| Creates a CPU tensor, and fills its contents with the given values. More...
|
|
| CAFFE_KNOWN_TYPE (int8::Int8TensorCPU) |
|
| C10_DEFINE_REGISTRY (TransformRegistry, Transform) |
|
unique_ptr< Transform > | CreateTransform (string key) |
|
NetDef | ApplyTransform (const string &key, const NetDef &netdef) |
|
double | average_net_run_duration (const NetDef &netdef, const NetDef &init_netdef, const int warmup_runs, const int main_runs) |
|
NetDef | ApplyTransformIfFaster (const string &key, const NetDef &netdef, const NetDef &init_netdef, const int warmup_runs, const int main_runs, const double improvement_threshold) |
|
| C10_DECLARE_REGISTRY (TransformRegistry, Transform) |
|
TensorProto::DataType | TypeMetaToDataType (const TypeMeta meta) |
|
const TypeMeta | DataTypeToTypeMeta (const TensorProto::DataType &dt) |
|
StorageOrder | StringToStorageOrder (const string &str) |
|
int32_t | GetDimFromOrderString (const std::string &str) |
|
constexpr char | NameScopeSeparator () |
|
template<typename T > |
bool | fp16_type () |
|
template<> |
bool | fp16_type< at::Half > () |
|
| CAFFE_KNOWN_TYPE (WorkspaceTestFoo) |
|
| TEST (WorkspaceTest, BlobAccess) |
|
| TEST (WorkspaceTest, RunEmptyPlan) |
|
| TEST (WorkspaceTest, Sharing) |
|
| TEST (WorkspaceTest, BlobMapping) |
|
static void | forEachCheck (std::initializer_list< Workspace * > workspaces) |
| Checks that Workspace::ForEach(f) applies f on the specified set of workspaces in any order. More...
|
|
| TEST (WorkspaceTest, ForEach) |
|
std::string | GetUniqueName () |
|
| REGISTER_CPU_OPERATOR (CreateDB, CreateDBOp< CPUContext >) |
|
| OPERATOR_SCHEMA (CreateDB).NumInputs(0).NumOutputs(1) |
|
| NO_GRADIENT (CreateDB) |
|
| REGISTER_CUDA_OPERATOR (CreateDB, CreateDBOp< CUDAContext >) |
|
static std::string | encodeName (const std::string &name) |
|
| REGISTER_CPU_OPERATOR (FileStoreHandlerCreate, FileStoreHandlerCreateOp< CPUContext >) |
|
such as NFS. This store handler is not built to be fast. Its recommended use is for integration tests and prototypes where extra dependencies are cumbersome. Use an ephemeral path to ensure multiple processes or runs don't interfere. DOC | Arg ("path", "base path used by the FileStoreHandler") .Arg("prefix" |
|
such as NFS. This store handler is not built to be fast. Its recommended use is for integration tests and prototypes where extra dependencies are cumbersome. Use an ephemeral path to ensure multiple processes or runs don't interfere. DOC prefix for all keys used by this store | Output (0, "handler", "unique_ptr<StoreHandler>") |
|
| NO_GRADIENT (FileStoreHandlerCreateOp) |
|
| REGISTER_CUDA_OPERATOR (FileStoreHandlerCreate, FileStoreHandlerCreateOp< CUDAContext >) |
|
| REGISTER_CPU_OPERATOR (RedisStoreHandlerCreate, RedisStoreHandlerCreateOp< CPUContext >) |
|
host name of Redis server | Arg ("port", "port number of Redis server") .Arg("prefix" |
|
| NO_GRADIENT (RedisStoreHandlerCreateOp) |
|
| REGISTER_CUDA_OPERATOR (RedisStoreHandlerCreate, RedisStoreHandlerCreateOp< CUDAContext >) |
|
| CAFFE_KNOWN_TYPE (std::unique_ptr< StoreHandler >) |
|
| REGISTER_CPU_OPERATOR (StoreSet, StoreSetOp) |
|
alternative key for the | blob (optional)") .Input(0 |
|
alternative key for the unique_ptr< StoreHandler > | Input (1, "data", "data blob") |
|
| REGISTER_CPU_OPERATOR (StoreGet, StoreGetOp) |
|
alternative key for the unique_ptr< StoreHandler > | Output (0, "data", "data blob") |
|
| REGISTER_CPU_OPERATOR (StoreAdd, StoreAddOp) |
|
the store initializes it to and then performs the add operation The operation returns the resulting counter value DOC | Arg ("blob_name", "key of the counter (required)") .Arg("add_value" |
|
the store initializes it to and then performs the add operation The operation returns the resulting counter value DOC value that is | added (optional, default:1)") .Input(0 |
|
the store initializes it to and then performs the add operation The operation returns the resulting counter value DOC value that is unique_ptr< StoreHandler > | Output (0, "value", "the current value of the counter") |
|
| REGISTER_CPU_OPERATOR (StoreWait, StoreWaitOp) |
|
| NumInputs (1, 2) .NumOutputs(0) .SetDoc(R"DOC( Wait for the specified blob names to be set. The blob names can be passed either as an input blob with blob names or as an argument. )DOC") .Arg("blob_names" |
|
names of the blobs to wait | for (optional)") .Input(0 |
|
names of the blobs to wait unique_ptr< StoreHandler > | Input (1, "names", "names of the blobs to wait for (optional)") |
|
| REGISTER_CPU_OPERATOR (FC_Decomp, FullyConnectedOpDecomp< float, CPUContext >) |
|
| REGISTER_CPU_OPERATOR (FCGradient_Decomp, FullyConnectedDecompGradientOp< float, CPUContext >) |
|
| OPERATOR_SCHEMA (FC_Decomp).NumInputs(4).NumOutputs(1) |
|
| OPERATOR_SCHEMA (FCGradient_Decomp).NumInputs(4).NumOutputs(3 |
|
| REGISTER_GRADIENT (FC_Decomp, GetFCDecompGradient) |
|
| REGISTER_CUDA_OPERATOR (FC_Decomp, FullyConnectedOpDecomp< float, CUDAContext >) |
|
| REGISTER_CUDA_OPERATOR (FCGradient_Decomp, FullyConnectedDecompGradientOp< float, CUDAContext >) |
|
| REGISTER_CPU_OPERATOR (TTContraction, TTContractionOp< float, CPUContext >) |
|
| REGISTER_CUDA_OPERATOR (TTContraction, TTContractionOp< float, CUDAContext >) |
|
| REGISTER_CUDA_OPERATOR (TTContractionGradient, TTContractionGradientOp< float, CUDAContext >) |
|
| REGISTER_IDEEP_COMPARE_OPERATOR (EQ) |
|
| REGISTER_IDEEP_COMPARE_OPERATOR (GT) |
|
| REGISTER_IDEEP_COMPARE_OPERATOR (GE) |
|
| REGISTER_IDEEP_COMPARE_OPERATOR (LT) |
|
| REGISTER_IDEEP_COMPARE_OPERATOR (LE) |
|
| REGISTER_IDEEP_COMPARE_OPERATOR (NE) |
|
| REGISTER_IDEEP_OPERATOR (Softmax, IDEEPFallbackOp< SoftmaxOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (LabelCrossEntropy, IDEEPFallbackOp< LabelCrossEntropyOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (AveragedLoss, IDEEPFallbackOp< AveragedLoss< float, CPUContext >, SkipIndices< 0 > >) |
|
| REGISTER_IDEEP_OPERATOR (Flatten, IDEEPFallbackOp< FlattenOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (ResizeLike, IDEEPFallbackOp< ResizeLikeOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Slice, IDEEPFallbackOp< SliceOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Clip, IDEEPFallbackOp< ClipOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (ScatterAssign, IDEEPFallbackOp< ScatterAssignOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Cast, IDEEPFallbackOp< CastOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (XavierFill, IDEEPFallbackOp< XavierFillOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (ConstantFill, IDEEPFallbackOp< ConstantFillOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (GaussianFill, IDEEPFallbackOp< GaussianFillOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (MSRAFill, IDEEPFallbackOp< MSRAFillOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (GivenTensorFill, IDEEPFallbackOp< GivenTensorFillOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (GivenTensorDoubleFill, IDEEPFallbackOp< GivenTensorFillOp< double, CPUContext >, SkipIndices< 0 > >) |
|
| REGISTER_IDEEP_OPERATOR (GivenTensorBoolFill, IDEEPFallbackOp< GivenTensorFillOp< bool, CPUContext >, SkipIndices< 0 > >) |
|
| REGISTER_IDEEP_OPERATOR (GivenTensorIntFill, IDEEPFallbackOp< GivenTensorFillOp< int, CPUContext >, SkipIndices< 0 > >) |
|
| REGISTER_IDEEP_OPERATOR (GivenTensorInt64Fill, IDEEPFallbackOp< GivenTensorFillOp< int64_t, CPUContext >, SkipIndices< 0 > >) |
|
| REGISTER_IDEEP_OPERATOR (GivenTensorStringFill, IDEEPFallbackOp< GivenTensorFillOp< std::string, CPUContext >, SkipIndices< 0 > >) |
|
| REGISTER_IDEEP_OPERATOR (Load, IDEEPFallbackOp< LoadOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Save, IDEEPFallbackOp< SaveOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (RMACRegions, IDEEPFallbackOp< RMACRegionsOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (RoIPool, IDEEPFallbackOp< RoIPoolOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (RoIAlign, IDEEPFallbackOp< RoIAlignOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (RoIAlignRotated, IDEEPFallbackOp< RoIAlignRotatedOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (GenerateProposals, IDEEPFallbackOp< GenerateProposalsOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (GenerateProposalsCPP, IDEEPFallbackOp< GenerateProposalsOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (CollectAndDistributeFpnRpnProposals, IDEEPFallbackOp< CollectAndDistributeFpnRpnProposalsOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (BoxWithNMSLimit, IDEEPFallbackOp< BoxWithNMSLimitOp< CPUContext >, SkipIndices< 0, 1, 2 > >) |
|
| REGISTER_IDEEP_OPERATOR (BBoxTransform, IDEEPFallbackOp< BBoxTransformOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (AffineChannel, IDEEPFallbackOp< AffineChannelOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (StopGradient, IDEEPFallbackOp< StopGradientOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (PadImage, IDEEPFallbackOp< PadImageOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (PRelu, IDEEPFallbackOp< PReluOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (CTCGreedyDecoder, IDEEPFallbackOp< CTCGreedyDecoderOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (CTCBeamSearchDecoder, IDEEPFallbackOp< CTCBeamSearchDecoderOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (AveragedLossGradient, IDEEPFallbackOp< AveragedLossGradient< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (LabelCrossEntropyGradient, IDEEPFallbackOp< LabelCrossEntropyGradientOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (SoftmaxGradient, IDEEPFallbackOp< SoftmaxGradientOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Iter, IDEEPFallbackOp< IterOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (LearningRate, IDEEPFallbackOp< LearningRateOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Abs, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, AbsFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (Atan, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, AtanFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (Sqrt, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, SqrtFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (Sign, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, SignFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (Div, IDEEPFallbackOp< BinaryElementwiseOp< NumericTypes, CPUContext, DivFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (Mul, IDEEPFallbackOp< BinaryElementwiseOp< NumericTypes, CPUContext, MulFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (Sub, IDEEPFallbackOp< BinaryElementwiseOp< NumericTypes, CPUContext, SubFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (Tanh, IDEEPFallbackOp< UnaryElementwiseOp< TensorTypes< float >, CPUContext, TanhFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (L1Distance, IDEEPFallbackOp< L1DistanceOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Scale, IDEEPFallbackOp< ScaleOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Accuracy, IDEEPFallbackOp< AccuracyOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (AddGradient, IDEEPFallbackOp< BinaryElementwiseGradientOp< NumericTypes, CPUContext, AddFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (TanhGradient, IDEEPFallbackOp< BinaryElementwiseOp< TensorTypes< float >, CPUContext, TanhGradientFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (MulGradient, IDEEPFallbackOp< BinaryElementwiseGradientOp< NumericTypes, CPUContext, MulFunctor< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (TensorProtosDBInput, IDEEPFallbackOp< TensorProtosDBInput< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (CloseBlobsQueue, IDEEPFallbackOp< CloseBlobsQueueOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (SoftmaxWithLoss, IDEEPFallbackOp< SoftmaxWithLossOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (SoftmaxWithLossGradient, IDEEPFallbackOp< SoftmaxWithLossGradientOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Expand, IDEEPFallbackOp< ExpandOp< TensorTypes< std::int32_t, std::int64_t, float, double >, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Gather, IDEEPFallbackOp< GatherOp< CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (Normalize, IDEEPFallbackOp< NormalizeOp< float, CPUContext > >) |
|
| REGISTER_IDEEP_OPERATOR (ReduceL2, IDEEPFallbackOp< ReduceOp< TensorTypes< float >, CPUContext, L2Reducer< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (ReduceSum, IDEEPFallbackOp< ReduceOp< TensorTypes< std::int32_t, std::int64_t, float, double >, CPUContext, SumReducer< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (ReduceMean, IDEEPFallbackOp< ReduceOp< TensorTypes< float >, CPUContext, MeanReducer< CPUContext > > >) |
|
| REGISTER_IDEEP_OPERATOR (BatchMatMul, IDEEPFallbackOp< BatchMatMulOp< CPUContext > >) |
|
| C10_DECLARE_REGISTRY (IDEEPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
ideep::scale_t | ConvertScales (const std::vector< float > scales_z) |
|
ideep::tensor::dims | CanonicalDims (ideep::tensor::dims adims, int32_t axis) |
|
| CAFFE_KNOWN_TYPE (ideep::tensor) |
|
| C10_DEFINE_REGISTRY (IDEEPOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *) |
|
| CAFFE_REGISTER_DEVICE_TYPE (DeviceType::IDEEP, IDEEPOperatorRegistry) |
|
| REGISTER_EVENT_CREATE_FUNCTION (IDEEP, EventCreateCPU) |
|
| REGISTER_EVENT_RECORD_FUNCTION (IDEEP, EventRecordCPU) |
|
| REGISTER_EVENT_WAIT_FUNCTION (IDEEP, IDEEP, EventWaitCPUCPU) |
|
| REGISTER_EVENT_WAIT_FUNCTION (IDEEP, CPU, EventWaitCPUCPU) |
|
| REGISTER_EVENT_WAIT_FUNCTION (CPU, IDEEP, EventWaitCPUCPU) |
|
| REGISTER_EVENT_FINISH_FUNCTION (IDEEP, EventFinishCPU) |
|
| REGISTER_EVENT_QUERY_FUNCTION (IDEEP, EventQueryCPU) |
|
| REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (IDEEP, EventErrorMessageCPU) |
|
| REGISTER_EVENT_SET_FINISHED_FUNCTION (IDEEP, EventSetFinishedCPU) |
|
| REGISTER_EVENT_RESET_FUNCTION (IDEEP, EventResetCPU) |
|
| REGISTER_CPU_OPERATOR (ImageInput, ImageInputOp< CPUContext >) |
|
INT_MAX | TensorInferenceFunction ([](const OperatorDef &def, const vector< TensorShape > &) { vector< TensorShape > out(2);ArgumentHelper helper(def);int batch_size=helper.GetSingleArgument< int >("batch_size", 0);int crop=helper.GetSingleArgument< int >("crop", -1);int color=helper.GetSingleArgument< int >("color", 1);CHECK_GT(crop, 0);out[0]=CreateTensorShape(vector< int >{batch_size, crop, crop, color ? 3 :1}, TensorProto::FLOAT);out[1]=CreateTensorShape(vector< int >{1, batch_size}, TensorProto::INT32);return out;}) .SetDoc(R"DOC( Imports and processes images from a database. For each run of the operator |
|
INT_MAX batch_size images will be processed GPUs can optionally be used for part of the processing The following transformations are applied to the image A bounding box is applied to the initial | image (optional) - The image is rescaled either up or down(with the scale argument) or just up(with the minsize argument) - The image is randomly cropped(crop size is passed as an argument but the location of the crop is random except if is_test is passed in which case the image is cropped at the center) - The image is normalized. Each of its color channels can have separate normalization values The dimension of the output image will always be cropxcrop) DOC") .Arg( "batch_size" |
|
INT_MAX batch_size images will be processed GPUs can optionally be used for part of the processing The following transformations are applied to the image A bounding box is applied to the initial Number of images to output for each run of the | operator" ".Must be 1 or greater") .Arg ("color", "Number of color channels (1 or 3). Defaults to 1") .Arg("color_jitter" |
|
INT_MAX batch_size images will be processed GPUs can optionally be used for part of the processing The following transformations are applied to the image A bounding box is applied to the initial Number of images to output for each run of the Whether or not to do color jitter Defaults to | Arg ("img_saturation", "Image saturation scale used in color jittering. " "Defaults to 0.4") .Arg("img_brightness" |
|
INT_MAX batch_size images will be processed GPUs can optionally be used for part of the processing The following transformations are applied to the image A bounding box is applied to the initial Number of images to output for each run of the Whether or not to do color jitter Defaults to Image brightness scale used in color jittering Defaults to | Arg ("img_contrast", "Image contrast scale used in color jittering. " "Defaults to 0.4") .Arg("color_lighting" |
|
INT_MAX batch_size images will be processed GPUs can optionally be used for part of the processing The following transformations are applied to the image A bounding box is applied to the initial Number of images to output for each run of the Whether or not to do color jitter Defaults to Image brightness scale used in color jittering Defaults to Whether or not to do color lighting Defaults to | Arg ("color_lighting_std", "Std of normal distribution where color lighting" " scaling factor is sampled. Defaults to 0.1") .Arg("scale_jitter_type" |
|
INT_MAX batch_size images will be processed GPUs can optionally be used for part of the processing The following transformations are applied to the image A bounding box is applied to the initial Number of images to output for each run of the Whether or not to do color jitter Defaults to Image brightness scale used in color jittering Defaults to Whether or not to do color lighting Defaults to Scale the size of the smallest dimension of the image to this Scale and minsize are mutually exclusive Must be larger than crop | Arg ("minsize", "Scale the size of the smallest dimension of the image to" " this only if the size is initially smaller. Scale and minsize are" " mutually exclusive. Must be larger than crop.") .Arg("warp" |
|
the other dimension is proportionally scaled Defaults to | Arg ("crop", "Size to crop the image to. Must be provided") .Arg("mirror" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to | Arg ("mean", "Mean by which to normalize color channels." " Defaults to 0.") .Arg("mean_per_channel" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color | channel (1 or 3 elements). Defaults to mean argument. Channel order BGR") .Arg( "std" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to | Arg ("std_per_channel", "Vector of standard dev. per color channel " " (1 or 3 elements). Defaults to std argument. Channel order is BGR") .Arg("bounding_ymin" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults | to (none)") .Arg("bounding_xmin" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults if the input is in Caffe format Defaults to | Arg ("use_gpu_transform", "1 if GPU acceleration should be used." " Defaults to 0. Can only be 1 in a CUDAContext") .Arg("decode_threads" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults if the input is in Caffe format Defaults to Number of CPU decode transform threads Defaults to | Arg ("output_type", "If gpu_transform, can set to FLOAT or FLOAT16.") .Arg("db" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults if the input is in Caffe format Defaults to Number of CPU decode transform threads Defaults to Name of the | database (if not passed as input)") .Arg( "db_type" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults if the input is in Caffe format Defaults to Number of CPU decode transform threads Defaults to Name of the Type of The sizes of any outputs besides the data and | label (should have a number of elements equal to the number of additional "
"outputs)") .Arg( "random_scale" |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults if the input is in Caffe format Defaults to Number of CPU decode transform threads Defaults to Name of the Type of The sizes of any outputs besides the data and shortest side desired for image resize Defaults to[-1, -1] or no random resize desired | Input (0, "reader", "The input reader (a db::DBReader)") .Output(0 |
|
the other dimension is proportionally scaled Defaults to Whether or not to mirror the image Defaults to Vector of means per color Standard deviation by which to normalize color channels Defaults to Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults Bounding box coordinate Defaults if the input is in Caffe format Defaults to Number of CPU decode transform threads Defaults to Name of the Type of The sizes of any outputs besides the data and shortest side desired for image resize Defaults to[-1, -1] or no random resize desired Tensor containing the images | Output (1, "label", "Tensor containing the labels") .Output(2 |
|
| NO_GRADIENT (ImageInput) |
|
template<class Context > |
bool | RandomSizedCropping (cv::Mat *img, const int crop, std::mt19937 *randgen) |
|
template<class Context > |
void | Saturation (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen) |
|
template<class Context > |
void | Brightness (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen) |
|
template<class Context > |
void | Contrast (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen) |
|
template<class Context > |
void | ColorJitter (float *img, const int img_size, const float saturation, const float brightness, const float contrast, std::mt19937 *randgen) |
|
template<class Context > |
void | ColorLighting (float *img, const int img_size, const float alpha_std, const std::vector< std::vector< float > > &eigvecs, const std::vector< float > &eigvals, std::mt19937 *randgen) |
|
template<class Context > |
void | ColorNormalization (float *img, const int img_size, const int channels, const std::vector< float > &mean, const std::vector< float > &std) |
|
template<class Context > |
void | TransformImage (const cv::Mat &scaled_img, const int channels, float *image_data, const bool color_jitter, const float saturation, const float brightness, const float contrast, const bool color_lighting, const float color_lighting_std, const std::vector< std::vector< float > > &color_lighting_eigvecs, const std::vector< float > &color_lighting_eigvals, const int crop, const bool mirror, const std::vector< float > &mean, const std::vector< float > &std, std::mt19937 *randgen, std::bernoulli_distribution *mirror_this_image, bool is_test=false) |
|
template<class Context > |
void | CropTransposeImage (const cv::Mat &scaled_img, const int channels, uint8_t *cropped_data, const int crop, const bool mirror, std::mt19937 *randgen, std::bernoulli_distribution *mirror_this_image, bool is_test=false) |
|
| REGISTER_CUDA_OPERATOR (ImageInput, ImageInputOp< CUDAContext >) |
|
template<typename T_IN , typename T_OUT , class Context > |
bool | TransformOnGPU (Tensor &X, Tensor *Y, Tensor &mean, Tensor &std, Context *context) |
|
bool | tryConvertToMPSCNN (const NetDef &initNet, const NetDef &predictNet, NetDef *mpscnnPredictNet) |
|
NetDef | annotateDefWithReadCounts (const NetDef &net) |
|
NetDef | rewriteForMetal (const NetDef &net) |
|
NetDef | runMPSCNNFusion (const NetDef &net) |
|
void | dumpDef (const NetDef &d) |
|
void | mpscnnRecordExecutionFinish () |
|
MPSCNNContext & | getMPSCNNContext () |
|
Analysis | analyzeNet (const NetDef &net) |
|
static void | rewriteInput (OperatorDef *op, int i) |
|
static void | rewriteOutput (OperatorDef *op, int i) |
|
static void | insertOutputCopyFromMPSCNNOp (NetDef &predictNet, const std::vector< std::string > &cpu_blobs) |
|
NetDef | insertInputOutputCopyOps (const NetDef &def) |
|
bool | nextIsOnlyUserOfCurrent (const Analysis &analysis, size_t currentIdx, const OperatorDef &currentOp, const OperatorDef &nextOp) |
|
bool | tryFuseAdjacentOps (const Analysis &analysis, size_t currentIdx, const OperatorDef &currentOp, const OperatorDef &nextOp, OperatorDef *fusedOp) |
|
bool | tryConvertToMPSCNNIntermediateCopies (const NetDef &initNet, const NetDef &predictNet, NetDef *mpscnnPredictNet) |
|
NetDef | setSpecialArgs (const NetDef &def) |
|
void | testMPSCNN () |
|
void | compareModels (const NetDef &initNet, NetDef predictNet) |
|
void | ver |