pytorch  1.8.2
About: PyTorch provides Tensor computation (like NumPy) with strong GPU acceleration and Deep Neural Networks (in Python) built on a tape-based autograd system. LTS (Long Term Support) release.
  Fossies Dox: pytorch-1.8.2.tar.gz (unofficial, experimental doxygen-generated source code documentation)

batch_matmul_op_gpu_test.cc
Go to the documentation of this file.
1#include <memory>
2#include <vector>
3
4#include <gtest/gtest.h>
5
8
9namespace caffe2 {
10namespace {
11
12class BatchMatMulOpGPUTest : public testing::Test {
13 protected:
14 void SetUp() override {
15 if (!HasCudaGPU()) {
16 return;
17 }
18 option_.set_device_type(PROTO_CUDA);
19 cuda_context_ = make_unique<CUDAContext>(option_);
20 def_.set_name("test");
21 def_.set_type("BatchMatMul");
22 def_.add_input("A");
23 def_.add_input("B");
24 def_.add_output("Y");
25 def_.mutable_device_option()->set_device_type(PROTO_CUDA);
26 }
27
28 void AddConstInput(
29 const std::vector<int64_t>& dims,
30 const float value,
31 const string& name) {
32 Blob* blob = ws_.CreateBlob(name);
34 tensor->Resize(dims);
35 math::Set<float, CUDAContext>(
36 tensor->numel(),
37 value,
38 tensor->template mutable_data<float>(),
39 cuda_context_.get());
40 }
41
42 void VerifyOutput(const std::vector<int64_t>& dims, const float value) const {
43 const Blob* Y_blob = ws_.GetBlob("Y");
44 ASSERT_NE(nullptr, Y_blob);
45 const auto& Y = Y_blob->Get<Tensor>();
46 Tensor Y_cpu(Y, CPU);
47 const auto Y_dims = Y_cpu.sizes();
48 ASSERT_EQ(dims.size(), Y_dims.size());
49 for (std::size_t i = 0; i < dims.size(); ++i) {
50 ASSERT_EQ(dims[i], Y_dims[i]);
51 }
52 for (int i = 0; i < Y_cpu.numel(); ++i) {
53 EXPECT_FLOAT_EQ(value, Y_cpu.data<float>()[i]);
54 }
55 }
56
58 std::unique_ptr<CUDAContext> cuda_context_;
60 OperatorDef def_;
61};
62
63TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUNormalTest) {
64 if (!HasCudaGPU()) {
65 return;
66 }
67 AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
68 AddConstInput(std::vector<int64_t>{3, 10, 6}, 1.0f, "B");
69 std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
70 ASSERT_NE(nullptr, op);
71 ASSERT_TRUE(op->Run());
72 VerifyOutput(std::vector<int64_t>{3, 5, 6}, 10.0f);
73}
74
75TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) {
76 if (!HasCudaGPU()) {
77 return;
78 }
79 auto* arg = def_.add_arg();
80 arg->set_name("broadcast");
81 arg->set_i(1);
82 AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
83 AddConstInput(std::vector<int64_t>{2, 3, 10, 6}, 1.0f, "B");
84 std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
85 ASSERT_NE(nullptr, op);
86 ASSERT_TRUE(op->Run());
87 VerifyOutput(std::vector<int64_t>{2, 3, 5, 6}, 10.0f);
88}
89
90} // namespace
91} // namespace caffe2
OperatorDef def_
DeviceOption option_
std::unique_ptr< CUDAContext > cuda_context_
Workspace ws_
#define ASSERT_TRUE(cmd)
std::string name
def DeviceOption(device_type, device_id=0, random_seed=None, node_name=None, numa_node_id=None, extra_info=None)
Definition: core.py:101
Copyright (c) 2016-present, Facebook, Inc.
Definition: blob.h:13
Tensor * BlobGetMutableTensor(Blob *blob, at::IntArrayRef dims, at::TensorOptions options)
Definition: blob.h:65
void AddConstInput(const vector< int64_t > &shape, const float value, const string &name, Workspace *ws)
core.CreateOperator("Slice",["X"],["Y"], starts=(0, 1), ends=(-1, 3)) workspace.FeedBlob("X", np.array()) print("X:", workspace.FetchBlob("X")) workspace.RunOperatorOnce(op) print("Y:", workspace.FetchBlob("Y")) ``` **Result **``` X:Y:```</details >) DOC") .Input(0, "X", "(*Tensor *):tensor to extract slices from") .Input( 1, "starts", "(*Tensor`< int >` *):1D tensor of start-indices for each dimension of data(dimensions following the sliced one might be omitted)") .Input( 2, "ends", "(*Tensor`< int >` *):1D tensor of end-indices for each dimension of data(dimensions following the sliced one might be omitted)") .Arg("starts", "(*Tuple(int) *):list of starting indices") .Arg("ends", "(*Tuple(int) *):list of ending indices") .TensorInferenceFunction([](const OperatorDef& def, const vector<TensorShape>& in) { if (in.size() > 1) { return vector<TensorShape>() op
Definition: slice_op.cc:82
constexpr DeviceType CPU
Definition: caffe2_pb.h:9
reconstruct values together according to masks A comprehensive False False True Reconstruct Note that for all mask there must be at least one True This is not False False we accept the first value
bool HasCudaGPU()
Check if the current running session has a cuda gpu present.
Definition: common_gpu.h:120
constexpr DeviceType CUDA
Definition: caffe2_pb.h:10
Output tensor quantization scale the filter blob
The common world The allreduced tensor
unique_ptr< OperatorBase > CreateOperator(const OperatorDef &operator_def, Workspace *ws, int net_position)
Definition: operator.cc:356
*and produces a single output tensor *expanded *The op also takes an argument *dims *with a list of dimensions for where to add the single dimensional entries If the same blob is provided as input and the operation is copy free This is the exact inverse operation of *Squeeze *Github dims
TEST_F(StringJoinOpTest, testString1DJoin)
string arg
Definition: setup.py:234