#include <gtest/gtest.h>

#include <memory>
#include <vector>

#include "caffe2/core/context_gpu.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {
namespace {

class BatchMatMulOpGPUTest : public testing::Test {
 protected:
  void SetUp() override {
    // Skip setup entirely when no CUDA device is present.
    if (!HasCudaGPU()) {
      return;
    }
    option_.set_device_type(PROTO_CUDA);
    cuda_context_ = std::make_unique<CUDAContext>(option_);
    // Y = BatchMatMul(A, B), scheduled on the GPU.
    def_.set_name("test");
    def_.set_type("BatchMatMul");
    def_.add_input("A");
    def_.add_input("B");
    def_.add_output("Y");
    def_.mutable_device_option()->set_device_type(PROTO_CUDA);
  }

  // Creates a CUDA tensor blob with the given shape, filled with `value`.
  void AddConstInput(
      const std::vector<int64_t>& dims,
      const float value,
      const std::string& name) {
    Blob* blob = ws_.CreateBlob(name);
    auto* tensor =
        BlobGetMutableTensor(blob, dims, at::dtype<float>().device(CUDA));
    math::Set<float, CUDAContext>(
        tensor->numel(),
        value,
        tensor->template mutable_data<float>(),
        cuda_context_.get());
  }

  // Copies the output blob "Y" back to the CPU, then checks its shape and
  // that every element equals `value`.
  void VerifyOutput(
      const std::vector<int64_t>& dims,
      const float value) const {
    const Blob* Y_blob = ws_.GetBlob("Y");
    ASSERT_NE(nullptr, Y_blob);
    const auto& Y = Y_blob->Get<Tensor>();
    Tensor Y_cpu(Y, CPU);
    const auto Y_dims = Y_cpu.sizes();
    ASSERT_EQ(dims.size(), Y_dims.size());
    for (std::size_t i = 0; i < dims.size(); ++i) {
      ASSERT_EQ(dims[i], Y_dims[i]);
    }
    for (int i = 0; i < Y_cpu.numel(); ++i) {
      EXPECT_FLOAT_EQ(value, Y_cpu.data<float>()[i]);
    }
  }

  DeviceOption option_;
  std::unique_ptr<CUDAContext> cuda_context_;
  Workspace ws_;
  OperatorDef def_;
};

TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUNormalTest) {
  if (!HasCudaGPU()) {
    return;
  }
  // A: {3, 5, 10} and B: {3, 10, 6}, both filled with ones.
  AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
  AddConstInput(std::vector<int64_t>{3, 10, 6}, 1.0f, "B");
  std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
  ASSERT_NE(nullptr, op);
  ASSERT_TRUE(op->Run());
  VerifyOutput(std::vector<int64_t>{3, 5, 6}, 10.0f);
}

TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) {
  if (!HasCudaGPU()) {
    return;
  }
  // broadcast=1 lets the 3-D A multiply every batch of the 4-D B.
  auto* arg = def_.add_arg();
  arg->set_name("broadcast");
  arg->set_i(1);
  AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
  AddConstInput(std::vector<int64_t>{2, 3, 10, 6}, 1.0f, "B");
  std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
  ASSERT_NE(nullptr, op);
  ASSERT_TRUE(op->Run());
  VerifyOutput(std::vector<int64_t>{2, 3, 5, 6}, 10.0f);
}

} // namespace
} // namespace caffe2
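
// A minimal sketch of the arithmetic both tests above rely on; it is not part
// of the original test file, and the helper name `ExpectedConstBatchMatMulValue`
// is an illustrative assumption rather than a Caffe2 API. When both operands
// are filled with a constant v and share inner dimension K, every element of
// the batched product is a dot product of K identical terms:
//   y = sum_{k=0}^{K-1} v * v = K * v * v
// so with K = 10 and v = 1.0f the expected output value is 10.0f.
static inline float ExpectedConstBatchMatMulValue(const int K, const float v) {
  return static_cast<float>(K) * v * v;
}
// Hypothetical usage inside a test body:
//   VerifyOutput({3, 5, 6}, ExpectedConstBatchMatMulValue(10, 1.0f));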