crate::ix!();
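/// Generic harness for testing a binary GPU math op.
///
/// Fills two CUDA tensors `X0` (length `shapex0`) and `X1` (length `shapex1`)
/// element-wise from the `input0` and `input1` generators, runs `operation`
/// on them to produce `Y` (length `shapey`), copies `Y` back to the CPU, and
/// checks every element of `Y` against `correct_output`. The test is a no-op
/// when no CUDA GPU is available.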
#[inline] pub fn execute_gpu_binary_op_test(
    shapex0:        i32,
    shapex1:        i32,
    shapey:         i32,
    input0:         fn(unnamed_0: i32) -> f32,
    input1:         fn(unnamed_0: i32) -> f32,
    operation:      fn(
        n0:      i32,
        n1:      i32,
        src0:    *const f32,
        src1:    *const f32,
        dst:     *mut f32,
        context: *mut CUDAContext
    ) -> (),
    correct_output: fn(unnamed_0: i32) -> f32) {
    todo!();
    /*
    if (!HasCudaGPU())
      return;
    Workspace ws;
    DeviceOption option;
    option.set_device_type(PROTO_CUDA);
    CUDAContext context(option);
    Blob* blobx0 = ws.CreateBlob("X0");
    Blob* blobx1 = ws.CreateBlob("X1");
    Blob* bloby = ws.CreateBlob("Y");
    Blob* bloby_host = ws.CreateBlob("Y_host");
    auto* tensorx0 = BlobGetMutableTensor(blobx0, CUDA);
    auto* tensorx1 = BlobGetMutableTensor(blobx1, CUDA);
    auto* tensory = BlobGetMutableTensor(bloby, CUDA);
    vector<int> shapex0_vector{shapex0};
    vector<int> shapex1_vector{shapex1};
    vector<int> shapey_vector{shapey};
    tensorx0->Resize(shapex0_vector);
    tensorx1->Resize(shapex1_vector);
    tensory->Resize(shapey_vector);
    for (int i = 0; i < shapex0; i++) {
      math::Set<float, CUDAContext>(
          1, input0(i), tensorx0->mutable_data<float>() + i, &context);
    }
    for (int i = 0; i < shapex1; i++) {
      math::Set<float, CUDAContext>(
          1, input1(i), tensorx1->mutable_data<float>() + i, &context);
    }
    operation(
        shapex0,
        shapex1,
        tensorx0->template data<float>(),
        tensorx1->template data<float>(),
        tensory->mutable_data<float>(),
        &context);
    context.FinishDeviceComputation();

    // Copy result to CPU so we can inspect it
    auto* tensory_host = BlobGetMutableTensor(bloby_host, CPU);
    tensory_host->CopyFrom(*tensory);
    for (int i = 0; i < shapey; ++i) {
      EXPECT_EQ(tensory_host->data<float>()[i], correct_output(i));
    }
    */
}
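
// A minimal usage sketch, not taken from the original source: it shows how a
// caller might exercise `execute_gpu_binary_op_test` once the body above is
// implemented. The `gpu_add` helper below is hypothetical and exists only to
// illustrate the expected `operation` signature; a real test would pass a CUDA
// element-wise math wrapper from this crate instead.
#[cfg(test)]
mod execute_gpu_binary_op_test_usage {

    use super::*;

    /// Hypothetical stand-in for an element-wise CUDA add kernel wrapper.
    /// It exists only so the example type-checks; a real implementation
    /// would launch a device kernel over `src0`/`src1` into `dst`.
    fn gpu_add(
        _n0:      i32,
        _n1:      i32,
        _src0:    *const f32,
        _src1:    *const f32,
        _dst:     *mut f32,
        _context: *mut CUDAContext)
    {
        unimplemented!("replace with a real CUDA element-wise add wrapper");
    }

    #[test]
    #[ignore = "requires a CUDA GPU and a completed execute_gpu_binary_op_test"]
    fn adds_two_length_eight_vectors() {
        execute_gpu_binary_op_test(
            8,                   // len(X0)
            8,                   // len(X1)
            8,                   // len(Y)
            |i| i as f32,        // X0[i] = i
            |i| 2.0 * i as f32,  // X1[i] = 2 * i
            gpu_add,
            |i| 3.0 * i as f32,  // expected: Y[i] = X0[i] + X1[i] = 3 * i
        );
    }
}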