#include "megbrain/test/helper.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/io.h"

#include <cstdlib>
#include <string>
using namespace mgb;
using Elemwise = opr::Elemwise;
using Mode = Elemwise::Mode;
#if MGB_ENABLE_MEMORY_SWAP
/*!
 * \brief Check that kernel gradients of a deep conv+relu chain are the same
 *      with and without memory swap.
 *
 * The environment variable MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT is set to
 * "1" (non-zero \p flag) or "0" (zero \p flag) for the duration of the call
 * and put back to its previous state (value or unset) when the call ends,
 * including when it ends early through a failed assertion or an exception.
 *
 * The graph is 200 layers of 5x5 convolution (30 output channels, padding 2),
 * each followed by relu, applied to a (5, 8, 100, 128) input. The loss is the
 * dot product of the flattened last output with itself. The graph is compiled
 * and executed twice, first with enable_memory_swap = false (reference), then
 * with enable_memory_swap = true; the gradients of the second run must match
 * the reference within 1e-3.
 *
 * \param flag selects the value written to
 *      MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT: "1" if non-zero, "0" otherwise
 */
auto run = [](const int flag) {
    auto KEY = "MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT";

    // Snapshot and later restore the environment variable. The value is
    // copied because the pointer returned by getenv() may be invalidated by
    // the setenv() calls below. Restoring from a destructor also covers early
    // returns (fatal assertions) and exceptions.
    struct EnvRestorer {
        const char* key;
        bool had_value;
        std::string saved;

        explicit EnvRestorer(const char* k) : key{k}, had_value{false} {
            if (const char* current = getenv(k)) {
                had_value = true;
                saved = current;
            }
        }
        EnvRestorer(const EnvRestorer&) = delete;
        EnvRestorer& operator=(const EnvRestorer&) = delete;
        ~EnvRestorer() {
            if (had_value) {
                setenv(key, saved.c_str(), 1);
            } else {
                unsetenv(key);
            }
        }
    };
    // Declared first so that it is destroyed last, after the graph objects.
    EnvRestorer env_restorer{KEY};

    if (flag)
        setenv(KEY, "1", 1);
    else
        setenv(KEY, "0", 1);

    HostTensorGenerator<> gen_;
    auto gen = [&](const TensorShape& shp) { return gen_(shp, "gpu0"); };
    constexpr size_t batch_size = 5, C = 8, H = 100, W = 128;
    constexpr size_t limit = 200;  // number of conv layers

    auto host_data = gen({batch_size, C, H, W});
    auto graph = ComputingGraph::make();
    SymbolVarArray kernels;
    // conv_res[0] is the input; conv_res[i] is the output of layer i.
    SymbolVarArray conv_res;
    auto data = opr::Host2DeviceCopy::make(*graph, host_data).rename("data");
    conv_res.push_back(data);

    // out_chl tracks the channel count of the most recent entry in conv_res.
    size_t out_chl = host_data->shape(1), layer_count = 0;
    auto add_layer = [&](size_t oc, size_t kernal_shape, size_t padding) {
        // NOTE(review): presumably sets the generator's standard deviation,
        // scaled by fan-in (in_channels * k * k) -- confirm against
        // HostTensorGenerator.
        gen_.std(sqrt(2.0 / (out_chl * kernal_shape * kernal_shape)));
        auto host_kern = gen({oc, out_chl, kernal_shape, kernal_shape});
        auto dev_kern = std::make_shared<DeviceTensorND>();
        dev_kern->copy_from(*host_kern);
        auto current_param = opr::Convolution::Param();
        kernels.emplace_back(opr::SharedDeviceTensor::make(*graph, dev_kern));
        current_param.pad_h = current_param.pad_w = padding;

        conv_res.push_back(opr::relu(opr::Convolution::make(
                conv_res[layer_count],
                kernels.back().rename(ssprintf("param%zu", layer_count)),
                current_param)));
        layer_count++;
        out_chl = oc;
    };

    for (size_t i = 1; i <= limit; ++i)
        add_layer(30, 5, 2);

    auto loss = opr::Dot::make(conv_res[limit].flatten(), conv_res[limit].flatten());

    // One output per kernel: the gradient of the loss w.r.t. that kernel,
    // delivered into grad_kernels_get[i] on every execution.
    std::vector<HostTensorND> grad_kernels_get(kernels.size());
    ComputingGraph::OutputSpec out_spec;
    for (size_t i = 0; i < kernels.size(); ++i) {
        out_spec.emplace_back(
                make_callback_copy(cg::grad(loss, kernels[i]), grad_kernels_get[i]));
    }

    std::vector<HostTensorND> grad_kernels_expect(grad_kernels_get.size());
    for (bool swap : {false, true}) {
        graph->options().enable_memory_swap = swap;
        auto func = graph->compile(out_spec);
        func->execute();
        if (!swap) {
            // Keep the no-swap results as the reference; the next run
            // overwrites grad_kernels_get.
            for (size_t i = 0; i < grad_kernels_get.size(); ++i)
                grad_kernels_expect[i].copy_from(grad_kernels_get[i]);
        }
    }

    for (size_t i = 0; i < grad_kernels_get.size(); ++i)
        MGB_ASSERT_TENSOR_NEAR(grad_kernels_get[i], grad_kernels_expect[i], 1e-3);
};
//! Swap-vs-no-swap gradient check with
//! MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT set to "0" (zero flag).
TEST(TestMemorySwap, FullConvSerial) {
    REQUIRE_GPU(1);
    constexpr int bucket_impl_flag = 0;
    run(bucket_impl_flag);
}
//! Swap-vs-no-swap gradient check with
//! MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT set to "1" (non-zero flag).
TEST(TestMemorySwap, FullConvParallel) {
    REQUIRE_GPU(1);
    // The flag must be non-zero here: passing 0 would repeat FullConvSerial
    // and leave the "1" setting of the environment variable untested.
    run(1);
}
#endif