#include "./comp_node_helper.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
#include "megbrain/serialization/serializer.h"
using namespace mgb;
using namespace comp_node_test;
namespace {
void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
using ConvParam = opr::Convolution::Param;
ConvParam param;
param.sparse = ConvParam::Sparse::GROUP;
HostTensorGenerator<> gen;
auto host_x = gen({3, 4, 10, 8}, cn), host_y = gen({2, 3, 2, 3, 3}, cn);
int iter = 0;
std::vector<int> executed;
HostTensorND host_z;
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
y = opr::Host2DeviceCopy::make(*graph, host_y),
z = opr::CallbackInjector::make(
opr::Convolution::make(x, y, param),
[&](DeviceTensorND&dv) { executed.push_back(iter); });
graph->options().comp_node_seq_record_level = 1;
if (fake_first) {
graph->options().fake_next_exec = true;
graph->options().var_sanity_check_first_run = false;
}
auto func = graph->compile({make_callback_copy(z, host_z)});
if (fake_first) {
func->execute(); }
int change = 5;
for (; iter < 10; ++iter) {
if (iter == change) {
*host_x = *gen({2, 4, 15, 13}, cn);
}
host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
func->execute();
auto expect = eval_conv_cpu<opr::Convolution>(*host_x, *host_y, param);
MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3) << "iter " << iter;
}
ASSERT_EQ(executed.size(), 4u);
ASSERT_EQ(executed[0], 0);
ASSERT_EQ(executed[1], fake_first ? 0 : 1);
ASSERT_EQ(executed[2], change);
ASSERT_EQ(executed[3], change + 1);
}
void run_comp_seq_rec_basic_level2(CompNode cn) {
using ConvParam = opr::ConvBias::Param;
ConvParam param;
param.sparse = ConvParam::Sparse::GROUP;
HostTensorGenerator<> gen;
auto host_x = gen({3, 4, 10, 8}, cn), host_y = gen({2, 3, 2, 3, 3}, cn);
int iter = 0;
std::vector<int> executed;
HostTensorND host_z;
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
y = opr::Host2DeviceCopy::make(*graph, host_y),
z = opr::CallbackInjector::make(
opr::ConvBias::make(x, y, param),
[&](DeviceTensorND&dv) { executed.push_back(iter); });
graph->options().comp_node_seq_record_level = 2;
graph->options().var_sanity_check_first_run = false;
auto func = graph->compile({make_callback_copy(z, host_z)});
ComputingGraph::assert_destroy(graph);
for (; iter < 10; ++iter) {
host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
func->execute();
auto expect = eval_conv_cpu<opr::ConvBias>(*host_x, *host_y, param);
MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3) << "iter " << iter;
}
ASSERT_EQ(executed.size(), 2u);
{
HostTensorND hz;
graph = ComputingGraph::make();
x = opr::Host2DeviceCopy::make(*graph, host_x);
y = opr::Host2DeviceCopy::make(*graph, host_y);
z = opr::ConvBias::make(x, y, param);
z = opr::GetVarShape::make(z);
graph->options().comp_node_seq_record_level = 2;
graph->options().var_sanity_check_first_run = false;
auto func = graph->compile({make_callback_copy(z, hz, true)});
ComputingGraph::assert_destroy(graph);
func->execute();
ASSERT_TRUE(hz.comp_node() == cn);
ASSERT_EQ(hz.ptr<int>()[0], 3);
ASSERT_EQ(hz.ptr<int>()[1], 6);
ASSERT_EQ(hz.ptr<int>()[2], 8);
ASSERT_EQ(hz.ptr<int>()[3], 6);
}
}
void run_comp_seq_rec_dyn_elemwise(CompNode cn, bool fake_first) {
HostTensorGenerator<> gen;
auto host_x = gen({3, 3}, cn), host_y = gen({1, 3}, cn), host_z = gen({3, 1}, cn);
auto check = [&]() {
HostTensorND ret(CompNode::load("cpux"), host_x->shape());
auto px = host_x->ptr<float>(), py = host_y->ptr<float>(),
pz = host_z->ptr<float>(), pw = ret.ptr<float>();
auto sz0 = host_x->shape()[0], sz1 = host_x->shape()[1];
for (size_t i = 0; i < sz0; ++i) {
for (size_t j = 0; j < sz1; ++j) {
pw[i * sz1 + j] = px[i * sz1 + j] * py[j] + pz[i];
}
}
return ret;
};
auto graph = ComputingGraph::make();
graph->options().var_sanity_check_first_run = false;
graph->options().graph_opt_level = 0;
graph->options().comp_node_seq_record_level = 1;
if (fake_first) {
graph->options().fake_next_exec = true;
}
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
y = opr::Host2DeviceCopy::make(*graph, host_y),
z = opr::Host2DeviceCopy::make(*graph, host_z),
w = opr::Elemwise::make({x, y, z}, opr::Elemwise::Mode::FUSE_MUL_ADD3);
HostTensorND host_w;
auto func = graph->compile({make_callback_copy(w, host_w)});
if (fake_first) {
func->execute();
}
for (int i = 0; i < 10; ++i) {
if (i == 5) {
*host_x = *gen({10, 8}, cn);
*host_y = *gen({1, 8}, cn);
*host_z = *gen({10, 1}, cn);
}
host_x->copy_from(*gen(host_x->shape(), cn));
func->execute();
auto expect = check();
MGB_ASSERT_TENSOR_EQ(expect, host_w) << "iter " << i;
}
}
void run_level2(CompNode cn, bool use_multi_holder) {
HostTensorGenerator<> gen;
auto host_x = gen({4, 3, 6, 7}, cn), host_w = gen({2, 3, 2, 3}, cn),
host_y = gen({1, 25}, cn), host_z = gen({8, 1}, cn),
host_large = gen({8, 25}, cn);
auto make_func = [&](bool enable) -> thin_function<const HostTensorND&()> {
auto graph = ComputingGraph::make();
graph->options().graph_opt_level = 0;
if (enable) {
graph->options().var_sanity_check_first_run = false;
graph->options().comp_node_seq_record_level = 2;
}
auto repeat2 = [](SymbolVar x) { return opr::Concat::make({x, x}, 0); };
SymbolVar w;
auto dev_w = std::make_shared<DeviceTensorND>();
if (use_multi_holder) {
dev_w->copy_from(*host_w).sync();
w = opr::MultipleDeviceTensorHolder::make(*graph, {dev_w})[0];
} else {
w = opr::SharedDeviceTensor::make(*graph, *host_w);
}
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
c = opr::Convolution::make(x, w).reshape({8, 25}),
y = opr::Host2DeviceCopy::make(*graph, host_y),
large = opr::ImmutableTensor::make(*graph, *host_large),
z = opr::Host2DeviceCopy::make(*graph, host_z),
t0 = opr::Elemwise::make(
{c, y, z}, opr::Elemwise::Mode::FUSE_MUL_ADD3) +
large,
t1 = opr::reduce_sum(t0, z.symshape()),
t2 = opr::Elemwise::make(
{repeat2(c), y, repeat2(t1)}, opr::Elemwise::Mode::FUSE_MUL_ADD3),
large1 = opr::ImmutableTensor::make(*graph, *host_large);
t2 * 2;
graph->static_infer_manager().infer_value(large.node());
graph->static_infer_manager().infer_value(large1.node());
graph->static_infer_manager().infer_value((t1.symshape() + 1).node());
auto result = std::make_shared<HostTensorND>();
auto func = graph->compile({make_callback_copy(t2, *result)});
std::shared_ptr<cg::AsyncExecutable> sh_func(func.release());
if (enable) {
ComputingGraph::assert_destroy(graph);
}
auto exec = [result, sh_func]() -> const HostTensorND& {
sh_func->execute();
return *result;
};
return exec;
};
auto f0 = make_func(false), f1 = make_func(true);
for (int i = 0; i < 3; ++i) {
host_x->copy_from(*gen(host_x->shape(), cn));
host_y->copy_from(*gen(host_y->shape(), cn));
host_z->copy_from(*gen(host_z->shape(), cn));
auto&& expect = f0();
auto&& get = f1();
MGB_ASSERT_TENSOR_EQ(expect, get);
}
host_x->resize({1});
ASSERT_THROW(f1(), MegBrainError);
}
}
namespace mgb {
namespace comp_node_test {
namespace seq_rec {
template <>
void run<basic>(CompNode cn) {
run_comp_seq_rec_basic(cn, false);
}
template <>
void run<basic_level2>(CompNode cn) {
run_comp_seq_rec_basic_level2(cn);
}
template <>
void run<basic_fake_exec>(CompNode cn) {
run_comp_seq_rec_basic(cn, true);
}
template <>
void run<dyn_elemwise>(CompNode cn) {
run_comp_seq_rec_dyn_elemwise(cn, false);
}
template <>
void run<dyn_elemwise_fake_exec>(CompNode cn) {
run_comp_seq_rec_dyn_elemwise(cn, true);
}
template <>
void run<level2>(CompNode cn) {
run_level2(cn, false);
}
template <>
void run<level2_multi_holder>(CompNode cn) {
run_level2(cn, true);
}
template <>
void run<level2_share_storage>(CompNode cn) {
HostTensorGenerator<> gen;
auto host_x = gen({1}, cn), host_y = gen({1}, cn), host_z = gen({10}, cn);
auto make_func =
[&](bool enable) -> thin_function<std::array<const HostTensorND*, 2>()> {
auto g0 = ComputingGraph::make(), g1 = ComputingGraph::make();
if (enable) {
g0->options().var_sanity_check_first_run = false;
g0->options().comp_node_seq_record_level = 2;
g1->options().var_sanity_check_first_run = false;
g1->options().comp_node_seq_record_level = 2;
g0->share_device_memory_with(*g1);
}
auto x0 = opr::Host2DeviceCopy::make(*g0, host_x),
x1 = opr::Host2DeviceCopy::make(*g1, host_x),
y = opr::Host2DeviceCopy::make(*g0, host_y),
z = opr::Host2DeviceCopy::make(*g1, host_z);
auto t0 = x0 + y, t1 = x1 + z;
auto host_t0 = std::make_shared<HostTensorND>(),
host_t1 = std::make_shared<HostTensorND>();
auto f0 = g0->compile({make_callback_copy(t0, *host_t0)});
auto f1 = g1->compile({make_callback_copy(t1, *host_t1)});
std::shared_ptr<cg::AsyncExecutable> sh_f0(f0.release()), sh_f1(f1.release());
if (enable) {
ComputingGraph::assert_destroy(g0);
ComputingGraph::assert_destroy(g1);
}
auto exec = [host_t0, host_t1, sh_f0,
sh_f1]() -> std::array<const HostTensorND*, 2> {
sh_f0->execute();
sh_f1->execute();
return {host_t0.get(), host_t1.get()};
};
return exec;
};
auto f0 = make_func(false), f1 = make_func(true);
for (int i = 0; i < 3; ++i) {
host_x->copy_from(*gen(host_x->shape(), cn));
host_y->copy_from(*gen(host_y->shape(), cn));
host_z->copy_from(*gen(host_z->shape(), cn));
auto&& expect = f0();
auto&& get = f1();
MGB_ASSERT_TENSOR_EQ(*expect[0], *get[0]);
MGB_ASSERT_TENSOR_EQ(*expect[1], *get[1]);
}
}
template <>
void run<level2_exec_check>(CompNode cn) {
HostTensorGenerator<> gen;
auto host_x = gen({1}, cn);
for (int testcase = 0; testcase < 3; ++testcase) {
host_x->copy_from(*gen({1}));
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x), y = x * 2;
HostTensorND host_y;
graph->options().var_sanity_check_first_run = false;
graph->options().comp_node_seq_record_level = 2;
auto func = graph->compile({make_callback_copy(y, host_y)});
ASSERT_EQ(host_y.shape(), host_x->shape());
auto expect = host_x->ptr<float>()[0] * 2;
ASSERT_NE(expect, host_y.ptr<float>()[0]);
if (testcase == 0) {
ComputingGraph::assert_destroy(graph);
func->execute();
ASSERT_EQ(expect, host_y.ptr<float>()[0]);
} else if (testcase == 1) {
ASSERT_THROW(func->execute(), MegBrainError);
} else {
func.reset();
graph.reset();
}
};
}
template <>
void run<sync_from_func>(CompNode cn) {
REQUIRE_THREAD();
HostTensorGenerator<> gen;
auto host_x = gen({1}, cn);
for (int level : {1, 2}) {
for (bool sync : {false, true}) {
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
y = opr::Sleep::make(x, 0.15) * 2;
HostTensorND host_y;
graph->options().var_sanity_check_first_run = false;
graph->options().comp_node_seq_record_level = level;
auto cb = [&](const DeviceTensorND& dv) {
host_y.copy_from(dv);
if (sync) {
host_y.sync();
}
};
auto func = graph->compile({{y, cb}});
if (level == 2) {
ComputingGraph::assert_destroy(graph);
}
for (int i = 0; i < 3; ++i) {
host_x->ptr<float>()[0] = i + 0.3;
func->execute();
if (!sync) {
func->wait();
}
auto got = host_y.ptr<float>()[0];
MGB_ASSERT_FLOAT_EQ((i + 0.3) * 2, got)
<< "level=" << level << " i=" << i;
}
}
}
}
template <>
void run<cb_non_contig>(CompNode cn) {
REQUIRE_THREAD();
HostTensorGenerator<> gen;
auto host_x = gen({4, 5}, cn);
for (int level : {1, 2}) {
for (bool sync : {false, true}) {
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
y = opr::Dimshuffle::make(x, {1, 0});
HostTensorND host_y;
graph->options().var_sanity_check_first_run = false;
graph->options().comp_node_seq_record_level = level;
auto cb = [&](const DeviceTensorND& dv) {
host_y.copy_from(dv);
if (sync) {
host_y.sync();
}
};
auto func = graph->compile({{y, cb}});
if (level == 2) {
ComputingGraph::assert_destroy(graph);
}
for (int i = 0; i < 3; ++i) {
host_x->copy_from(*gen(host_x->shape()));
HostTensorND expect{host_x->comp_node(), {5, 4}};
auto px = host_x->ptr<float>(), py = expect.ptr<float>();
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 4; ++j) {
py[i * 4 + j] = px[j * 5 + i];
}
}
func->execute();
if (!sync) {
func->wait();
}
MGB_ASSERT_TENSOR_EQ(expect, host_y);
}
}
}
}
template <>
void run<shape_dep_const_shape>(CompNode cn) {
using namespace serialization;
HostTensorGenerator<> gen;
auto host_x = gen({4, 5}, cn);
auto fname = output_file("test_comp_node_record_shape_dep_const_shape");
HostTensorND y_expect;
{
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x, OperatorNodeConfig{"x"}),
y = x.flatten() +
opr::reduce_sum(opr::GetVarShape::make(x), x.make_scalar(1));
graph->compile({make_callback_copy(y, y_expect)})->execute();
auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
dumper->dump({y});
}
HostTensorND host_y;
{
GraphLoadConfig config;
config.const_var_shape = true;
auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
auto load_rst = loader->load(config);
load_rst.graph->options().comp_node_seq_record_level = 2;
load_rst.graph->options().var_sanity_check_first_run = false;
auto x_inp = load_rst.tensor_map.at("x");
auto y = load_rst.output_var_list.at(0);
auto func = load_rst.graph_compile({make_callback_copy(y, host_y)});
x_inp->copy_from(*host_x);
func->execute();
}
MGB_ASSERT_TENSOR_EQ(y_expect, host_y);
}
template <>
void run<multi_recorder_run>(CompNode cn) {
using ConvParam = opr::Convolution::Param;
ConvParam param;
param.sparse = ConvParam::Sparse::GROUP;
HostTensorGenerator<> gen;
std::vector<HostTensorND> host_z_v(2, HostTensorND());
std::vector<std::unique_ptr<mgb::cg::AsyncExecutable>> funcs;
auto host_x = gen({3, 4, 10, 8}, cn), host_y = gen({2, 3, 2, 3, 3}, cn);
auto gen_graph = [&](int graph_id) -> std::unique_ptr<mgb::cg::AsyncExecutable> {
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
y = opr::Host2DeviceCopy::make(*graph, host_y),
z = opr::Convolution::make(x, y, param);
graph->options().comp_node_seq_record_level = 1;
return graph->compile({make_callback_copy(z, host_z_v[graph_id])});
};
funcs.push_back(gen_graph(0));
funcs.push_back(gen_graph(1));
for (int iter = 0; iter < 10; ++iter) {
host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
funcs[0]->execute();
funcs[1]->execute();
auto expect = eval_conv_cpu<opr::Convolution>(*host_x, *host_y, param);
MGB_ASSERT_TENSOR_NEAR(expect, host_z_v[0], 1e-3) << "iter " << iter;
MGB_ASSERT_TENSOR_NEAR(expect, host_z_v[1], 1e-3) << "iter " << iter;
}
}
template <>
void run<void>(CompNode) {}
} } }