#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/misc.h"
#include "megbrain/opr/tensor_gen.h"
#include "megbrain/opr/utility.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megbrain/utils/arith_helper.h"
using namespace mgb;
using namespace opr;
// GetVarShape over several inputs: checks the operator's dependency
// bookkeeping and the shape values it produces, both for the full shape and
// for a single selected axis, before and after the input shapes change.
TEST(TestTensorManip, GetVarShape) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 1});
    auto host_y = gen({1, 2});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Host2DeviceCopy::make(*graph, host_y);
    auto z0 = opr::GetVarShape::make({x, y, x.make_scalar(5)});
    auto z1 = opr::GetVarShape::make({x, y}, 1);

    // Only x and y are expected to remain as inputs, each with a SHAPE dep.
    auto shape_opr = z0.node()->owner_opr();
    ASSERT_EQ(2u, shape_opr->input().size());
    constexpr auto expect_dep = cg::OperatorNodeBase::NodeProp::DepType::SHAPE;
    auto&& dep_map = shape_opr->node_prop().dep_map();
    ASSERT_EQ(2u, dep_map.size());
    ASSERT_EQ(expect_dep, dep_map.at(x.node()));
    ASSERT_EQ(expect_dep, dep_map.at(y.node()));

    // Converts a 1-D Int32 host tensor into the TensorShape it encodes.
    auto to_shape = [](const HostTensorND& value) {
        mgb_assert(value.dtype() == dtype::Int32());
        mgb_assert(value.shape().ndim == 1);
        TensorShape shape;
        shape.ndim = value.shape()[0];
        auto data = value.ptr<int>();
        for (size_t axis = 0; axis < shape.ndim; ++axis) {
            shape[axis] = data[axis];
        }
        return shape;
    };

    HostTensorND host_z0, host_z1;
    auto func = graph->compile(
            {make_callback_copy(z0, host_z0), make_callback_copy(z1, host_z1)});

    func->execute();
    ASSERT_EQ(TensorShape({3, 2}), to_shape(host_z0));
    ASSERT_EQ(TensorShape({2}), to_shape(host_z1));

    // Re-run the compiled function with inputs of a different rank.
    *host_x = *gen({5, 1, 6});
    *host_y = *gen({1, 8, 1});
    func->execute();
    ASSERT_EQ(TensorShape({5, 8, 6}), to_shape(host_z0));
    ASSERT_EQ(TensorShape({8}), to_shape(host_z1));
}
// Taking the shape of Reshape(x, tshp) must yield the very same var as tshp.
TEST(TestTensorManip, GetVarShapeBypass) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, gen({3, 2}));
    auto t = opr::Host2DeviceCopy::make(*graph, gen({2, 3}));
    auto tshp = opr::GetVarShape::make(t);
    auto reshaped = opr::Reshape::make(x, tshp);
    auto y = opr::GetVarShape::make(reshaped);
    ASSERT_EQ(tshp, y);
}
// GetVarShape with negative axis arguments (-1 and -2): checks the dependency
// bookkeeping and the extracted dimension for two sets of input shapes.
TEST(TestTensorManip, GetVarShapeNegativeAxis) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1, 3});
    auto host_y = gen({2, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Host2DeviceCopy::make(*graph, host_y);
    auto z0 = opr::GetVarShape::make({x, y}, -1);
    auto z1 = opr::GetVarShape::make({x, y}, -2);

    auto shape_opr = z0.node()->owner_opr();
    ASSERT_EQ(2u, shape_opr->input().size());
    constexpr auto expect_dep = cg::OperatorNodeBase::NodeProp::DepType::SHAPE;
    auto&& dep_map = shape_opr->node_prop().dep_map();
    ASSERT_EQ(2u, dep_map.size());
    ASSERT_EQ(expect_dep, dep_map.at(x.node()));
    ASSERT_EQ(expect_dep, dep_map.at(y.node()));

    // Converts a 1-D Int32 host tensor into the TensorShape it encodes.
    auto to_shape = [](const HostTensorND& value) {
        mgb_assert(value.dtype() == dtype::Int32());
        mgb_assert(value.shape().ndim == 1);
        TensorShape shape;
        shape.ndim = value.shape()[0];
        auto data = value.ptr<int>();
        for (size_t axis = 0; axis < shape.ndim; ++axis) {
            shape[axis] = data[axis];
        }
        return shape;
    };

    HostTensorND host_z0, host_z1;
    auto func = graph->compile(
            {make_callback_copy(z0, host_z0), make_callback_copy(z1, host_z1)});

    func->execute();
    ASSERT_EQ(TensorShape({3}), to_shape(host_z0));
    ASSERT_EQ(TensorShape({2}), to_shape(host_z1));

    // Same compiled function, now fed with 3-D inputs.
    *host_x = *gen({5, 1, 6});
    *host_y = *gen({1, 8, 1});
    func->execute();
    ASSERT_EQ(TensorShape({6}), to_shape(host_z0));
    ASSERT_EQ(TensorShape({8}), to_shape(host_z1));
}
// Reshapes a flat (N*C) input to the shape of an (N, C) input and adds them.
// Verifies the Reshape dependency types, that the reshaped var shares device
// memory with its input, and the element-wise sum.
TEST(TestTensorManip, Reshape) {
    constexpr size_t N = 123, C = 456;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({N * C}), host_opr1 = gen({N, C});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"}),
              opr1 = opr::Host2DeviceCopy::make(*graph, host_opr1, {"opr1"}),
              opr0_reshp = opr::Reshape::make(opr0, opr::GetVarShape::make(opr1)),
              sum = opr::add(opr0_reshp, opr1);
    {
        // input(0) is the data var, input(1) is the target-shape var.
        auto op = opr0_reshp.node()->owner_opr();
        auto&& dep_map = op->node_prop().dep_map();
        using DT = cg::OperatorNodeBase::NodeProp::DepType;
        ASSERT_EQ(2u, dep_map.size());
        ASSERT_EQ(DT::DEV_VALUE | DT::VALUE_ALLOW_EMPTY, dep_map.at(op->input(0)));
        ASSERT_EQ(DT::HOST_VALUE, dep_map.at(op->input(1)));
    }
    HostTensorND host_sum;
    auto func = graph->compile({make_callback_copy(sum, host_sum)});
    func->execute();

    ASSERT_TRUE(cg::is_static_var_storage(opr0_reshp.node()));
    ASSERT_FALSE(host_sum.layout().eq_layout(host_opr0->layout()));
    ASSERT_TRUE(host_sum.layout().eq_layout(host_opr1->layout()));
    // The reshape result must alias the device buffer of its input.
    ASSERT_EQ(dev_ptr(opr0), dev_ptr(opr0_reshp));

    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) {
        // %zu is the matching conversion for the size_t index.
        MGB_ASSERT_FLOAT_EQ(o0[i] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zu", o0[i], o1[i], i);
    }
}
// Static value inference of Reshape applied to a broadcast (hence
// non-contiguous) constant, plus rejection of incompatible target shapes.
TEST(TestTensorManip, ReshapeNoncontigValueInfer) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::ImmutableTensor::make(*graph, *host_x);
    auto y = x.broadcast({2, 2});
    auto z = opr::Reshape::make(y, {1, 0}, 1);

    auto&& infer_mgr = graph->static_infer_manager();
    ASSERT_EQ(
            cg::static_infer::InferType::CONST,
            infer_mgr.get_infer_type(z.node()).value);

    auto inferred = infer_mgr.infer_value(z.node());
    auto src = host_x->ptr<float>();
    auto dst = inferred.ptr<float>();
    // Each source element must appear twice in a row in the inferred value.
    for (int flat = 0; flat < 4; ++flat) {
        ASSERT_EQ(src[flat / 2], dst[flat]);
    }

    ASSERT_THROW(opr::Reshape::make(y, {3, 0}, 1), TensorReshapeError);
    ASSERT_THROW(opr::Reshape::make(y, {3, 2}), TensorReshapeError);
}
// Reshaping a var to the shape it already has must return the same var node,
// while a reshape that really changes the shape must produce a new one.
TEST(TestTensorManip, ReshapeSameShapeBypass) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 3});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto flat = x.reshape({6});
    auto flat_again = flat.reshape({6});
    auto same_as_x = x.reshape(opr::GetVarShape::make(x));

    ASSERT_EQ(flat.node(), flat_again.node());
    ASSERT_EQ(x.node(), same_as_x.node());
    ASSERT_NE(x.node(), flat.node());
}
// Adds a reshaped input to another input and checks that the sum is written
// into the same device buffer as the reshape result. opr1 is flagged
// NO_MEM_RECLAIM before compilation.
TEST(TestTensorManip, ReshapeAndInplace) {
    constexpr size_t C = 456;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({C}), host_opr1 = gen({C / 2, 2});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_opr0),
              opr1 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_opr1),
              reshape = opr::Reshape::make(opr0, TensorShape{C / 2, 2}),
              sum = reshape + opr1;
    opr1.node()->add_flag(cg::VarNode::Flag::NO_MEM_RECLAIM);

    HostTensorND host_sum(CompNode::load("xpu0"));
    auto func = graph->compile({make_callback_copy(sum, host_sum)});
    func->execute();

    // The addition is expected to reuse the reshape output's device memory.
    ASSERT_EQ(dev_ptr(reshape), dev_ptr(sum));
    ASSERT_EQ(host_opr1->layout(), host_sum.layout());

    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.sync().ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; ++i) {
        // %zu is the matching conversion for the size_t index.
        MGB_ASSERT_FLOAT_EQ(o0[i] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zu", o0[i], o1[i], i);
    }
}
// Reshape whose target shape comes from a dynamic var: the reshaped var has a
// non-static shape, while the gradient w.r.t. x keeps x's static shape.
// The loss is dot(flat(x), flat(x)), so the expected gradient is 2 * x.
TEST(TestTensorManip, DynamicReshape) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 4});
    auto host_tshp =
            std::make_shared<HostTensorND>(host_x->comp_node(), dtype::Int32());
    host_tshp->resize({1}).ptr<int>()[0] = 12;

    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x");
    auto x_rshp_shp = opr::MarkDynamicVar::make(
            opr::Host2DeviceCopy::make(*graph, host_tshp).rename("x_rshp_shp"));
    auto x_rshp = opr::Reshape::make(x, x_rshp_shp).rename("x_rshp");
    auto x_flat = x_rshp.flatten();
    auto gx =
            cg::grad(opr::Dot::make(x_flat, x_flat).rename("loss"), x).rename("gx");

    ASSERT_FALSE(cg::is_static_var_shape(x_rshp.node()));
    ASSERT_TRUE(cg::is_static_var_shape(gx.node()));
    ASSERT_EQ(host_x->shape(), gx.node()->shape());

    HostTensorND host_rshp, host_gx;
    auto func = graph->compile(
            {make_callback_copy(x_rshp, host_rshp), make_callback_copy(gx, host_gx)});

    // Feeds an input of shape `ishp` with target shape `tshp` and validates
    // the reshaped shape and the gradient values.
    auto check = [&](const TensorShape& ishp, const TensorShape& tshp) {
        host_x->copy_from(*gen(ishp));
        {
            DeviceTensorND shape_value;
            cg::copy_shape_to_tensor_value(shape_value, tshp);
            host_tshp->copy_from(shape_value);
        }
        func->execute();
        ASSERT_EQ(tshp, host_rshp.shape());
        ASSERT_EQ(host_x->shape(), host_gx.shape());
        const size_t nr_elems = host_x->shape().total_nr_elems();
        for (size_t i = 0; i < nr_elems; ++i) {
            MGB_ASSERT_FLOAT_EQ(host_x->ptr<float>()[i] * 2, host_gx.ptr<float>()[i]);
        }
    };
    check({3, 4}, {12});
    check({5, 3}, {15});
    check({3, 4, 35}, {21, 20});
}
// Reshape to {?, 8} where axis 0 is left unspecified: the leading dimension
// must follow the number of input elements.
TEST(TestTensorManip, ReshapeWithUnspec) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 8});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Reshape::make(x, {1, 8}, 0);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});

    for (size_t rows : {1, 5, 6}) {
        // Feed a flat input holding rows * 8 elements.
        host_x->copy_from(*gen({rows * 8}));
        func->execute();

        TensorShape expect_shape({rows, 8});
        ASSERT_EQ(expect_shape, host_y.shape());
        auto expect_value = host_x->sub(SubTensorSpec::make_from_layout(
                host_x->layout().reshape(expect_shape)));
        MGB_ASSERT_TENSOR_EQ(expect_value, host_y);
    }
}
// Reshape of a dynamic input: the output shape is still available at graph
// construction time because the target shape can be inferred.
TEST(TestTensorManip, ReshapeInferShapeForDynamicInput) {
    constexpr size_t N0 = 2, C0 = 3;
    HostTensorGenerator<> gen;
    auto host_x = gen({N0, C0});
    auto host_tshp = gen({1});
    auto graph = ComputingGraph::make();
    host_tshp->ptr<float>()[0] = N0 * C0;

    SymbolVar x = opr::Host2DeviceCopy::make(*graph, host_x);
    SymbolVar xd = opr::MarkDynamicVar::make(x);
    SymbolVar tshp = opr::Host2DeviceCopy::make(*graph, host_tshp);
    SymbolVar y0 = opr::Reshape::make(xd, tshp) + 1;
    SymbolVar y1 = opr::Reshape::make(xd, opr::GetVarShape::make(x)) + 2;
    ASSERT_EQ(y0.shape(), TensorShape({N0 * C0}));
    ASSERT_EQ(y1.shape(), TensorShape({N0, C0}));

    HostTensorND host_y0, host_y1;
    auto func = graph->compile(
            {make_callback_copy(y0, host_y0), make_callback_copy(y1, host_y1)});

    // Runs the graph on a fresh input of shape `ishp`; y0 is the flattened
    // input plus 1, y1 is the input (in its own shape) plus 2.
    auto run = [&](const TensorShape& ishp) {
        const auto nr_elems = ishp.total_nr_elems();
        host_x->copy_from(*gen(ishp));
        host_tshp->ptr<float>()[0] = nr_elems;
        func->execute();

        ASSERT_EQ(host_y0.shape(), TensorShape({nr_elems}));
        ASSERT_EQ(host_y1.shape(), ishp);
        auto px = host_x->ptr<float>();
        auto py0 = host_y0.ptr<float>();
        auto py1 = host_y1.ptr<float>();
        for (size_t i = 0; i < nr_elems; ++i) {
            ASSERT_EQ(px[i] + 1, py0[i]);
            ASSERT_EQ(px[i] + 2, py1[i]);
        }
    };
    run({3, 2});
    run({23, 12, 5});
}
// Reshape of an empty tensor. The empty tensor comes from CondTake(x == 0) on
// an all-ones input; after reshaping it is concatenated with `v` along the
// zero-sized axis, so the concat result must equal `v`.
TEST(TestTensorManip, ReshapeEmptyShape) {
    HostTensorGenerator<> gen;
    constexpr size_t x_length = 233;
    auto host_x = gen({x_length});
    auto host_v = gen({2, 3, 3, 3});
    {
        // All ones, so nothing compares equal to zero below.
        auto px = host_x->ptr<float>();
        for (size_t i = 0; i < x_length; ++i) {
            px[i] = 1.f;
        }
    }

    constexpr auto INVALID_AXIS = opr::Reshape::Param::INVALID_AXIS;
    for (auto unspec_axis : {INVALID_AXIS, 0, 1, 3}) {
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;

        TensorShape tshape{2, 3, 3, 3};
        auto zero_axis = unspec_axis;
        if (unspec_axis == INVALID_AXIS) {
            // No unspecified axis: put the zero extent explicitly on axis 2.
            zero_axis = 2;
            tshape[zero_axis] = 0;
        }

        using CondTakeMode = opr::CondTake::Param::Mode;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto x_empty = opr::CondTake::make(x, x, {CondTakeMode::EQ, 0.f})[0];
        auto v = opr::Host2DeviceCopy::make(*graph, host_v);
        auto x_reshape = opr::Reshape::make(x_empty, tshape, {unspec_axis});
        auto y = opr::Concat::make({x_reshape, v}, zero_axis);

        HostTensorND host_empty, host_y;
        auto func = graph->compile(
                {make_callback_copy(x_reshape, host_empty),
                 make_callback_copy(y, host_y)});
        func->execute().wait();

        ASSERT_TRUE(host_empty.layout().is_empty());
        MGB_ASSERT_TENSOR_EQ(*host_v, host_y);
    }
}
// Same as the unspecified-axis reshape test, but the unspecified axis is
// given as a negative index (-2 of a 2-D target shape).
TEST(TestTensorManip, ReshapeWithNegativeUnspec) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 8});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Reshape::make(x, {1, 8}, -2);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});

    for (size_t rows : {1, 5, 6}) {
        // Feed a flat input holding rows * 8 elements.
        host_x->copy_from(*gen({rows * 8}));
        func->execute();

        TensorShape expect_shape({rows, 8});
        ASSERT_EQ(expect_shape, host_y.shape());
        auto expect_value = host_x->sub(SubTensorSpec::make_from_layout(
                host_x->layout().reshape(expect_shape)));
        MGB_ASSERT_TENSOR_EQ(expect_value, host_y);
    }
}
// Broadcasts a {1, 1} input to the shape of an {N, C} input and adds them.
// Every output element must equal the single opr0 value plus the matching
// opr1 element.
TEST(TestTensorManip, Broadcast) {
    constexpr size_t N = 20, C = 30;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({1, 1}), host_opr1 = gen({N, C});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"}),
              opr1 = opr::Host2DeviceCopy::make(*graph, host_opr1, {"opr1"}),
              sum = opr::add(opr::Broadcast::make(opr0, host_opr1->shape()), opr1);

    HostTensorND host_sum(CompNode::load("xpu0"));
    auto func =
            graph->compile({{sum, [&](DeviceTensorND& s) { host_sum.copy_from(s); }}});
    func->execute();

    ASSERT_TRUE(host_sum.layout().eq_layout(host_opr1->layout()));
    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.sync().ptr<float>();
    // Iterate over the full-sized operand: bounding the loop by host_opr0
    // (one element) would leave all but the first output unchecked.
    for (size_t i = 0, it = host_opr1->layout().total_nr_elems(); i < it; i++) {
        // opr0 only has index 0; %zu matches the size_t index.
        MGB_ASSERT_FLOAT_EQ(o0[0] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zu", o0[0], o1[i], i);
    }
}
// Broadcast where the source and/or target shape has a zero-sized axis; only
// the resulting shape is checked. Each case is (source shape, target shape).
TEST(TestTensorManip, BroadcastEmptyShape) {
    HostTensorGenerator<> gen;
    for (auto&& shapes :
         {std::make_pair(TensorShape{1}, TensorShape{0}),
          {{1, 2, 3}, {0, 2, 3}},
          {{2, 3}, {1, 0, 2, 3}},
          {{1, 0, 2, 3}, {4, 0, 2, 3}},
          {{0, 1, 2, 3}, {3, 0, 4, 2, 3}}}) {
        const TensorShape& src_shape = shapes.first;
        const TensorShape& dst_shape = shapes.second;

        auto host_x = gen(src_shape);
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto y = opr::Broadcast::make(x, dst_shape);

        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_TRUE(host_y.shape().eq_shape(dst_shape));
    }
}
// Gradient through Dimshuffle with inserted axes. With
// y = dimshuffle(x) * prod and loss = dot(y, y), the expected gradient for
// each element is 2 * prod^2 * x.
TEST(TestTensorManip, Dimshuffle) {
    HostTensorGenerator<> gen;
    constexpr size_t S0 = 8, S1 = 3;
    auto host_x = gen({S0, S1}), host_prod = gen({S1, 1, S0, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
         prod = opr::Host2DeviceCopy::make(*graph, host_prod).rename("prod"),
         // Pattern entries of -1 insert new axes; 1 and 0 swap the input axes.
            x_ds = opr::Dimshuffle::make(x, {1, -1, 0, -1}).rename("x_ds"),
         y = (x_ds * prod).reshape({S0 * S1}).rename("y"),
         loss = opr::Dot::make(y, y).rename("loss"),
         gx = cg::grad(loss, x).rename("gx");
    ASSERT_TRUE(cg::is_static_var_shape(gx.node()));
    ASSERT_EQ(host_x->shape(), gx.node()->shape());

    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();

    for (size_t i = 0; i < S0; i++) {
        for (size_t j = 0; j < S1; j++) {
            // Host values use distinct names so they do not shadow the graph
            // vars x / prod / gx declared above.
            const float x_val = host_x->ptr<float>({i, j})[0],
                        prod_val = host_prod->ptr<float>({j, 0, i, 0})[0],
                        gx_val = host_gx.ptr<float>({i, j})[0];
            // %zu is the matching conversion for the size_t indices.
            MGB_ASSERT_FLOAT_EQ(2 * prod_val * prod_val * x_val, gx_val) << ssprintf(
                    "failed at (%zu, %zu): x=%g prod=%g gx=%g", i, j, x_val,
                    prod_val, gx_val);
        }
    }
}
// Dimshuffle on inputs with zero-sized axes; only the output shape is
// checked. Each case is (input shape, dimshuffle pattern).
TEST(TestTensorManip, DimshuffleEmptyShape) {
    HostTensorGenerator<> gen;
    for (auto&& test_case :
         {std::make_pair(TensorShape{3, 0}, std::vector<int>{1, -1, 0, -1}),
          {{3, 1, 0, 4}, {-1, 3, -1, 0, 2}},
          {{2, 0, 3, 0}, {1, 0, 2, 3}}}) {
        auto&& in_shape = test_case.first;
        auto&& pattern = test_case.second;

        auto host_x = gen(in_shape);
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto y = opr::Dimshuffle::make(x, pattern);

        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();

        auto&& out_shape = host_y.shape();
        for (size_t out_axis = 0; out_axis < pattern.size(); ++out_axis) {
            auto src_axis = pattern[out_axis];
            if (src_axis != -1) {
                // Axis taken from the input keeps the input's extent.
                ASSERT_EQ(in_shape[src_axis], out_shape[out_axis]);
                continue;
            }
            // -1 in the pattern denotes a newly inserted axis of extent 1.
            ASSERT_EQ(out_shape[out_axis], 1u);
        }
    }
}
// Checks a Reshape -> Dimshuffle -> Reshape chain against a hand-written
// reference, once with static vars and once with everything marked dynamic.
// For a 4-D input of shape (s0, s1, s2, s3) the graph reshapes to
// (s0, s1 / (RED0 * RED1), RED0, RED1, s2, s3), swaps axes 2 and 3, and
// reshapes to (s0, s1 / (RED0 * RED1), s2 * RED0, s3 * RED1).
TEST(TestTensorManip, DimshuffleCombined) {
using Checker = AutoOprChecker<1, 1>;
constexpr int RED0 = 2, RED1 = 3;
for (bool dyn : {false, true}) {
// Graph under test; when `dyn` is set the input and all scalar constants are
// wrapped in MarkDynamicVar.
auto make_graph =
[dyn](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
auto x = inputs[0];
if (dyn)
x = opr::MarkDynamicVar::make(x);
// Scalar constant helper (made dynamic when `dyn` is set).
auto cv = [&](int v) {
auto rst = x.make_scalar(v);
if (dyn)
rst = opr::MarkDynamicVar::make(rst);
return rst;
};
auto xshp = opr::GetVarShape::make(x);
// Extracts element `idx` of the shape var of x.
auto sub = [&](int idx) {
return opr::IndexAt::make(xshp, {{0, cv(idx)}});
};
// tshp0: 6-D intermediate shape; tshp1: final 4-D shape.
auto tshp0 = opr::Concat::make(
{sub(0), sub(1) / (RED0 * RED1), cv(RED0), cv(RED1), sub(2),
sub(3)},
0),
tshp1 = opr::Concat::make(
{sub(0), sub(1) / (RED0 * RED1), sub(2) * RED0, sub(3) * RED1},
0);
auto y0 = opr::Reshape::make(x, tshp0),
y1 = opr::Dimshuffle::make(y0, {0, 1, 3, 2, 4, 5}),
y2 = opr::Reshape::make(y1, tshp1);
return {y2.node()};
};
// Reference implementation: computes the same permutation by index math.
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
auto&& iv = *inp.at(0);
auto&& ov = dest.at(0);
auto ishp = iv.shape();
auto oshp = ishp;
oshp.shape[1] /= RED0 * RED1;
oshp.shape[2] *= RED0;
oshp.shape[3] *= RED1;
ov.comp_node(iv.comp_node()).resize(oshp);
// tmpshp starts as the 6-D shape after the dimshuffle (RED1 before RED0).
size_t tmpshp[6] = {oshp.shape[0], oshp.shape[1], RED1,
RED0, ishp.shape[2], ishp.shape[3]},
tmpidx[6];
for (size_t oidx = 0, oidxt = oshp.total_nr_elems(); oidx < oidxt; ++oidx) {
// Decompose the flat output index into a 6-D index, innermost axis first.
for (int i = 5, x = oidx; i >= 0; --i) {
tmpidx[i] = x % tmpshp[i];
x /= tmpshp[i];
// After the outermost axis nothing of the flat index may be left over.
mgb_assert(i || !x);
}
// Undo the axis swap on both shape and index to address the input layout.
std::swap(tmpshp[2], tmpshp[3]);
std::swap(tmpidx[2], tmpidx[3]);
// Recompose the flat input index from the swapped 6-D index.
size_t iidx = 0;
for (int i = 5, d = 1; i >= 0; --i) {
iidx += d * tmpidx[i];
d *= tmpshp[i];
}
// Restore tmpshp for the next iteration (tmpidx is recomputed anyway).
std::swap(tmpshp[2], tmpshp[3]);
ov.ptr<float>()[oidx] = iv.ptr<float>()[iidx];
}
};
Checker::RunOptions opt;
// NOTE(review): numdiff_eps is presumably the step of the checker's numerical
// differentiation -- confirm against AutoOprChecker.
opt.numdiff_eps = 1; constexpr size_t R = RED0 * RED1;
// Axis 1 of every test shape is a multiple of R so that it divides evenly.
Checker(make_graph, fwd)
.run({{{1, R, 1, 1}}}, opt)
.run({{{5, R * 2, 3, 2}}}, opt)
.run({{{2, R * 3, 4, 3}}}, opt);
}
}
// Sums five differently constructed Subtensor results and compares against a
// hand-written reference; afterwards checks which of the sub-vars have static
// shape / static storage. In slice notation (as confirmed by the reference
// implementation in `fwd`):
//   sub0 = (x * 0.9)[10:shape0:2]   (begin is a dynamic scalar)
//   sub1 = dyn(x)[:-10:2]
//   sub2 = x[:, 3][5:-5:2] with an axis re-inserted at position 1
//   sub3 = x[4:-6:2, -1:]
//   sub4 = (x + .1)[-3:7:-2, 1::-3] (last step is a dynamic scalar)
TEST(TestTensorManip, Subtensor) {
using Checker = AutoOprChecker<1, 1>;
// Captured by reference so the vars can be inspected after the checker ran.
SymbolVar sub0, sub1, sub2, sub3, sub4;
auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
using AIdx = opr::Subtensor::AxisIndexer;
auto x = inputs[0];
x = x.rename("x");
// Scalar constant helper; optionally wrapped in MarkDynamicVar.
auto cv = [&](int v, bool dyn = false) {
auto rst = x.make_scalar(v);
if (dyn)
rst = opr::MarkDynamicVar::make(rst);
return rst;
};
sub0 = opr::Subtensor::make(
x * 0.9f,
{AIdx::make_interval(
0, cv(10, true), opr::GetVarShape::make(x, 0), cv(2))})
.rename("sub0");
sub1 = opr::Subtensor::make(
opr::MarkDynamicVar::make(x),
{AIdx::make_interval(0, None, cv(-10), cv(2))})
.rename("sub1");
auto sub2_raw = opr::Subtensor::make(
opr::IndexAt::make(x, {{1, cv(3)}}),
{AIdx::make_interval(0, cv(5), cv(-5), cv(2))});
{
// Every index input (all inputs after the first) must carry a HOST_VALUE dep.
auto opr = sub2_raw.node()->owner_opr();
auto&& inp = opr->input();
auto&& dmap = opr->node_prop().dep_map();
for (size_t i = 1; i < inp.size(); ++i) {
mgb_assert(
dmap.at(inp[i]) &
cg::OperatorNodeBase::NodeProp::DepType::HOST_VALUE);
}
}
sub2 = opr::AxisAddRemove::make(
sub2_raw, {opr::AxisAddRemove::AxisDesc::make_add(1)})
.rename("sub2");
sub3 = opr::Subtensor::make(
x, {AIdx::make_interval(0, cv(4), cv(-6), cv(2)),
AIdx::make_interval(1, cv(-1), None, None)});
sub4 = opr::Subtensor::make(
x + .1f, {AIdx::make_interval(0, cv(-3), cv(7), cv(-2)),
AIdx::make_interval(1, cv(1), None, cv(-3, true))});
return {(sub0 + sub1 + sub2 + sub3 + sub4).rename("y")};
};
// Reference implementation of sub0 + sub1 + sub2 + sub3 + sub4.
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
auto iptr = inp[0]->ptr<float>();
auto ishp = inp[0]->shape();
auto oshp = ishp;
// s0: extent of axis 0; s1: elements per axis-0 slice;
// s2: elements per (axis 0, axis 1) index pair.
auto s0 = ishp.shape[0], s1 = ishp.total_nr_elems() / s0,
s2 = s1 / ishp.shape[1];
// Number of rows selected by begin=10, step=2 on axis 0 (rounded up).
auto os0 = (s0 - 10 + 1) / 2;
oshp.shape[0] = os0;
dest[0].comp_node(inp[0]->comp_node());
dest[0].resize(oshp);
auto optr = dest[0].ptr<float>();
for (size_t i = 0; i < os0; ++i)
for (size_t j = 0; j < s1; ++j) {
// The five terms correspond to sub0..sub4 in order; `j % s2` repeats the
// single selected axis-1 slice of sub2/sub3/sub4 across axis 1.
optr[i * s1 + j] =
iptr[(i * 2 + 10) * s1 + j] * .9f + iptr[(i * 2) * s1 + j] +
iptr[(i * 2 + 5) * s1 + j % s2 + s2 * 3] +
iptr[(i * 2 + 4) * s1 + j % s2 + s2 * (ishp.shape[1] - 1)] +
iptr[(ishp.shape[0] - 3 - i * 2) * s1 + j % s2 + s2 * 1] + 0.1;
}
};
Checker::RunOptions opt;
// NOTE(review): numdiff_eps is presumably the step of the checker's numerical
// differentiation -- confirm against AutoOprChecker.
opt.numdiff_eps = 1; Checker checker(make_graph, fwd);
checker.run({{{11, 5}}}, opt).run({{{20, 6}}}, opt).run({{{56, 6, 4}}}, opt);
// Vars built from dynamic inputs/indices must not have a static shape ...
ASSERT_FALSE(cg::is_static_var_shape(sub0.node()));
ASSERT_FALSE(cg::is_static_var_shape(sub1.node()));
// ... constant-index slices keep static storage, while sub4 (dynamic step)
// does not.
ASSERT_TRUE(cg::is_static_var_storage(sub2.node()));
ASSERT_TRUE(cg::is_static_var_storage(sub3.node()));
ASSERT_FALSE(cg::is_static_var_storage(sub4.node()));
}
// Subtensor indexing on axis -1: picks element 2 along the last axis, for
// inputs of rank 1 to 4.
TEST(TestTensorManip, SubtensorNegativeAxis) {
    using Checker = AutoOprChecker<1, 1>;

    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0];
        auto picked = opr::Subtensor::make(x, {AIdx::make_index(-1, x.make_scalar(2))});
        return {picked};
    };

    // Reference: drop the last axis and read offset 2 of every innermost row.
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto src = inp[0]->ptr<float>();
        auto out_shape = inp[0]->shape();
        --out_shape.ndim;
        // Extent of the removed (last) axis == distance between rows.
        auto row_len = out_shape.shape[out_shape.ndim];
        if (out_shape.ndim == 0) {
            // A 1-D input yields a single element.
            out_shape = {1};
        }
        auto dst = dest[0].resize(out_shape).ptr<float>();
        const size_t nr_out = out_shape.total_nr_elems();
        for (size_t row = 0; row < nr_out; ++row) {
            dst[row] = src[row * row_len + 2];
        }
    };

    Checker checker(make_graph, fwd);
    checker.run({TensorShape{5}})
            .run({TensorShape{2, 3}})
            .run({TensorShape{2, 3, 4}})
            .run({TensorShape{2, 3, 4, 5}});
}
// Subtensor with an empty index description must behave as identity.
TEST(TestTensorManip, SubtensorWithEmptyIndexDesc) {
    using Checker = AutoOprChecker<1, 1>;

    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto x = inputs[0];
        return {opr::Subtensor::make(x, {})};
    };

    // Reference: plain element-wise copy of the input.
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto src = inp[0]->ptr<float>();
        auto shape = inp[0]->shape();
        auto dst = dest[0].resize(shape).ptr<float>();
        const size_t nr_elems = shape.total_nr_elems();
        for (size_t pos = 0; pos < nr_elems; ++pos) {
            dst[pos] = src[pos];
        }
    };

    Checker checker(make_graph, fwd);
    checker.run({TensorShape{5}})
            .run({TensorShape{2, 3}})
            .run({TensorShape{2, 3, 4}})
            .run({TensorShape{2, 3, 4, 5}});
}
// A dynamic scalar index on axis 0 combined with a constant interval on
// axis 1: the output shape must still be statically known.
TEST(TestTensorManip, SubtensorShapeInferForDynAxisIdx) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 6, 3});
    auto host_idx = gen({1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto idx = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_idx));
    auto cv = [&](int v) { return x.make_scalar(v); };

    using Ad = opr::Subtensor::AxisIndexer;
    auto y = opr::Subtensor::make(
            x, {Ad::make_interval(1, cv(1), cv(-2), cv(2)), Ad::make_index(0, idx)});
    ASSERT_TRUE(cg::is_static_var_shape(y.node()));
    ASSERT_EQ(y.node()->shape(), TensorShape({2, 3}));

    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    host_idx->ptr<float>()[0] = 2;
    func->execute();

    // Expected: x[2, 1:-2:2, :], i.e. rows 1 and 3 of slice 2.
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    expt.resize({2, 3});
    auto expt_ptr = expt.ptr<float>();
    for (size_t row = 0; row < 2; ++row) {
        for (size_t col = 0; col < 3; ++col) {
            expt_ptr[row * 3 + col] = host_x->ptr<float>({2, row * 2 + 1, col})[0];
        }
    }
    MGB_ASSERT_TENSOR_EQ(expt, host_y);
}
// Subtensor x[1:] must forward the memory of x for four ways of providing the
// index:
//   0: constant scalar
//   1: Host2DeviceCopy of an Int32 host tensor
//   2: as 1, additionally wrapped in MarkDynamicVar
//   3: constant scalar, with an extra reader of x on another stream that is
//      registered as an extra var dependency
TEST(TestTensorManip, SubtensorDynCaseMemFwd) {
    auto run = [](int dyn_type) {
        ASSERT_FALSE(HasFailure()) << "already failed before " << dyn_type;
        HostTensorGenerator<> gen;
        auto host_x = gen({2, 3});
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x);

        const bool const_idx = (dyn_type == 0 || dyn_type == 3);
        SymbolVar idx;
        if (!const_idx) {
            auto host_idx =
                    std::make_shared<HostTensorND>(host_x->comp_node(), dtype::Int32{});
            host_idx->resize({1}).ptr<int>()[0] = 1;
            idx = opr::Host2DeviceCopy::make(*graph, host_idx);
            if (dyn_type == 2) {
                idx = opr::MarkDynamicVar::make(idx);
            }
        } else {
            idx = x.make_scalar(1);
            if (dyn_type == 3) {
                auto xrd = opr::Copy::make(x, host_x->comp_node().change_stream(1));
                graph->options().extra_vardeps[x.node()].push_back(xrd.node());
            }
        }

        auto y = opr::Subtensor::make(
                x, {opr::Subtensor::AxisIndexer::make_interval(0, idx, None, None)});
        if (dyn_type != 2) {
            ASSERT_EQ(TensorShape({1, 3}), y.shape());
        }

        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();

        // Expected value: the second row of x (element offset 3).
        auto expect = host_x->sub(SubTensorSpec::make_from_offset_elem(
                TensorLayout({1, 3}, dtype::Float32{}), 3));
        MGB_ASSERT_TENSOR_EQ(expect, host_y);

        ASSERT_EQ(dyn_type == 0, cg::is_static_var_storage(y.node()));
        ASSERT_EQ(dyn_type != 2, cg::is_static_var_shape(y.node()));
        // y must point three floats into the buffer of x.
        ASSERT_EQ(
                static_cast<const uint8_t*>(prev_dev_ptr(x)) + 3 * sizeof(float),
                prev_dev_ptr(y));
    };

    for (int dyn_type = 0; dyn_type <= 3; ++dyn_type) {
        run(dyn_type);
    }
}
// Subtensor whose index var is created via make_no_value_infer; the result
// must still be x[2].
TEST(TestTensorManip, SubtensorWithNoValInferInp) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 1});
    auto host_idx = gen({1});
    auto graph = ComputingGraph::make();

    using Ad = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto idx = opr::Host2DeviceCopy::make_no_value_infer(*graph, host_idx);
    auto y = opr::Subtensor::make(x, {Ad::make_index(0, idx)});

    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    host_idx->ptr<float>()[0] = 2;
    func->execute();

    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    auto expt_ptr = expt.resize({1}).ptr<float>();
    expt_ptr[0] = host_x->ptr<float>()[2];
    MGB_ASSERT_TENSOR_EQ(expt, host_y);
}
// Identical Subtensor descriptions must map to the same var node: four index
// kinds on two distinct axes (each axis visited twice) give 8 unique nodes.
TEST(TestTensorManip, SubtensorDedup) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 5, 5, 5});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto cv = [&](int v) { return x.make_scalar(v); };

    using S = opr::Subtensor;
    using D = S::AxisIndexer;
    std::unordered_set<VarNode*> unique_nodes;
    auto record = [&](SymbolVar var) { unique_nodes.insert(var.node()); };

    for (int axis : {0, 1, 1, 0}) {
        record(S::make(x, {D::make_index(axis, cv(2))}));
        record(S::make(x, {D::make_interval(axis, cv(2), None, None)}));
        record(S::make(x, {D::make_interval(axis, None, cv(2), None)}));
        record(S::make(x, {D::make_interval(axis, None, None, cv(2))}));
    }
    ASSERT_EQ(8u, unique_nodes.size());
}
// Changes the host index value between two executions of x[idx:idx+1].
// With a Host2DeviceCopy index (dyn == true) the new value must be picked up;
// with an ImmutableTensor index the result must stay the same. In both cases
// the index var is only expected to be evaluated on the default CPU comp node
// (recorded as "inferred"), never on the device.
TEST(TestTensorManip, SubtensorIdxChange) {
    auto run = [](bool dyn) {
        HostTensorGenerator<> gen;
        auto host_x = gen({10});
        auto host_idx =
                std::make_shared<HostTensorND>(host_x->comp_node(), dtype::Int32());
        host_idx->resize({1}).ptr<int>()[0] = 1;

        bool idx_exec = false, idx_infered = false;
        // Records on which comp node the index value was produced.
        auto on_idx_value = [&](DeviceTensorND& dv) {
            if (dv.comp_node() == CompNode::default_cpu()) {
                idx_infered = true;
            } else {
                idx_exec = true;
            }
        };

        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        SymbolVar idx_src;
        if (dyn) {
            idx_src = opr::Host2DeviceCopy::make(*graph, host_idx);
        } else {
            idx_src = opr::ImmutableTensor::make(*graph, *host_idx);
        }
        auto idx = opr::CallbackInjector::make(idx_src, {false, true, on_idx_value});
        auto y = opr::Subtensor::make(
                x,
                {opr::Subtensor::AxisIndexer::make_interval(0, idx, idx + 1, None)});

        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        ASSERT_TRUE(cg::is_static_var_shape(y.node()));
        ASSERT_TRUE(cg::is_static_var_value(y.node()));
        ASSERT_EQ(!dyn, cg::is_static_var_storage(y.node()));
        ASSERT_EQ(TensorShape({1}), y.node()->shape());

        auto px = host_x->ptr<float>();
        func->execute();
        ASSERT_EQ(px[1], host_y.ptr<float>()[0]);

        // Update the host index and run again.
        host_idx->ptr<int>()[0] = 5;
        func->execute();
        if (!dyn) {
            ASSERT_EQ(px[1], host_y.ptr<float>()[0]);
        } else {
            ASSERT_EQ(px[5], host_y.ptr<float>()[0]);
        }

        ASSERT_TRUE(idx_infered);
        ASSERT_FALSE(idx_exec);
    };

    for (bool dyn : {true, false}) {
        run(dyn);
    }
}
// Subtensor on empty inputs and/or with slices that select nothing: the
// output must be empty and have the expected shape.
TEST(TestTensorManip, SubtensorEmptyIO) {
    using AIdx = opr::Subtensor::AxisIndexer;
    using IndexDesc = std::vector<AIdx>;
    using IndexDescCreater = thin_function<IndexDesc(SymbolVar)>;
    HostTensorGenerator<> gen;

    // Builds Subtensor(x, make_desc(x)) for an input of shape `inp_shp` and
    // checks the output against `out_shp`.
    auto run = [&](const TensorShape& inp_shp, const TensorShape& out_shp,
                   const IndexDescCreater& make_desc) {
        auto host_x = gen(inp_shp);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto y = opr::Subtensor::make(x, make_desc(x));

        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(host_y.shape(), out_shp);
        ASSERT_TRUE(host_y.empty());
    };

    // x[:0]
    run({0}, {0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, None, x.make_scalar(0), None)};
    });
    // x[0:-10:2]
    run({100, 0}, {45, 0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                0, x.make_scalar(0), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x[10:-10:2, 0:0]
    run({100, 0}, {40, 0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                        0, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2)),
                AIdx::make_interval(1, x.make_scalar(0), x.make_scalar(0), None)};
    });
    // x[5, 10:-10:2]
    run({10, 0, 10}, {0, 10}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_index(0, x.make_scalar(5)),
                AIdx::make_interval(
                        1, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x[100:] on a 10-element input
    run({10}, {0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, x.make_scalar(100), None, None)};
    });
}
// SetSubtensor with empty inputs and/or empty target slices: the output must
// keep the shape of the input tensor.
TEST(TestTensorManip, SetSubtensorEmptyIO) {
    using AIdx = opr::SetSubtensor::AxisIndexer;
    using IndexDesc = std::vector<AIdx>;
    using IndexDescCreater = thin_function<IndexDesc(SymbolVar)>;
    HostTensorGenerator<> gen;

    // Builds SetSubtensor(x, v, make_desc(x)) with x of shape `inp_shp` and
    // v of shape `val_shp`, then checks the output shape.
    auto run = [&](const TensorShape& inp_shp, const TensorShape& val_shp,
                   const IndexDescCreater& make_desc) {
        auto host_x = gen(inp_shp);
        auto host_v = gen(val_shp);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto v = opr::Host2DeviceCopy::make(*graph, host_v);
        auto y = opr::SetSubtensor::make(x, v, make_desc(x));

        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(host_y.shape(), inp_shp);
    };

    // x[:0]
    run({0}, {0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, None, x.make_scalar(0), None)};
    });
    // x[0:-10:2]
    run({100, 0}, {45, 0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                0, x.make_scalar(0), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x[10:-10:2, 0:0]
    run({100, 0}, {40, 0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                        0, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2)),
                AIdx::make_interval(1, x.make_scalar(0), x.make_scalar(0), None)};
    });
    // x[5, 10:-10:2]
    run({10, 0, 10}, {0, 10}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_index(0, x.make_scalar(5)),
                AIdx::make_interval(
                        1, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x[100:] on a 10-element input
    run({10}, {0}, [](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, x.make_scalar(100), None, None)};
    });
}
namespace {

// Shared body of the SubtensorFwdOnly* tests: slices x[idx0:idx1] of a
// 25-element ramp for several ranges and checks the values. Unless only the
// index is dynamic, it also checks that the slice's device pointer lies
// within the device buffer of x.
//   dyn_inp: wrap x in MarkDynamicVar
//   dyn_idx: wrap the begin index in MarkDynamicVar
void test_subtensor_fwdonly(bool dyn_inp, bool dyn_idx) {
    constexpr size_t SIZE = 25;
    auto make_host = [](size_t size, DType dtype) {
        auto tensor = std::make_shared<HostTensorND>(CompNode::load("xpu0"), dtype);
        tensor->resize({size});
        return tensor;
    };
    auto host_x = make_host(SIZE, dtype::Float32());
    auto host_idx0 = make_host(1, dtype::Int32());
    auto host_idx1 = make_host(1, dtype::Int32());
    {
        // x[i] == i, so expected slice values are easy to state.
        auto px = host_x->ptr<float>();
        for (size_t i = 0; i < SIZE; ++i) {
            px[i] = i;
        }
    }
    host_idx0->ptr<int>()[0] = 2;
    host_idx1->ptr<int>()[0] = 6;

    auto graph = ComputingGraph::make();
    using AIdx = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto idx0 = opr::Host2DeviceCopy::make(*graph, host_idx0);
    auto idx1 = opr::Host2DeviceCopy::make(*graph, host_idx1);

    // Device address range of x and device address of the slice, captured by
    // the injected callbacks during execution.
    float* x_ptr = nullptr;
    float* x_ptr_end = nullptr;
    float* xsub_ptr = nullptr;

    if (dyn_inp) {
        x = opr::MarkDynamicVar::make(x);
    }
    x = opr::CallbackInjector::make(x, [&](DeviceTensorND& v) {
        x_ptr = v.ptr<float>();
        x_ptr_end = v.ptr<float>() + v.layout().total_nr_elems();
    });
    if (dyn_idx) {
        idx0 = opr::MarkDynamicVar::make(idx0);
    }
    auto xsub = opr::Subtensor::make(x, {AIdx::make_interval(0, idx0, idx1, None)});
    xsub = opr::CallbackInjector::make(
            xsub, [&](DeviceTensorND& v) { xsub_ptr = v.ptr<float>(); });
    ASSERT_EQ(!dyn_inp && !dyn_idx, cg::is_static_var_shape(xsub.node()));

    HostTensorND host_sub;
    auto func = graph->compile({make_callback_copy(xsub, host_sub)});

    // `failed` stays true if a previous run_and_check bailed out through a
    // fatal assertion, which makes the next call stop immediately.
    bool failed = false;
    auto run_and_check = [&](size_t begin, size_t end) {
        ASSERT_FALSE(failed);
        failed = true;
        host_idx0->ptr<int>()[0] = begin;
        host_idx1->ptr<int>()[0] = end;
        func->execute();
        if (dyn_inp || !dyn_idx) {
            ASSERT_GE(xsub_ptr, x_ptr);
            ASSERT_LE(xsub_ptr, x_ptr_end);
        }
        ASSERT_EQ(TensorShape({end - begin}), host_sub.shape());
        for (size_t i = 0; i < end - begin; ++i) {
            ASSERT_EQ(host_x->ptr<float>()[i + begin], host_sub.ptr<float>()[i])
                    << ssprintf("failed [%zu, %zu): i=%zu", begin, end, i);
        }
        failed = false;
    };

    const size_t ranges[][2] = {{0, 1},  {2, 3},    {0, 5},    {1, 6},
                                {3, 21}, {0, SIZE}, {1, SIZE}, {0, SIZE - 1}};
    for (auto&& range : ranges) {
        run_and_check(range[0], range[1]);
    }
}

}  // namespace
// Static input, static begin index.
TEST(TestTensorManip, SubtensorFwdOnly00) {
    constexpr bool dyn_inp = false, dyn_idx = false;
    test_subtensor_fwdonly(dyn_inp, dyn_idx);
}
// Static input, dynamic begin index.
TEST(TestTensorManip, SubtensorFwdOnly01) {
    constexpr bool dyn_inp = false, dyn_idx = true;
    test_subtensor_fwdonly(dyn_inp, dyn_idx);
}
// Dynamic input, static begin index.
TEST(TestTensorManip, SubtensorFwdOnly10) {
    constexpr bool dyn_inp = true, dyn_idx = false;
    test_subtensor_fwdonly(dyn_inp, dyn_idx);
}
// Dynamic input, dynamic begin index.
TEST(TestTensorManip, SubtensorFwdOnly11) {
    constexpr bool dyn_inp = true, dyn_idx = true;
    test_subtensor_fwdonly(dyn_inp, dyn_idx);
}
// SetSubtensor whose value is an overlapping slice of its own destination:
// y = xsub with y[:-10] = xsub[10:], where xsub = x[2:-2:2] and x[i] == i.
// Checked both through static value inference and through execution.
TEST(TestTensorManip, OverlapSetSubtensor) {
    constexpr size_t SIZE = 2048, SIZE_SUB = (SIZE - 4) / 2;
    auto host_x =
            std::make_shared<HostTensorND>(CompNode::load("xpu0"), dtype::Float32());
    host_x->resize({SIZE});
    {
        auto px = host_x->ptr<float>();
        for (size_t i = 0; i < SIZE; ++i) {
            px[i] = i;
        }
    }

    auto graph = ComputingGraph::make();
    graph->options().allocate_static_mem_after_graph_compile = true;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x");
    // Scalar constant helper; optionally wrapped in MarkDynamicVar.
    auto cv = [&](int v, bool dyn = false) {
        auto scalar = x.make_scalar(v);
        if (dyn) {
            scalar = opr::MarkDynamicVar::make(scalar);
        }
        return scalar;
    };

    using AIdx = opr::Subtensor::AxisIndexer;
    auto xsub = opr::Subtensor::make(x, {AIdx::make_interval(0, cv(2), cv(-2), cv(2))})
                        .rename("xsub");
    auto y = opr::SetSubtensor::make(
                     xsub,
                     opr::Subtensor::make(
                             xsub, {AIdx::make_interval(0, cv(10), None, None)})
                             .rename("xsub[10:]"),
                     {AIdx::make_interval(0, None, cv(-10), None)})
                     .rename("y");

    // Expected: all but the last 10 elements come from 10 positions further
    // in xsub; xsub[k] == 2 * k + 2.
    HostTensorND expected(host_x->comp_node(), dtype::Float32());
    expected.resize({SIZE_SUB});
    auto expect_ptr = expected.ptr<float>();
    for (size_t i = 0; i < SIZE_SUB; ++i) {
        const size_t src = (i < SIZE_SUB - 10) ? i + 10 : i;
        expect_ptr[i] = src * 2 + 2;
    }

    ASSERT_TRUE(cg::is_static_var_value(y.node()));
    HostTensorND infer_result;
    infer_result.copy_from(graph->static_infer_manager().infer_value(y.node()));
    MGB_ASSERT_TENSOR_EQ(expected, infer_result);

    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->to_json()->writeto_fpath(output_file("OverlapSetSubtensor.json"));
    func->execute();
    MGB_ASSERT_TENSOR_EQ(expected, host_y);
}
// Chains SetSubtensor and IncrSubtensor on overlapping row ranges of a 2-D
// tensor. `should_overlap` selects whether IncrSubtensor modifies y0 itself
// (from which its value operand is sliced) or the separate var y0 + 1.
TEST(TestTensorManip, OverlapSetSubtensor2) {
constexpr size_t SIZE_X = 20, SIZE_Y = 23;
auto run = [](bool should_overlap) {
auto host_x = std::make_shared<HostTensorND>(
CompNode::load("xpu0"), dtype::Float32());
host_x->resize({SIZE_X, SIZE_Y});
// fill x with its own flat index
for (size_t i = 0; i < SIZE_X * SIZE_Y; ++i)
host_x->ptr<float>()[i] = i;
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x");
auto cv = [&](int v) { return x.make_scalar(v); };
// index descriptor selecting the interval (begin, end) on axis 0
auto make_sub_desc = [&](int begin, int end) -> opr::Subtensor::IndexDesc {
using AIdx = opr::Subtensor::AxisIndexer;
return {AIdx::make_interval(0, cv(begin), cv(end), None)};
};
auto slice = [&](SymbolVar inp, int begin, int end) {
return opr::Subtensor::make(inp, make_sub_desc(begin, end));
};
// y0: x with interval (2, 7) replaced by x's interval (4, 9)
// y : ypar with interval (1, 6) incremented by y0's interval (3, 8)
auto xsub = slice(x, 4, 9).rename("xsub"),
y0 = opr::SetSubtensor::make(x, xsub, make_sub_desc(2, 7)).rename("y0"),
y0sub = slice(y0, 3, 8).rename("y0sub"),
ypar = should_overlap ? y0 : y0 + 1,
y = opr::IncrSubtensor::make(ypar, y0sub, make_sub_desc(1, 6))
.rename("y1");
// reference result computed in place on a copy of x
HostTensorND expect;
expect.copy_from(*host_x);
auto ptr = expect.ptr<float>();
// SetSubtensor: rows 4..8 moved onto rows 2..6 (memmove since they overlap)
memmove(ptr + 2 * SIZE_Y, ptr + 4 * SIZE_Y, 5 * SIZE_Y * sizeof(float));
// IncrSubtensor: row i += row i + 2; every row that is read (3..7) is read
// before the ascending loop reaches and modifies it
for (size_t i = 1; i < 6; ++i) {
for (size_t j = 0; j < SIZE_Y; ++j) {
ptr[i * SIZE_Y + j] += ptr[(i + 2) * SIZE_Y + j];
}
}
// account for the "+ 1" in ypar = y0 + 1
if (!should_overlap) {
for (size_t i = 0; i < SIZE_X * SIZE_Y; ++i) {
++ptr[i];
}
}
ASSERT_TRUE(cg::is_static_var_value(y.node()));
HostTensorND infer_result;
infer_result.copy_from(graph->static_infer_manager().infer_value(y.node()));
MGB_ASSERT_TENSOR_EQ(expect, infer_result);
HostTensorND host_y;
auto func = graph->compile({make_callback_copy(y, host_y)});
func->execute();
MGB_ASSERT_TENSOR_EQ(expect, host_y);
// without overlap the output is expected to share the device pointer of
// its first input
if (!should_overlap) {
ASSERT_EQ(prev_dev_ptr(ypar), prev_dev_ptr(y));
}
};
run(false);
run(true);
}
// AutoOprChecker-based test of SetSubtensor with three inputs (x, v0, v1).
// The graph output is x0 + x1 + x2, three differently indexed SetSubtensor
// results; `fwd` computes the same value with plain loops.
TEST(TestTensorManip, SetSubtensor) {
using Checker = AutoOprChecker<3, 1>;
auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
using AIdx = opr::Subtensor::AxisIndexer;
auto x = inputs[0], v0 = inputs[1], v1 = inputs[2];
x = x.rename("x");
v0 = v0.rename("v0");
v1 = v1.rename("v1");
// scalar constant on x's graph, optionally wrapped in MarkDynamicVar
auto cv = [&](int v, bool dyn = false) {
auto rst = x.make_scalar(v);
if (dyn)
rst = opr::MarkDynamicVar::make(rst);
return rst;
};
// x0: x with interval (10, None, 2) on axis 0 set to v0
// x1: dynamic x with interval (None, -10, 2) on axis 0 set to a copy of v0
//     whose index 3 on axis 1 was replaced by v1
// x2: x with interval (None, 5) on axis 0 set to x's interval (4, 9), then
//     interval (3, -7, 2) on axis 0 combined with index -1 on axis 1 set to v1
auto
x0 = opr::SetSubtensor::make(
x, v0, {AIdx::make_interval(0, cv(10), None, cv(2))})
.rename("x0"),
x1 = opr::SetSubtensor::make(
opr::MarkDynamicVar::make(x),
opr::SetSubtensor::make(
v0, v1, {AIdx::make_index(1, cv(3))}),
{AIdx::make_interval(0, None, cv(-10), cv(2))})
.rename("x_sub1"),
x2_t = opr::Subtensor::make(
x, {AIdx::make_interval(0, cv(4), cv(9), None)})
.rename("x2_t"),
x2 = opr::SetSubtensor::make(
opr::SetSubtensor::make(
x, x2_t,
{AIdx::make_interval(0, None, cv(5), None)}),
v1,
{AIdx::make_interval(0, cv(3), cv(-7), cv(2)),
AIdx::make_index(1, cv(-1))})
.rename("x2"),
y = (x0 + x1 + x2).rename("y");
// x1 is built on a MarkDynamicVar input and must not have a static shape;
// the other two must keep static storage
mgb_assert(cg::is_static_var_storage(x0.node()));
mgb_assert(!cg::is_static_var_shape(x1.node()));
mgb_assert(cg::is_static_var_storage(x2.node()));
return {y};
};
// reference implementation working on flat buffers
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
auto px = inp[0]->ptr<float>(), pv0 = inp[1]->ptr<float>(),
pv1 = inp[2]->ptr<float>();
auto ishp = inp[0]->shape();
dest[0].comp_node(inp[0]->comp_node());
dest[0].resize(ishp);
auto optr = dest[0].ptr<float>();
// s0: extent of axis 0; s1: elements per axis-0 entry;
// s2: elements per (axis 0, axis 1) entry
auto s0 = ishp.shape[0], s1 = ishp.total_nr_elems() / s0,
s2 = s1 / ishp.shape[1];
for (size_t i = 0; i < s0; ++i) {
for (size_t j = 0; j < s1; ++j) {
float x0, x1, x2;
x0 = x1 = x2 = px[i * s1 + j];
// x0: rows 10, 12, ... come from consecutive rows of v0
if (i >= 10 && (i - 10) % 2 == 0)
x0 = pv0[((i - 10) / 2) * s1 + j];
// x1: even rows below s0 - 10 come from v0, except that axis-1 index 3
// comes from v1
if (i < s0 - 10 && i % 2 == 0) {
auto row = i / 2;
if (j / s2 == 3)
x1 = pv1[row * s2 + j % s2];
else
x1 = pv0[row * s1 + j];
}
// x2: the v1 write (rows 3, 5, ... below s0 - 7, last axis-1 index) takes
// precedence over the row shift applied to the first five rows
if (i >= 3 && i < s0 - 7 && (i - 3) % 2 == 0 &&
j / s2 == ishp.shape[1] - 1)
x2 = pv1[((i - 3) / 2) * s2 + j % s2];
else if (i < 5)
x2 = px[(i + 4) * s1 + j];
optr[i * s1 + j] = x0 + x1 + x2;
}
}
};
// derive the shapes of v0 and v1 from the shape of x:
// v0 has (n - 10) / 2 entries on axis 0, v1 is v0 with axis 1 removed
auto mkshp = [](const TensorShape& shp0) -> Checker::ShapeInpArray {
mgb_assert(shp0.shape[0] > 10 && shp0.ndim >= 2 && shp0.shape[1] >= 4);
auto shp1 = shp0;
shp1.shape[0] = (shp0.shape[0] - 10) / 2;
auto shp2 = shp1;
for (size_t i = 2; i < shp2.ndim; ++i)
shp2.shape[i - 1] = shp2.shape[i];
--shp2.ndim;
return {shp0, shp1, shp2};
};
Checker::RunOptions opt;
opt.numdiff_eps = 1;
Checker(make_graph, fwd)
.run(mkshp({16, 4, 2}), opt)
.run(mkshp({14, 10}), opt)
.run(mkshp({18, 5, 2, 3}), opt);
}
// Index 13 is used on a 12-element tensor; building the SetSubtensor operator
// must throw MegBrainError, both for an index coming from an ImmutableTensor
// and for one coming from a Host2DeviceCopy.
TEST(TestTensorManip, SetSubtensorCheckByShapeInfer) {
    using Indexer = opr::Subtensor::AxisIndexer;
    HostTensorGenerator<> float_gen;
    HostTensorGenerator<dtype::Int32> int_gen;
    auto host_x = float_gen({12});
    auto host_sub = float_gen({1});
    auto host_idx = int_gen({1});
    host_idx->ptr<int>()[0] = 13;

    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto sub = opr::Host2DeviceCopy::make(*graph, host_sub);
    auto idx1 = Indexer::make_index(0, opr::ImmutableTensor::make(*graph, *host_idx));
    auto idx2 = Indexer::make_index(0, opr::Host2DeviceCopy::make(*graph, host_idx));

    // keep the compiler quiet in case the ASSERT_THROW bodies are compiled out
    MGB_MARK_USED_VAR(x);
    MGB_MARK_USED_VAR(sub);
    MGB_MARK_USED_VAR(idx1);
    MGB_MARK_USED_VAR(idx2);

    ASSERT_THROW(opr::SetSubtensor::make(x, sub, {idx1}), MegBrainError);
    ASSERT_THROW(opr::SetSubtensor::make(x, sub, {idx2}), MegBrainError);
}
// The index is derived from a Host2DeviceCopy created with
// make_no_value_infer; the output shape of SetSubtensor must nevertheless be
// obtainable through infer_shape_fallible().
TEST(TestTensorManip, SetSubtensorShapeInfer) {
    using Indexer = opr::Subtensor::AxisIndexer;
    HostTensorGenerator<> float_gen;
    HostTensorGenerator<dtype::Int32> int_gen;
    auto host_x = float_gen({12});
    auto host_sub = float_gen({1});
    auto host_idx = int_gen({1});
    host_idx->ptr<int>()[0] = 13;

    auto graph = ComputingGraph::make();
    auto&& infer_mgr = graph->static_infer_manager();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto sub = opr::Host2DeviceCopy::make(*graph, host_sub);
    auto index = opr::Host2DeviceCopy::make_no_value_infer(*graph, host_idx);

    auto doubled_idx = Indexer::make_index(0, index * 2);
    auto y = opr::SetSubtensor::make(x, sub, {doubled_idx});
    ASSERT_TRUE(infer_mgr.infer_shape_fallible(y.node()));
}
// SetSubtensor whose index is read from a SharedDeviceTensor: the output must
// still have static storage and element 3 of x must be replaced by sub.
TEST(TestTensorManip, SetSubtensorDynIdx) {
    constexpr int target_pos = 3;
    HostTensorGenerator<> gen;
    auto host_x = gen({12});
    auto host_sub = gen({1});
    auto host_idx = gen({1});
    host_idx->ptr<float>()[0] = target_pos;

    auto dev_idx = std::make_shared<DeviceTensorND>();
    dev_idx->copy_from(*host_idx);

    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto sub = opr::Host2DeviceCopy::make(*graph, host_sub);
    auto idx = opr::SharedDeviceTensor::make(*graph, dev_idx);
    auto y = opr::SetSubtensor::make(
            x, sub, {opr::SetSubtensor::AxisIndexer::make_index(0, idx)});
    ASSERT_TRUE(cg::is_static_var_storage(y.node()));

    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();

    // turn host_x into the expected result before comparing
    host_x->ptr<float>()[target_pos] = host_sub->ptr<float>()[0];
    MGB_ASSERT_TENSOR_EQ(*host_x, host_y);
}
// With an empty index descriptor SetSubtensor(x, y) is expected to yield y in
// full while keeping static storage.
TEST(TestTensorManip, SetSubtensorWithEmptyIndexDesc) {
    HostTensorGenerator<> gen;
    auto host_x = gen({12});
    auto host_y = gen({12});

    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Host2DeviceCopy::make(*graph, host_y);
    auto z = opr::SetSubtensor::make(x, y, {});
    ASSERT_TRUE(cg::is_static_var_storage(z.node()));

    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_y, host_z);
}
// AutoOprChecker-based test of IncrSubtensor on the interval (2, -2, 2) of
// axis 0; the reference adds the i-th value to element 2 + 2 * i.
TEST(TestTensorManip, IncrSubtensor) {
    using Checker = AutoOprChecker<2, 1>;

    auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0];
        return {opr::IncrSubtensor::make(
                x, inputs[1],
                {AIdx::make_interval(
                        0, x.make_scalar(2), x.make_scalar(-2), x.make_scalar(2))})};
    };

    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        const auto nr_rows = inp[0]->shape(0);
        auto delta = inp[1]->ptr<float>();
        // start from a copy of x and add the increments in place
        auto out = dest[0].copy_from(*inp[0]).ptr<float>();
        const size_t nr_updates = (nr_rows - 3) / 2;
        for (size_t k = 0; k != nr_updates; ++k) {
            out[2 + 2 * k] += delta[k];
        }
    };

    Checker{make_graph, fwd}
            .run({TensorShape{5}, {1}})
            .run({TensorShape{8}, {2}})
            .run({TensorShape{23}, {10}});
}
TEST(TestTensorManip, Concat) {
auto cns = load_multiple_xpus(4);
using Checker = AutoOprChecker<3, 1>;
auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
auto sub0 = inputs[0], sub1 = opr::Copy::make(inputs[1], cns[1]),
sub2 = opr::Copy::make(inputs[2], cns[2]),
ret = opr::Concat::make({sub0, sub1, sub2}, 1, cns[3]);
return {opr::Copy::make(ret, cns[0])};
};
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
size_t n = inp[0]->shape(0), c0 = inp[0]->shape(1), c1 = inp[1]->shape(1),
c2 = inp[2]->shape(1), c = c0 + c1 + c2;
auto i0 = inp[0]->ptr<float>(), i1 = inp[1]->ptr<float>(),
i2 = inp[2]->ptr<float>(), o = dest[0].resize({n, c}).ptr<float>();
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j < c; ++j) {
float cur;
if (j < c0) {
cur = i0[i * c0 + j];
} else if (j < c0 + c1) {
cur = i1[i * c1 + j - c0];
} else {
cur = i2[i * c2 + j - c0 - c1];
}
o[i * c + j] = cur;
}
}
};
Checker checker{make_graph, fwd, cns[0]};
checker.run({TensorShape{2, 3}, {2, 4}, {2, 5}})
.run({TensorShape{2, 8}, {2, 3}, {2, 9}})
.run({TensorShape{5, 10}, {5, 3}, {5, 4}});
}
TEST(TestTensorManip, ConcatWithNegativeAxis) {
auto cns = load_multiple_xpus(4);
using Checker = AutoOprChecker<3, 1>;
auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
auto sub0 = inputs[0], sub1 = opr::Copy::make(inputs[1], cns[1]),
sub2 = opr::Copy::make(inputs[2], cns[2]),
ret = opr::Concat::make({sub0, sub1, sub2}, -1, cns[3]);
return {opr::Copy::make(ret, cns[0])};
};
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
size_t n = inp[0]->shape(0), c0 = inp[0]->shape(1), c1 = inp[1]->shape(1),
c2 = inp[2]->shape(1), c = c0 + c1 + c2;
auto i0 = inp[0]->ptr<float>(), i1 = inp[1]->ptr<float>(),
i2 = inp[2]->ptr<float>(), o = dest[0].resize({n, c}).ptr<float>();
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j < c; ++j) {
float cur;
if (j < c0) {
cur = i0[i * c0 + j];
} else if (j < c0 + c1) {
cur = i1[i * c1 + j - c0];
} else {
cur = i2[i * c2 + j - c0 - c1];
}
o[i * c + j] = cur;
}
}
};
Checker checker{make_graph, fwd, cns[0]};
checker.run({TensorShape{2, 3}, {2, 4}, {2, 5}})
.run({TensorShape{2, 8}, {2, 3}, {2, 9}})
.run({TensorShape{5, 10}, {5, 3}, {5, 4}});
}
// Concat along axis 1 where one input, and later both inputs, have extent 0 on
// that axis.
TEST(TestTensorManip, ConcatEmpty) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 3, 5});
    auto host_y = gen({2, 0, 5});

    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Host2DeviceCopy::make(*graph, host_y);
    auto z = opr::Concat::make({x, y}, 1);

    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});

    // y contributes nothing, so the result must equal x
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_x, host_z);

    // now x is empty as well; the result has the same (empty) shape as y
    host_x->resize({2, 0, 5});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_y, host_z);
}
// Both inputs have extent 0 on axis 1 while being concatenated along axis 2;
// only the resulting shape is checked.
TEST(TestTensorManip, ConcatEmpty2) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 0, 5});
    auto host_y = gen({2, 0, 6});

    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Host2DeviceCopy::make(*graph, host_y);
    auto z = opr::Concat::make({x, y}, 2);

    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();

    const TensorShape expected_shape({2, 0, 11});
    ASSERT_EQ(expected_shape, host_z.shape());
}
// AxisAddRemove on a (2, 1, 5) input: one output adds an axis at position 0,
// the other removes axis 1. Runs once with a plain input and once with the
// input wrapped in MarkDynamicVar, and also with an empty input.
TEST(TestTensorManip, AxisAddRemove) {
    using AxisDesc = opr::AxisAddRemove::AxisDesc;
    HostTensorGenerator<> gen;
    for (int pass = 0; pass < 2; ++pass) {
        const bool dyn_shape = pass != 0;
        auto host_x = gen({2, 1, 5});
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        if (dyn_shape) {
            x = opr::MarkDynamicVar::make(x);
        }
        auto y = opr::AxisAddRemove::make(x, {AxisDesc::make_add(0)});
        auto z = opr::AxisAddRemove::make(x, {AxisDesc::make_remove(1)});

        HostTensorND host_y, host_z;
        auto func = graph->compile(
                {make_callback_copy(y, host_y), make_callback_copy(z, host_z)});

        func->execute();
        ASSERT_EQ(TensorShape({1, 2, 1, 5}), host_y.shape());
        ASSERT_EQ(TensorShape({2, 5}), host_z.shape());
        // the data must be untouched: compare after resizing back to x's shape
        MGB_ASSERT_TENSOR_EQ(*host_x, host_y.resize(host_x->shape()));
        MGB_ASSERT_TENSOR_EQ(*host_x, host_z.resize(host_x->shape()));

        // empty input: only the shapes can be checked
        host_x->resize({2, 1, 0});
        func->execute();
        ASSERT_EQ(TensorShape({1, 2, 1, 0}), host_y.shape());
        ASSERT_EQ(TensorShape({2, 0}), host_z.shape());
    }
}
TEST(TestTensorManip, Split) {
auto cns = load_multiple_xpus(3);
constexpr size_t C1 = 20, C2 = 30;
constexpr size_t N = 2, C = C1 + C2;
HostTensorGenerator<> gen;
auto host_opr0 = gen({N, C}, cns[0]);
auto graph = ComputingGraph::make();
SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"});
auto spl = opr::Split::make(
opr0, Split::Options::make_partition(opr0, 1, {C1, C2}),
OperatorNodeConfig("split").comp_node_arr({cns[1], cns[2]}));
auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()),
cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()),
cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)),
cost = opr::Copy::make(
cost0 + cost1, OperatorNodeConfig().follow_comp_node(opr0)),
grad = cg::grad(cost, opr0);
HostTensorND host_spl0, host_spl1, host_grad;
auto func = graph->compile(
{{spl[0], [&](DeviceTensorND& s) { host_spl0.copy_from(s); }},
{spl[1], [&](DeviceTensorND& s) { host_spl1.copy_from(s); }},
{grad, [&](DeviceTensorND& s) { host_grad.copy_from(s); }}});
func->execute();
auto o0 = host_spl0.sync().ptr<float>(), o1 = host_spl1.sync().ptr<float>(),
c = host_opr0->ptr<float>(), g = host_grad.sync().ptr<float>();
for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) {
auto ch = i % C;
auto n = i / C;
if (ch < C1) {
MGB_ASSERT_FLOAT_EQ(o0[n * C1 + ch], c[i]) << ssprintf("failed at %zd", i);
} else {
MGB_ASSERT_FLOAT_EQ(o1[n * C2 + ch - C1], c[i])
<< ssprintf("failed at %zd", i);
}
MGB_ASSERT_FLOAT_EQ(c[i] * 2, g[i]) << ssprintf("grad failed at %zd", i);
}
}
TEST(TestTensorManip, SplitWithNegativeAxis) {
auto cns = load_multiple_xpus(3);
constexpr size_t C1 = 20, C2 = 30;
constexpr size_t N = 2, C = C1 + C2;
HostTensorGenerator<> gen;
auto host_opr0 = gen({N, C}, cns[0]);
auto graph = ComputingGraph::make();
SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"});
auto spl = opr::Split::make(
opr0, Split::Options::make_partition(opr0, -1, {C1, C2}),
OperatorNodeConfig("split").comp_node_arr({cns[1], cns[2]}));
auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()),
cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()),
cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)),
cost = opr::Copy::make(
cost0 + cost1, OperatorNodeConfig().follow_comp_node(opr0)),
grad = cg::grad(cost, opr0);
HostTensorND host_spl0, host_spl1, host_grad;
auto func = graph->compile(
{{spl[0], [&](DeviceTensorND& s) { host_spl0.copy_from(s); }},
{spl[1], [&](DeviceTensorND& s) { host_spl1.copy_from(s); }},
{grad, [&](DeviceTensorND& s) { host_grad.copy_from(s); }}});
func->execute();
auto o0 = host_spl0.sync().ptr<float>(), o1 = host_spl1.sync().ptr<float>(),
c = host_opr0->ptr<float>(), g = host_grad.sync().ptr<float>();
for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) {
auto ch = i % C;
auto n = i / C;
if (ch < C1) {
MGB_ASSERT_FLOAT_EQ(o0[n * C1 + ch], c[i]) << ssprintf("failed at %zd", i);
} else {
MGB_ASSERT_FLOAT_EQ(o1[n * C2 + ch - C1], c[i])
<< ssprintf("failed at %zd", i);
}
MGB_ASSERT_FLOAT_EQ(c[i] * 2, g[i]) << ssprintf("grad failed at %zd", i);
}
}
// Split on axis 0 into a part of size 3 and a remainder whose size var is
// wrapped in MarkDynamicVar. Checked by AutoOprChecker against host-side
// slicing.
TEST(TestTensorManip, SplitToDynOutShape) {
using Checker = AutoOprChecker<1, 2>;
auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
auto x = inputs[0];
// partition sizes: {3, shape(x)[0] - 3}, the second one marked dynamic
auto y = opr::Split::make(
x, opr::Split::Options::make_partition(
0, {x.make_scalar(3), opr::MarkDynamicVar::make(
opr::GetVarShape::make(x, 0) -
x.make_scalar(3))}));
return {y[0], y[1]};
};
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
// slice the host input on axis 0 with the given begin and end
auto sub = [&](size_t begin, Maybe<ptrdiff_t> end) {
auto&& iv = inp[0];
return iv->sub(Slice(begin, end, None).apply(iv->layout(), 0));
};
dest[0].copy_from(sub(0, 3));
dest[1].copy_from(sub(3, None));
};
Checker{make_graph, fwd}
.run({TensorShape{5}})
.run({TensorShape{8}})
.run({TensorShape{9, 3}});
}
// Split on axis 0 into a part of size 3 and the remainder, with the partition
// given as symbolic vars. The first output is additionally copied to stream 1
// of x's comp node and back before being returned.
TEST(TestTensorManip, SplitToDynOutStorage) {
using Checker = AutoOprChecker<1, 2>;
auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
auto x = inputs[0];
// partition sizes: {3, shape(x)[0] - 3}
auto y = opr::Split::make(
x, opr::Split::Options::make_partition(
0, {x.make_scalar(3),
opr::GetVarShape::make(x, 0) - x.make_scalar(3)}));
// round trip through another stream of the same comp node
auto y0 = opr::Copy::make(y[0], x.node()->comp_node().change_stream(1));
y0 = opr::Copy::make(y0, x.node()->comp_node());
return {y0, y[1]};
};
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
// slice the host input on axis 0 with the given begin and end
auto sub = [&](size_t begin, Maybe<ptrdiff_t> end) {
auto&& iv = inp[0];
return iv->sub(Slice(begin, end, None).apply(iv->layout(), 0));
};
dest[0].copy_from(sub(0, 3));
dest[1].copy_from(sub(3, None));
};
Checker{make_graph, fwd}
.run({TensorShape{5}})
.run({TensorShape{8}})
.run({TensorShape{9, 3}});
}
namespace {
/*!
 * \brief test Split whose partition is computed by a callback from the input
 *      shape, applied to a column range of x that is chosen at runtime
 *
 * The graph takes the interval (sub_begin, sub_end) on axis 1 of an (N, C)
 * tensor, splits it on axis 1 into halves of size s / 2 and s - s / 2, and
 * computes cost = 0.5 * (dot(out0, out0) + dot(out1, out1)). For several
 * ranges the two outputs and the gradient of the cost w.r.t. x are compared
 * with values read directly from the host copy of x.
 *
 * \param multiple_cn place the two split outputs on cns[1] and cns[2] and
 *      append a Sleep opr to each of them
 * \param force_dynamic wrap the Subtensor result in MarkDynamicVar before
 *      splitting; otherwise every opr output in the compiled sequence is
 *      required to have a static shape
 */
void do_test_dynamic_split(bool multiple_cn, bool force_dynamic) {
    auto cns = load_multiple_xpus(3);
    constexpr size_t N = 2, C = 51;
    HostTensorGenerator<> gen;
    auto host_x = gen({N, C}, cns[0]), host_sub_begin = gen({1}, cns[0]),
         host_sub_end = gen({1}, cns[0]);
    host_sub_begin->ptr<float>()[0] = 0;
    host_sub_end->ptr<float>()[0] = 2;
    auto graph = ComputingGraph::make();
    SymbolVar x = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x, {"x"}),
              sub_begin = opr::Host2DeviceCopy::make_no_fwd(
                      *graph, host_sub_begin, {"sub_begin"}),
              sub_end = opr::Host2DeviceCopy::make_no_fwd(
                      *graph, host_sub_end, {"sub_end"}),
              xsub = opr::Subtensor::make(
                             x, {opr::Subtensor::AxisIndexer::make_interval(
                                        1, sub_begin, sub_end, None)})
                             .rename("xsub");
    OperatorNodeConfig split_config("split");
    if (multiple_cn) {
        split_config.comp_node_arr({cns[1], cns[2]});
    }
    if (force_dynamic)
        xsub = opr::MarkDynamicVar::make(xsub);
    auto spl = opr::Split::make(
            xsub,
            Split::Options::make_callback(
                    1, 2,
                    [](size_t s) {
                        return std::vector<size_t>{s / 2, s - s / 2};
                    }),
            split_config);
    if (multiple_cn) {
        spl[0] = opr::Sleep::make(spl[0], 0.1);
        spl[1] = opr::Sleep::make(spl[1], 0.2);
    }
    auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()),
         cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()),
         cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)),
         cost = opr::Copy::make(
                        cost0 + cost1, OperatorNodeConfig().follow_comp_node(x)) *
                0.5f,
         grad = cg::grad(cost, x);
    HostTensorND host_spl0, host_spl1, host_grad;
    auto func = graph->compile(
            {make_callback_copy(spl[0], host_spl0),
             make_callback_copy(spl[1], host_spl1),
             make_callback_copy(grad, host_grad)});
    if (force_dynamic)
        ASSERT_TRUE(!cg::is_static_var_shape(spl[0].node()));
    else {
        // without MarkDynamicVar nothing in the sequence may be dynamic
        auto cb = [](cg::OperatorNodeBase* op) {
            for (auto i : op->output()) {
                mgb_assert(
                        cg::is_static_var_shape(i), "dynamic var: %s",
                        cg::dump_var_info({i}).c_str());
            }
            return true;
        };
        func->iter_opr_seq(cb);
    }
    // `failed` stays true when an ASSERT_* returns early from run_and_check, so
    // that the next invocation stops immediately
    bool failed = false, fwd_checked = false;
    auto run_and_check = [&](size_t begin, size_t end) {
        ASSERT_FALSE(failed);
        failed = true;
        host_sub_begin->ptr<float>()[0] = begin;
        host_sub_end->ptr<float>()[0] = end;
        func->execute();
        // first column that belongs to the second split output
        auto mid = begin + (end - begin) / 2;
        // named grad_ptr to avoid shadowing the SymbolVar `grad` above
        auto inp = host_x->ptr<float>(), grad_ptr = host_grad.ptr<float>();
        ASSERT_EQ(host_spl0.shape(), TensorShape({N, mid - begin}));
        ASSERT_EQ(host_spl1.shape(), TensorShape({N, end - mid}));
        if (!force_dynamic && !multiple_cn && !begin && mid - begin == 1) {
            // in this configuration the first output is expected to start at
            // the device address of x
            ASSERT_EQ(
                    prev_dev_ptr(spl[0]),
                    static_cast<const dt_float32*>(prev_dev_ptr(x)));
            fwd_checked = true;
        }
        for (size_t i = 0, it = host_x->layout().total_nr_elems(); i < it; ++i) {
            auto ch = i % C;
            auto n = i / C;
            float expect_grad;
            // messages report the (row, column) coordinate, consistent with the
            // gradient message below
            if (ch >= begin && ch < mid) {
                MGB_ASSERT_FLOAT_EQ(inp[i], *host_spl0.ptr<float>({n, ch - begin}))
                        << ssprintf(
                                   "failed at (%zu, %zu),sub=[: ,%zu:%zu]", n, ch,
                                   begin, end);
                expect_grad = inp[i];
            } else if (ch >= mid && ch < end) {
                MGB_ASSERT_FLOAT_EQ(inp[i], *host_spl1.ptr<float>({n, ch - mid}))
                        << ssprintf(
                                   "failed at (%zu, %zu),sub=[: ,%zu:%zu]", n, ch,
                                   begin, end);
                expect_grad = inp[i];
            } else {
                // columns outside [begin, end) do not contribute to the cost
                expect_grad = 0;
            }
            MGB_ASSERT_FLOAT_EQ(expect_grad, grad_ptr[i]) << ssprintf(
                    "grad failed at (%zu, %zu), sub=x[:, %zu:%zu]", n, ch, begin, end);
        }
        failed = false;
    };
    run_and_check(0, 3);
    run_and_check(2, 8);
    run_and_check(5, 12);
    run_and_check(1, C - 1);
    run_and_check(0, C);
    run_and_check(C - 2, C);
    run_and_check(0, 2);
    if (!multiple_cn && !force_dynamic) {
        // make sure the pointer comparison above was reached at least once
        ASSERT_TRUE(fwd_checked);
    }
}
}  // namespace
// The two-digit suffix of every DynamicSplit case encodes
// (multiple_cn, force_dynamic) as 0 = false, 1 = true.
TEST(TestTensorManip, DynamicSplit00) {
    constexpr bool multiple_cn = false, force_dynamic = false;
    do_test_dynamic_split(multiple_cn, force_dynamic);
}
TEST(TestTensorManip, DynamicSplit01) {
    constexpr bool multiple_cn = false, force_dynamic = true;
    do_test_dynamic_split(multiple_cn, force_dynamic);
}
TEST(TestTensorManip, DynamicSplit10) {
    constexpr bool multiple_cn = true, force_dynamic = false;
    do_test_dynamic_split(multiple_cn, force_dynamic);
}
TEST(TestTensorManip, DynamicSplit11) {
    constexpr bool multiple_cn = true, force_dynamic = true;
    do_test_dynamic_split(multiple_cn, force_dynamic);
}
// Split an input whose shape is static (fixed by reshape) but whose storage is
// not (it comes from MarkDynamicVar) into parts of size 1 and 3.
TEST(TestTensorManip, SplitFromDynStorage) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_x))
                     .reshape({4});
    ASSERT_TRUE(cg::is_static_var_shape(x.node()));

    auto parts = opr::Split::make(x, opr::Split::Options::make_partition(x, 0, {1, 3}));
    HostTensorND y0, y1;
    auto func = graph->compile(
            {make_callback_copy(parts[0], y0), make_callback_copy(parts[1], y1)});
    func->execute();
    ASSERT_FALSE(cg::is_static_var_storage(x.node()));

    const float* src = host_x->ptr<float>();
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};

    // first part: element 0
    expt.resize({1}).ptr<float>()[0] = src[0];
    MGB_ASSERT_TENSOR_EQ(expt, y0);

    // second part: elements 1..3
    expt.resize({3});
    for (int k = 0; k < 3; ++k) {
        expt.ptr<float>()[k] = src[k + 1];
    }
    MGB_ASSERT_TENSOR_EQ(expt, y1);
}
// Average split of a 3-element SharedDeviceTensor into three outputs placed on
// three comp nodes. The first output is expected to have static storage and to
// share x's device pointer; the second one is expected not to have static
// storage.
TEST(TestTensorManip, SplitPreAllocatedMultiCN) {
    auto cns = load_multiple_xpus(3);
    HostTensorGenerator<> gen;
    auto host_x = gen({3}, cns[0]);

    auto dev_x = std::make_shared<DeviceTensorND>();
    dev_x->copy_from(*host_x).sync();

    auto graph = cg::ComputingGraph::make();
    auto x = opr::SharedDeviceTensor::make(*graph, dev_x);
    auto ys = opr::Split::make(
            x, opr::Split::Options::make_average(0, 3),
            OperatorNodeConfig{}.comp_node_arr({cns.begin(), cns.end()}));
    ASSERT_EQ(3u, ys.size());

    HostTensorND y0, y1, y2;
    // the middle output is copied to cns[0] before being read back
    auto func = graph->compile(
            {make_callback_copy(ys[0], y0),
             make_callback_copy(opr::Copy::make(ys[1], {cns[0]}), y1),
             make_callback_copy(ys[2], y2)});
    func->execute();

    ASSERT_TRUE(cg::is_static_var_storage(ys[0].node()));
    ASSERT_FALSE(cg::is_static_var_storage(ys[1].node()));
    ASSERT_EQ(x.node()->prev_dev_ptr(), ys[0].node()->prev_dev_ptr());

    const float* src = host_x->ptr<float>();
    ASSERT_EQ(src[0], y0.ptr<float>()[0]);
    ASSERT_EQ(src[1], y1.ptr<float>()[0]);
    ASSERT_EQ(src[2], y2.ptr<float>()[0]);
}
// Average split into three parts of a Subtensor that starts at element 1 of a
// 4-element input. Besides the values, the test checks that the last output's
// device pointer is x0's device pointer advanced by 3 floats.
TEST(TestTensorManip, SplitMemfwdMultipleTimesWithOffset) {
auto cns = load_multiple_xpus(2);
HostTensorGenerator<> gen;
auto host_x = gen({4}, cns[0]);
auto graph = cg::ComputingGraph::make();
// x: interval (1, None) on axis 0 of x0, i.e. the last three elements
auto x0 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x),
x = opr::Subtensor::make(
x0, {opr::Subtensor::AxisIndexer::make_interval(
0, x0.make_scalar(1), None, None)});
auto ys = opr::Split::make(x, opr::Split::Options::make_average(0, 3));
ASSERT_EQ(3u, ys.size());
HostTensorND y0, y1, y2;
// the middle output is copied to the second comp node before being read back
auto func = graph->compile(
{make_callback_copy(ys[0], y0),
make_callback_copy(opr::Copy::make(ys[1], {cns[1]}), y1),
make_callback_copy(ys[2], y2)});
func->execute();
// shapes are static, storage is not
ASSERT_FALSE(cg::is_static_var_storage(ys[0].node()));
ASSERT_TRUE(cg::is_static_var_shape(ys[0].node()));
ASSERT_FALSE(cg::is_static_var_storage(ys[1].node()));
// output k holds element k + 1 of the original input
ASSERT_EQ(host_x->ptr<float>()[1], y0.ptr<float>()[0]);
ASSERT_EQ(host_x->ptr<float>()[2], y1.ptr<float>()[0]);
ASSERT_EQ(host_x->ptr<float>()[3], y2.ptr<float>()[0]);
// offset 3 = 1 (from the Subtensor) + 2 (position of the last split output)
ASSERT_EQ(static_cast<const float*>(prev_dev_ptr(x0)) + 3, prev_dev_ptr(ys[2]));
}
// Split of an ImmutableTensor across three comp nodes: adding 0 to each output
// must yield an ImmutableTensor on the matching comp node whose statically
// inferred value is the corresponding input element.
TEST(TestTensorManip, SplitValueInfer) {
    auto cns = load_multiple_xpus(3);
    HostTensorGenerator<> gen;
    auto host_x = gen({3});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::ImmutableTensor::make(*graph, *host_x);
    auto ys = opr::Split::make(
            x, opr::Split::Options::make_average(0, 3),
            OperatorNodeConfig{}.comp_node_arr({cns.begin(), cns.end()}));

    for (size_t part = 0; part < 3; ++part) {
        auto folded = (ys[part] + 0).node();
        ASSERT_TRUE(folded->owner_opr()->same_type<opr::ImmutableTensor>());
        ASSERT_EQ(cns[part], folded->comp_node());

        auto&& infer_mgr = folded->owner_graph()->static_infer_manager();
        HostTensorND inferred;
        inferred.copy_from(infer_mgr.infer_value(folded));
        ASSERT_EQ(TensorShape{1}, inferred.shape());
        ASSERT_EQ(host_x->ptr<float>()[part], inferred.ptr<float>()[0]);
    }
}
// Only the last of three split outputs contributes to the loss; the gradient
// w.r.t. the input must be zero for the first two rows and 2 * x for the last.
TEST(TestTensorManip, SplitZeroGrad) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 2});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto ys = opr::Split::make(x, opr::Split::Options::make_average(0, 3));
    auto loss = opr::reduce_sum(ys[2] * ys[2], x.make_scalar(1));
    auto gx = cg::grad(loss, x);

    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();

    const float* x_val = host_x->ptr<float>();
    const float* g_val = host_gx.ptr<float>();
    // rows start at flat offsets 0, 2 and 4
    for (int col = 0; col < 2; ++col) {
        MGB_ASSERT_FLOAT_EQ(0.f, g_val[col]);
        MGB_ASSERT_FLOAT_EQ(0.f, g_val[2 + col]);
        MGB_ASSERT_FLOAT_EQ(x_val[4 + col] * 2, g_val[4 + col]);
    }
}
// fill_retain_dtype(23) on a MarkDynamicVar input: for every input shape the
// output must have the same shape and consist of 23s only.
TEST(TestTensorManip, DynamicFill) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_x));
    auto y = x.fill_retain_dtype(23);

    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});

    // stays true when an ASSERT_* leaves `check` early, which makes the next
    // call stop right away
    bool failed = false;
    auto check = [&](const TensorShape& ishp) {
        ASSERT_FALSE(failed);
        failed = true;
        host_x->resize(ishp);
        func->execute();
        ASSERT_EQ(ishp, host_y.shape());
        auto cur = host_y.ptr<float>();
        auto last = cur + host_y.shape().total_nr_elems();
        for (; cur != last; ++cur) {
            ASSERT_EQ(23, *cur);
        }
        failed = false;
    };

    check({4, 2});
    check({2, 4});
    check({23});
}
// Builds a PH x PW sum pooling out of Subtensor / SetSubtensor / Reduce and
// checks it with AutoOprChecker against a direct host implementation. The
// graph is tested twice: fully static, and with dynamic vars mixed in.
TEST(TestTensorManip, Pooling2DBySetSub) {
constexpr int PH = 4, PW = 3;
using Checker = AutoOprChecker<1, 1>;
// read by make_graph; set by `run` below before each Checker is created
bool run_dyn = false;
auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
auto x = inputs.at(0);
if (run_dyn)
x = opr::MarkDynamicVar::make(x);
x.rename("x");
// scalar constant on x's graph, optionally wrapped in MarkDynamicVar
auto cv = [&](int v, bool dyn = false) {
auto rst = x.make_scalar(v);
if (dyn)
rst = opr::MarkDynamicVar::make(rst);
return rst;
};
// y_tmp has shape (PH * PW, oh, ow): one plane per position in the window
auto oh = (opr::GetVarShape::make(x, 0) / PH).rename("oh"),
ow = (opr::GetVarShape::make(x, 1) / PW).rename("ow"),
y_tmp_shape = opr::Concat::make({cv(PH * PW), oh, ow}, 0),
y_tmp = opr::Alloc::make(y_tmp_shape, dtype::Float32());
if (!run_dyn)
mgb_assert(cg::is_static_var_storage(y_tmp.node()));
using Ad = opr::Subtensor::AxisIndexer;
// plane `num` receives the elements at window offset (i, j), i.e. x sampled
// with begin (i, j) and step (PH, PW)
for (size_t i = 0, num = 0; i < (size_t)PH; ++i) {
for (size_t j = 0; j < (size_t)PW; ++j) {
// in the dynamic run every other plane uses dynamic index vars
bool dyn = run_dyn && num % 2;
auto xsub = opr::Subtensor::make(
x, {Ad::make_interval(0, cv(i, dyn), None, cv(PH)),
Ad::make_interval(1, cv(j), None, cv(PW))})
.rename(ssprintf("sub(%zu, %zu)", i, j));
y_tmp = opr::SetSubtensor::make(
y_tmp, xsub, {Ad::make_index(0, cv(num, dyn))})
.rename(ssprintf("y(%zu, %zu)", i, j));
if (!run_dyn) {
mgb_assert(cg::is_static_var_storage(xsub.node()));
mgb_assert(cg::is_static_var_storage(y_tmp.node()));
} else if (dyn)
y_tmp = opr::MarkDynamicVar::make(y_tmp);
++num;
}
}
// sum over the plane axis, then drop that axis
auto y = opr::Reduce::make(y_tmp, {opr::Reduce::Mode::SUM, 0});
y = opr::AxisAddRemove::make(y, {opr::AxisAddRemove::AxisDesc::make_remove(0)});
if (!run_dyn)
mgb_assert(cg::is_static_var_storage(y.node()));
return {y};
};
// reference: sum of every PH x PW window of the input
auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
auto&& ishp = inp.at(0)->shape();
auto oshp = ishp;
mgb_assert(oshp.shape[0] % PH == 0);
mgb_assert(oshp.shape[1] % PW == 0);
oshp.shape[0] /= PH;
oshp.shape[1] /= PW;
auto optr = dest.at(0).comp_node(inp[0]->comp_node()).resize(oshp).ptr<float>();
auto&& iv = *inp.at(0);
for (size_t i = 0; i < oshp.shape[0]; ++i)
for (size_t j = 0; j < oshp.shape[1]; ++j) {
// (ii, ij): top-left corner of the window for output element (i, j)
auto ii = i * PH, ij = j * PW;
float sum = 0;
for (size_t di = 0; di < PH; ++di)
for (size_t dj = 0; dj < PW; ++dj) {
sum += *iv.ptr<float>({ii + di, ij + dj});
}
*(optr++) = sum;
}
};
auto run = [&](bool dyn) {
run_dyn = dyn;
Checker(make_graph, fwd)
.run({TensorShape{PH * 1, PW * 2}})
.run({TensorShape{PH * 4, PW * 3}})
.run({TensorShape{PH * 2, PW * 2}});
};
run(false);
run(true);
}
// y = flatten(x) + reshape(x, shape of flatten(x)); for inputs of several
// shapes the result must be the 1-D tensor 2 * x.
TEST(TestTensorManip, Flatten) {
HostTensorGenerator<> gen;
auto host_x = gen({20});
auto graph = ComputingGraph::make();
auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"), y = x.flatten();
y = y + x.reshape(y.symshape());
// shape known at graph construction time for the initial 20-element input
ASSERT_EQ(TensorShape{20}, y.node()->shape());
HostTensorND host_y;
auto func = graph->compile({make_callback_copy(y, host_y)});
for (auto&& ishp : {TensorShape{2, 5}, TensorShape{6, 8, 1}, TensorShape{3}}) {
*host_x = *gen(ishp);
func->execute();
// `expected` is a 1-D sub-tensor taken from host_x; it is doubled only
// after execute() and host_x is regenerated at the top of the next iteration
auto expected = host_x->sub(SubTensorSpec::make_from_layout(
{{ishp.total_nr_elems()}, host_x->dtype()}));
auto ptr = expected.ptr<float>();
for (size_t i = 0; i < expected.shape()[0]; ++i)
ptr[i] *= 2;
MGB_ASSERT_TENSOR_EQ(expected, host_y);
}
}
// fill_retain_dtype() with the same value must return the same var node no
// matter whether the value is given as an int or as a float literal, while
// different values must yield different nodes.
TEST(TestTensorManip, FillWithDtypeDedup) {
    HostTensorGenerator<> gen;
    auto host_x = gen({20});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);

    // even slots are filled with zero, odd slots with one
    SymbolVar vals[] = {
            x.fill_retain_dtype(0),   x.fill_retain_dtype(1),
            x.fill_retain_dtype(0),   x.fill_retain_dtype(1),
            x.fill_retain_dtype(0.f), x.fill_retain_dtype(1.f),
            x.fill_retain_dtype(0.f), x.fill_retain_dtype(1.f),
    };

    for (int base = 0; base < 2; ++base) {
        for (int other = base + 2; other < 8; other += 2) {
            ASSERT_EQ(vals[base].node(), vals[other].node()) << base << ' ' << other;
        }
    }
    ASSERT_NE(vals[0].node(), vals[1].node());
}
// Dimshuffle {1, 0} of a (5, 1) tensor: the layout delivered to the output
// callback must equal the default layout built from shape (1, 5).
TEST(TestTensorManip, StrongContig) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto y = opr::Dimshuffle::make(x, {1, 0});

    auto check_layout = [](DeviceTensorND& dev_val) {
        TensorLayout wanted{{1, 5}, dev_val.dtype()};
        ASSERT_EQ(wanted, dev_val.layout());
    };

    auto func = graph->compile({{y, check_layout}});
    func->execute();
}
namespace {
/*!
 * \brief run opr::ParamPackConcat on Int32 tensors of the given shapes and
 *      compare against a host-built packed buffer
 *
 * Source tensor i is filled with 0, 1, 2, ... and is expected to appear in the
 * output at an element offset obtained by rounding the running size up with
 * get_aligned_power2(); the gaps in between are expected to be zero.
 *
 * \param shapes shapes of the tensors to be packed
 * \param type only used to convert the comp node's memory address alignment
 *      into an element count.
 *      NOTE(review): the tensors themselves are always created as Int32 and
 *      gen_offsets() is called with the literal 4 -- confirm this is intended
 *      for dtypes whose size is not 4.
 */
void test_param_pack_concat(const TensorShapeArray& shapes, DType type) {
auto cn = CompNode::load("xpu0");
auto graph = ComputingGraph::make();
auto align = cn.get_mem_addr_alignment() / type.size();
// begins[i]: element offset of tensor i in the packed buffer;
// size: total number of elements of the packed buffer
size_t size = 0;
std::vector<size_t> begins;
for (auto&& shape : shapes) {
size = get_aligned_power2(size, align);
begins.push_back(size);
size += shape.total_nr_elems();
}
// one Host2DeviceCopy per source tensor, each filled with its own flat index
SmallVector<SymbolVar> srcs;
for (size_t i = 0; i < shapes.size(); i++) {
auto data = std::make_shared<HostTensorND>();
data->comp_node(cn).dtype(dtype::Int32()).resize(shapes[i]);
auto ptr = data->ptr<dt_int32>();
for (size_t j = 0; j < shapes[i].total_nr_elems(); j++) {
ptr[j] = j;
}
auto nd = opr::Host2DeviceCopy::make(*graph, data);
srcs.push_back(nd);
}
// offsets as computed by megdnn; the last entry must equal the total size
// computed above
auto host_offsets_gen = megdnn::ParamPackConcat::gen_offsets(
shapes, cn.get_mem_addr_alignment(), 4);
ASSERT_EQ(host_offsets_gen.back(), size);
auto host_offsets = std::make_shared<HostTensorND>();
host_offsets->comp_node(cn).dtype(dtype::Int32{}).resize({srcs.size() * 2});
// 2 Int32 entries per source tensor = srcs.size() * 8 bytes
memcpy(host_offsets->raw_ptr(), host_offsets_gen.data(), srcs.size() * 8);
auto offsets = opr::Host2DeviceCopy::make(*graph, host_offsets);
auto z = opr::ParamPackConcat::make(srcs, offsets, host_offsets_gen);
HostTensorND host_z;
auto func = graph->compile({make_callback_copy(z, host_z)});
func->execute();
// expected buffer: zero-initialized, then each tensor written at begins[i]
HostTensorND expected;
expected.comp_node(cn).dtype(dtype::Int32()).resize({size});
{
auto ptr = expected.ptr<dt_int32>();
memset(ptr, 0, sizeof(int32_t) * size);
for (size_t i = 0; i < begins.size(); i++) {
auto begin = begins[i];
auto shape = shapes[i];
for (size_t j = 0; j < shape.total_nr_elems(); j++) {
ptr[begin + j] = j;
}
}
}
MGB_ASSERT_TENSOR_EQ(expected, host_z);
}
/*!
 * \brief check ParamPackSplit with AutoOprChecker on a packed float32 input
 *
 * The packed input has one dimension whose length is the sum of all output
 * sizes plus alignment padding. Output i is expected to be a copy of the
 * packed elements starting at its aligned offset, reshaped to shapes[i].
 *
 * \tparam nr_out number of outputs; must equal shapes.size()
 * \param shapes shapes of the tensors to be split out of the packed input
 */
template <size_t nr_out>
void test_param_pack_split(const TensorShapeArray& shapes) {
    auto cn = CompNode::load("xpu0");
    // the checker inputs are read as float below, so offsets are counted in
    // float-sized elements
    constexpr size_t elem_size = sizeof(float);
    // alignment in elements, at least one element
    auto align = std::max<size_t>(cn.get_mem_addr_alignment() / elem_size, 1);

    // length of the packed input: every output starts at an aligned offset
    size_t concat_size = 0;
    mgb_assert(shapes.size() == nr_out);
    for (auto&& i : shapes) {
        concat_size = get_aligned_power2(concat_size, align) + i.total_nr_elems();
    }

    using Checker = AutoOprChecker<1, nr_out>;

    // graph under test: split the single packed input into nr_out outputs
    auto make_graph = [&](const typename Checker::SymInpArray& inputs) ->
            typename Checker::SymOutArray {
        auto offsets_val = megdnn::ParamPackConcat::gen_offsets(
                shapes, cn.get_mem_addr_alignment(), elem_size);
        auto out = opr::ParamPackSplit::make(inputs[0], offsets_val, shapes);
        mgb_assert(out.size() == nr_out);
        typename Checker::SymOutArray ret;
        for (size_t i = 0; i < nr_out; ++i) {
            ret[i] = out[i];
        }
        return ret;
    };

    // host reference: copy each aligned slice of the packed input
    auto fwd = [&](typename Checker::NumOutArray& dest,
                   typename Checker::NumInpArray inp) {
        size_t offset = 0;
        auto ptr = inp[0]->template ptr<float>();
        for (size_t i = 0; i < nr_out; ++i) {
            dest[i].resize(shapes[i]);
            offset = get_aligned_power2(offset, align);
            auto nr_elem = shapes[i].total_nr_elems();
            memcpy(dest[i].template ptr<float>(), ptr + offset,
                   nr_elem * elem_size);
            offset += nr_elem;
        }
    };

    // three runs with the same input shape
    Checker{make_graph, fwd}
            .run({TensorShape{concat_size}})
            .run({TensorShape{concat_size}})
            .run({TensorShape{concat_size}});
}
}
//! Runs the ParamPackConcat check on three sets of one-dimensional int32
//! inputs, in the order listed.
TEST(TestParamPack, Concat) {
    const TensorShapeArray two_inputs = {{129}, {21}};
    const TensorShapeArray four_small_inputs = {{23}, {32}, {75}, {45}};
    const TensorShapeArray four_mixed_inputs = {{129}, {512}, {513}, {27}};

    for (const TensorShapeArray* shapes :
         {&two_inputs, &four_small_inputs, &four_mixed_inputs}) {
        test_param_pack_concat(*shapes, dtype::Int32());
    }
}
//! Runs the ParamPackSplit check once with two and once with three outputs of
//! differing ranks.
TEST(TestParamPack, Split) {
    const TensorShapeArray two_outputs = {{2, 3}, {4, 5, 6}};
    test_param_pack_split<2>(two_outputs);

    const TensorShapeArray three_outputs = {{2, 9}, {123}, {5, 3}};
    test_param_pack_split<3>(three_outputs);
}