#include "test/cpu/fixture.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
using namespace megdnn;
using namespace test;
namespace {
Convolution::Param gconv_param(Convolution::Param p) {
p.sparse = Convolution::Param::Sparse::GROUP;
return p;
}
}
#define CONVOLUTION_ARG_DIV_SIZE 100
TEST_F(CPU, CONVOLUTION_0) {
using namespace convolution;
std::vector<TestArg> args = get_args();
auto loop_size = args.size();
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE);
Checker<Convolution> checker(handle());
for (unsigned int i = 0; i < CONVOLUTION_ARG_DIV_SIZE; i++) {
checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
}
}
#define CONVOLUTION1_ARG_LOOP_END_TIME (CONVOLUTION_ARG_DIV_SIZE + 205)
TEST_F(CPU, CONVOLUTION_1) {
using namespace convolution;
std::vector<TestArg> args = get_args();
auto loop_size = args.size();
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE);
ASSERT_GT(loop_size, CONVOLUTION1_ARG_LOOP_END_TIME);
Checker<Convolution> checker(handle());
for (unsigned int i = CONVOLUTION_ARG_DIV_SIZE; i < CONVOLUTION1_ARG_LOOP_END_TIME;
i++) {
checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
}
}
#define CONVOLUTION2_ARG_LOOP_END_TIME (CONVOLUTION1_ARG_LOOP_END_TIME + 200)
TEST_F(CPU, CONVOLUTION_2) {
using namespace convolution;
std::vector<TestArg> args = get_args();
auto loop_size = args.size();
ASSERT_GT(loop_size, CONVOLUTION2_ARG_LOOP_END_TIME);
Checker<Convolution> checker(handle());
for (unsigned int i = CONVOLUTION1_ARG_LOOP_END_TIME;
i < CONVOLUTION2_ARG_LOOP_END_TIME; i++) {
checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
}
}
TEST_F(CPU, CONVOLUTION_3) {
using namespace convolution;
std::vector<TestArg> args = get_args();
auto loop_size = args.size();
ASSERT_GT(loop_size, CONVOLUTION2_ARG_LOOP_END_TIME);
Checker<Convolution> checker(handle());
for (unsigned int i = CONVOLUTION2_ARG_LOOP_END_TIME; i < loop_size; i++) {
checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
}
}
#undef CONVOLUTION_ARG_DIV_SIZE
#undef CONVOLUTION1_ARG_LOOP_END_TIME
#undef CONVOLUTION2_ARG_LOOP_END_TIME
#define CB_CONV_CONFIG_COMBINATIONS(KSIZE) \
TEST_F(CPU, CONV_CONFIG_COMBINATIONS_KSIZE_1_KSIZE_##KSIZE) { \
convolution::test_conv_config_combinations( \
KSIZE, handle(), true, false, false); \
}
CB_CONV_CONFIG_COMBINATIONS(2);
CB_CONV_CONFIG_COMBINATIONS(3);
CB_CONV_CONFIG_COMBINATIONS(5);
#undef CB_CONV_CONFIG_COMBINATIONS
#if MEGDNN_WITH_BENCHMARK
TEST_F(CPU, BENCHMARK_CONVOLUTION) {
using TestArg = convolution::TestArg;
using Param = param::Convolution;
std::vector<TestArg> args;
for (size_t has_pad = 0; has_pad < 2; ++has_pad)
for (uint32_t stride = 1; stride <= 2; ++stride)
for (std::pair<size_t, size_t> kersize :
std::vector<std::pair<size_t, size_t>>{
{2, 2}, {3, 3}, {5, 5}, {7, 7}}) {
uint32_t pad_h, pad_w;
if (has_pad)
pad_h = kersize.first / 2;
else
pad_h = 0;
if (has_pad)
pad_w = kersize.second / 2;
else
pad_w = 0;
auto param = Param{Param::Mode::CROSS_CORRELATION, pad_h, pad_w,
stride, stride};
{
auto arg = TestArg{param,
{2, 3, 320, 240},
{4, 3, kersize.first, kersize.second}};
args.push_back(arg);
}
}
Checker<Convolution> checker(handle());
checker.set_perf_check(true).set_perf_check_threshold(2.0);
for (auto&& arg : args) {
checker.set_param(arg.param).execs({arg.src, arg.filter, {}});
}
}
#endif
TEST_F(CPU, CHANWISE_CONVOLUTION) {
constexpr auto M = Convolution::Mode::CROSS_CORRELATION;
Checker<Convolution> checker(handle());
checker.set_param(gconv_param({M, 0, 0, 1, 1}))
.execs({{1, 1, 2, 2}, {1, 1, 1, 2, 2}, {}})
.execs({{1, 1, 5, 5}, {1, 1, 1, 2, 2}, {}})
.execs({{2, 2, 5, 5}, {2, 3, 1, 2, 2}, {2, 6, 4, 4}});
checker.set_param(gconv_param({M, 1, 1, 1, 1}))
.execs({{2, 2, 5, 5}, {2, 1, 1, 2, 2}, {}});
checker.set_param(gconv_param({M, 2, 3, 2, 1}))
.execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
checker.set_param(gconv_param({M, 20, 30, 4, 5}))
.execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
}
TEST_F(CPU, CHANWISE_CONVOLUTION_INT8_INT8_INT16) {
constexpr auto M = Convolution::Mode::CROSS_CORRELATION;
Checker<Convolution> checker(handle());
checker.set_dtype(0, dtype::Int8());
checker.set_dtype(1, dtype::Int8());
checker.set_dtype(2, dtype::Int16());
checker.set_param(gconv_param({M, 0, 0, 1, 1, 1, 1}))
.execs({{1, 1, 2, 2}, {1, 1, 1, 2, 2}, {}})
.execs({{1, 1, 5, 5}, {1, 1, 1, 2, 2}, {}})
.execs({{2, 2, 5, 5}, {2, 3, 1, 2, 2}, {2, 6, 4, 4}});
checker.set_param(gconv_param({M, 1, 1, 1, 1, 1, 1}))
.execs({{2, 2, 5, 5}, {2, 1, 1, 2, 2}, {}});
checker.set_param(gconv_param({M, 2, 3, 2, 1, 1, 1}))
.execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
checker.set_param(gconv_param({M, 20, 30, 4, 5, 1, 1}))
.execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
for (uint32_t s : {1, 2})
for (uint32_t p : {0, 1})
for (size_t kh : {2, 3, 5})
for (size_t kw : {kh, kh + 1})
for (size_t ic : {5})
for (size_t oc : {3})
for (size_t h = 20; h <= 60; h += 7)
for (size_t w : {h, h + 1}) {
checker.set_param(gconv_param({M, p, p, s, s, 1, 1}))
.execs({{2, ic, h, w}, {ic, oc, 1, kh, kw}, {}});
}
}
TEST_F(CPU, GROUP_CONV) {
auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
size_t OC, size_t , size_t , size_t PH, size_t PW,
size_t SH, size_t SW, size_t group) {
Checker<Convolution> checker(handle());
Convolution::Param param;
param.pad_h = PH;
param.pad_w = PW;
param.stride_h = SH;
param.stride_w = SW;
auto ICg = IC / group;
auto OCg = OC / group;
checker.set_param(gconv_param(param))
.exec({{N, IC, IH, IW}, {group, OCg, ICg, FH, FW}, {}});
};
run(2, 64, 7, 7, 3, 3, 32, 5, 5, 0, 0, 1, 1, 1);
run(2, 32, 7, 7, 3, 3, 64, 7, 7, 1, 1, 1, 1, 4);
run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 2, 2, 8);
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(CPU, BENCHMARK_7X7_CONVOLUTION) {
using Param = param::Convolution;
auto run = [&](const TensorShapeArray& shapes, Param param) {
auto handle_naive = create_cpu_handle(2);
Benchmarker<Convolution> benchmarker_naive(handle_naive.get());
Benchmarker<Convolution> benchmarker_float(handle());
size_t RUN = 10;
auto tfloat = benchmarker_float.set_display(false)
.set_times(RUN)
.set_param(param)
.exec(shapes);
auto tnaive = benchmarker_naive.set_display(false)
.set_times(RUN)
.set_param(param)
.exec(shapes);
printf("src: %s filter: %s dst: %s naive=%.3fms float=%.3fms\n",
shapes[0].to_string().c_str(), shapes[1].to_string().c_str(),
shapes[2].to_string().c_str(), tnaive / RUN, tfloat / RUN);
};
Param param;
param.stride_h = 2;
param.stride_w = 2;
param.pad_h = 3;
param.pad_w = 3;
for (size_t ic : {1, 3, 8, 16, 24}) {
for (size_t oc : {8, 16}) {
for (size_t h : {128, 224, 256, 512}) {
for (size_t w : {128, 224, 256, 512}) {
run({{1, ic, h, w}, {oc, ic, 7, 7}, {1, oc, h / 2, w / 2}}, param);
} } } }
}
#endif