#include "megbrain/opr/dnn/pooling.h"
#include "./legacy_checker.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/utils/persistent_cache.h"
using namespace std;
using namespace mgb;
namespace {
using Param = opr::Pooling::Param;
using Mode = Param::Mode;
void pooling_brute(
const vector<shared_ptr<HostTensorND>>& in_tensor,
shared_ptr<HostTensorND>& out_tensor, const Param& param) {
ASSERT_EQ(1u, in_tensor.size());
ASSERT_EQ(4u, in_tensor[0]->shape().ndim);
size_t n = in_tensor[0]->shape().shape[0];
size_t c = in_tensor[0]->shape().shape[1];
size_t ih = in_tensor[0]->shape().shape[2];
size_t iw = in_tensor[0]->shape().shape[3];
size_t oh = (ih + 2 * param.pad_h - param.window_h) / param.stride_h + 1;
size_t ow = (iw + 2 * param.pad_w - param.window_w) / param.stride_w + 1;
out_tensor = make_shared<HostTensorND>(
CompNode::load("xpu0"), TensorShape{n, c, oh, ow});
int fx, fy;
size_t tx, ty;
for (size_t on = 0; on < n; ++on)
for (size_t oc = 0; oc < c; ++oc)
for (tx = 0, fx = -param.pad_h; tx < oh; ++tx, fx += param.stride_h)
for (ty = 0, fy = -param.pad_w; ty < ow; ++ty, fy += param.stride_w) {
float& cur = out_tensor->ptr<float>({on, oc, tx, ty})[0];
bool valid = false;
if (param.mode == Param::Mode::AVERAGE ||
param.mode == Param::Mode::AVERAGE_COUNT_EXCLUDE_PADDING) {
cur = 0;
int fx2 = fx + static_cast<int>(param.window_h);
int fy2 = fy + static_cast<int>(param.window_w);
int cnt = 0;
for (int rx = fx; rx < fx2; ++rx)
for (int ry = fy; ry < fy2; ++ry)
if (rx >= 0 && rx < static_cast<int>(ih) && ry >= 0 &&
ry < static_cast<int>(iw)) {
cur += in_tensor[0]->ptr<float>(
{on, oc, static_cast<size_t>(rx),
static_cast<size_t>(ry)})[0];
valid = true;
++cnt;
}
if (param.mode == Param::Mode::AVERAGE) {
cnt = param.window_h * param.window_w;
}
cur /= static_cast<float>(cnt);
} else {
cur = -numeric_limits<float>::max();
ASSERT_EQ(Param::Mode::MAX, param.mode);
int fx2 = fx + static_cast<int>(param.window_h);
int fy2 = fy + static_cast<int>(param.window_w);
for (int rx = fx; rx < fx2; ++rx)
for (int ry = fy; ry < fy2; ++ry)
if (rx >= 0 && rx < static_cast<int>(ih) && ry >= 0 &&
ry < static_cast<int>(iw)) {
cur = std::max(
cur,
in_tensor[0]->ptr<float>(
{on, oc, static_cast<size_t>(rx),
static_cast<size_t>(ry)})[0]);
valid = true;
}
}
mgb_assert(valid);
}
}
//! Runs the forward checker for opr::Pooling against pooling_brute once for
//! every pooling mode index below Param::MODE_NR_MEMBER.
TEST(TestOprDNN, PoolingForward) {
    // Values are handed to the Param constructor in the order
    // (mode, pad_h, pad_w, stride_y, stride_x, window_y, window_x).
    const size_t pad_h = 0, pad_w = 3;
    const size_t stride_x = 2, stride_y = 3;
    const size_t window_x = 4, window_y = 2;
    // Extents of the last two input axes.
    const size_t in_x = 23, in_y = 15;

    for (uint32_t mode_id = 0; mode_id < Param::MODE_NR_MEMBER; ++mode_id) {
        const Mode mode = static_cast<Mode>(mode_id);
        Param param(mode, pad_h, pad_w, stride_y, stride_x, window_y, window_x);
        opr::test::ForwardChecker<opr::Pooling, 1> checker(
                {{2, 3, in_x, in_y}}, pooling_brute, param);
        checker.run();
    }
}
//! Runs the backward (gradient) checker for opr::Pooling once for every
//! pooling mode index below Param::MODE_NR_MEMBER.
TEST(TestOprDNN, PoolingBackward) {
    // Values are handed to the Param constructor in the order
    // (mode, pad_h, pad_w, stride_y, stride_x, window_y, window_x).
    const size_t pad_h = 1, pad_w = 1;
    const size_t stride_x = 2, stride_y = 3;
    const size_t window_x = 3, window_y = 2;
    // Extents of the last two input axes.
    const size_t in_x = 23, in_y = 15;

    for (uint32_t mode_id = 0; mode_id < Param::MODE_NR_MEMBER; ++mode_id) {
        const Mode mode = static_cast<Mode>(mode_id);
        Param param(mode, pad_h, pad_w, stride_y, stride_x, window_y, window_x);
        // NOTE(review): the trailing (1e-2, 1e-2, false) arguments are passed
        // through unchanged; presumably tolerances plus a flag — confirm
        // against BackwardChecker's constructor.
        opr::test::BackwardChecker<opr::Pooling, 1> checker(
                {{2, 3, in_x, in_y}}, param, 1e-2, 1e-2, false);
        checker.run();
    }
}
/*!
 * GPU-only test: builds the gradient of a 2x2/stride-2 max pooling, sets a
 * HEURISTIC | REPRODUCIBLE execution policy on the PoolingBackward operator
 * found in the gradient graph, runs it, and checks that the algorithm name
 * recorded in the operator's execution policy contains "cudnnReproducible".
 */
TEST(TestOprDNN, PoolingExePolicy) {
    using Param = opr::Pooling::Param;
    using Policy = opr::Pooling::ExecutionPolicy;
    using S = Policy::Strategy;

    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
    cn.activate();

    // Swap in a throw-away in-memory persistent cache for this test and put the
    // previous implementation back when the test body exits (normal return,
    // gtest fatal-assertion return, or exception), so the replacement does not
    // leak into other tests. Declared before the graph so that it is destroyed
    // after everything that may still use the cache.
    struct ScopedInMemoryCache {
        std::shared_ptr<PersistentCache> saved;
        ScopedInMemoryCache()
                : saved{PersistentCache::set_impl(
                          std::make_shared<InMemoryPersistentCache>())} {}
        ~ScopedInMemoryCache() {
            if (saved) {
                PersistentCache::set_impl(std::move(saved));
            }
        }
    } scoped_cache;

    HostTensorND host_y;
    S strategy = S::HEURISTIC | S::REPRODUCIBLE;

    auto graph = ComputingGraph::make();
    HostTensorGenerator<> gen;
    TensorShape shape = {1, 20, 24, 24};
    auto input = opr::Host2DeviceCopy::make(*graph, gen(shape, cn));

    Param param;
    param.mode = Param::Mode::MAX;
    param.window_h = param.window_w = 2;
    param.stride_h = param.stride_w = 2;
    param.pad_h = param.pad_w = 0;
    param.format = Param::Format::NCHW;

    Policy policy;
    policy.strategy = strategy;

    auto pooling = opr::PoolingForward::make(input, param, policy);
    auto loss0 = opr::reduce_sum_sqr(pooling, pooling.make_scalar(1));
    auto grad = cg::grad(loss0, input, true, false);

    // Locate the PoolingBackward operator among the dependencies of the
    // gradient variable.
    opr::PoolingBackward* found = nullptr;
    auto cb = [&found](cg::OperatorNodeBase* opr) {
        if (opr->same_type<opr::PoolingBackward>()) {
            found = &opr->cast_final_safe<opr::PoolingBackward>();
        }
    };
    cg::DepOprIter{cb}.add(grad.node()->owner_opr());
    // Fail the test instead of dereferencing a null pointer if the gradient
    // graph unexpectedly contains no PoolingBackward.
    ASSERT_TRUE(found != nullptr)
            << "no PoolingBackward operator found in the gradient graph";
    found->set_execution_policy(strategy);

    auto func = graph->compile({make_callback_copy(grad, host_y)});
    func->execute().wait();

    const auto& algo_name = found->megdnn_opr()->execution_policy().algo.name;
    ASSERT_TRUE(algo_name.find("cudnnReproducible") != std::string::npos)
            << "unexpected algorithm selected: " << algo_name;
}
/*!
 * Smoke test: compiles and executes a 2x2/stride-2 max pooling on "xpux" with
 * a PROFILE | REPRODUCIBLE execution policy. The test only requires that
 * compilation and execution complete; the output values are not inspected.
 */
TEST(TestOprDNN, PoolingForwardFastrun) {
    using Param = opr::Pooling::Param;
    using Policy = opr::Pooling::ExecutionPolicy;
    using S = Policy::Strategy;

    auto cn = CompNode::load("xpux");
    cn.activate();

    // Swap in a throw-away in-memory persistent cache for this test and put the
    // previous implementation back when the test body exits, so the
    // replacement does not leak into other tests. Declared before the graph so
    // that it is destroyed after everything that may still use the cache.
    struct ScopedInMemoryCache {
        std::shared_ptr<PersistentCache> saved;
        ScopedInMemoryCache()
                : saved{PersistentCache::set_impl(
                          std::make_shared<InMemoryPersistentCache>())} {}
        ~ScopedInMemoryCache() {
            if (saved) {
                PersistentCache::set_impl(std::move(saved));
            }
        }
    } scoped_cache;

    HostTensorND host_y;
    S strategy = S::PROFILE | S::REPRODUCIBLE;

    auto graph = ComputingGraph::make();
    HostTensorGenerator<> gen;
    TensorShape shape = {1, 20, 24, 24};
    auto input = opr::Host2DeviceCopy::make(*graph, gen(shape, cn));

    Param param;
    param.mode = Param::Mode::MAX;
    param.window_h = param.window_w = 2;
    param.stride_h = param.stride_w = 2;
    param.pad_h = param.pad_w = 0;
    param.format = Param::Format::NCHW;

    Policy policy;
    policy.strategy = strategy;

    auto pooling = opr::PoolingForward::make(input, param, policy);
    auto func = graph->compile({make_callback_copy(pooling, host_y)});
    func->execute().wait();
}
}