#include "src/naive/warp_affine/opr_impl.h"
#include "src/common/warp_common.h"
#include "src/naive/handle.h"
#include "src/naive/warp_affine/warp_affine_cv.h"
#include "midout.h"
// midout tags referenced by the MIDOUT_BEGIN(...) regions in
// WarpAffineImpl::exec: the first wraps the CV-based path, the second wraps
// the per-dtype naive kernel dispatch.
MIDOUT_DECL(megdnn_naive_warpaffine)
MIDOUT_DECL(megdnn_naive_warpaffine_dtype)
// Bring megdnn and megdnn::naive into scope so the member definitions below
// can be written with unqualified names (WarpAffineImpl, Format, ...).
using namespace megdnn;
using namespace naive;
template <typename ctype, typename mtype>
void WarpAffineImpl::kern_naive(
const KernParam<ctype, mtype>& kern_param, size_t task_id) {
if (kern_param.format == Format::NHWC) {
kern_naive_nhwc(kern_param, task_id);
return;
} else if (kern_param.format == Format::NHWCD4) {
kern_naive_nhwcd4(kern_param, task_id);
return;
}
UNPACK_WARP_AFFINE_FWD_KERN_PARAM(kern_param);
MEGDNN_MARK_USED_VAR(N_SRC);
MEGDNN_MARK_USED_VAR(N_MAT);
rounding::RoundingConverter<ctype> output_converter;
auto bmode = param().border_mode;
auto border_val = param().border_val;
size_t n = task_id / OH;
size_t oh = task_id % OH;
mptr += n * 2 * 3;
dptr += n * C * OH * OW;
sptr += n * C * IH * IW;
rep(ow, OW) {
float alphaw = mptr[0] * ow + mptr[1] * oh + mptr[2];
float alphah = mptr[3] * ow + mptr[4] * oh + mptr[5];
int iw0 = get_real_coord(std::floor(alphaw) + 0, IW);
int iw1 = get_real_coord(std::floor(alphaw) + 1, IW);
int ih0 = get_real_coord(std::floor(alphah) + 0, IH);
int ih1 = get_real_coord(std::floor(alphah) + 1, IH);
alphaw -= floor(alphaw);
alphah -= floor(alphah);
if (bmode != BorderMode::CONSTANT) {
rep(c, C) {
dptr[c * OH * OW + oh * OW + ow] = output_converter(
sptr[c * IH * IW + ih0 * IW + iw0] * (1.0f - alphaw) *
(1.0f - alphah) +
sptr[c * IH * IW + ih0 * IW + iw1] * alphaw * (1.0f - alphah) +
sptr[c * IH * IW + ih1 * IW + iw0] * (1.0f - alphaw) * alphah +
sptr[c * IH * IW + ih1 * IW + iw1] * alphaw * alphah);
}
} else {
rep(c, C) {
const float b = border_val;
auto val = (ih0 != -1 && iw0 != -1 ? sptr[c * IH * IW + ih0 * IW + iw0]
: b) *
(1.0f - alphaw) * (1.0f - alphah) +
(ih0 != -1 && iw1 != -1 ? sptr[c * IH * IW + ih0 * IW + iw1]
: b) *
alphaw * (1.0f - alphah) +
(ih1 != -1 && iw0 != -1 ? sptr[c * IH * IW + ih1 * IW + iw0]
: b) *
(1.0f - alphaw) * alphah +
(ih1 != -1 && iw1 != -1 ? sptr[c * IH * IW + ih1 * IW + iw1]
: b) *
alphaw * alphah;
dptr[c * OH * OW + oh * OW + ow] =
output_converter(std::isfinite(val) ? val : b);
}
}
}
}
template <typename ctype, typename mtype>
void WarpAffineImpl::kern_naive_nhwcd4(
const KernParam<ctype, mtype>& kern_param, size_t task_id) {
UNPACK_WARP_AFFINE_FWD_KERN_PARAM(kern_param);
MEGDNN_MARK_USED_VAR(N_SRC);
MEGDNN_MARK_USED_VAR(N_MAT);
rounding::RoundingConverter<ctype> output_converter;
auto bmode = param().border_mode;
auto border_val = param().border_val;
size_t n = task_id / OH;
size_t oh = task_id % OH;
mptr += n * 2 * 3;
dptr += n * C * OH * OW * 4;
sptr += n * C * IH * IW * 4;
rep(ow, OW) {
float alphaw = mptr[0] * ow + mptr[1] * oh + mptr[2];
float alphah = mptr[3] * ow + mptr[4] * oh + mptr[5];
int iw0 = get_real_coord(std::floor(alphaw) + 0, IW);
int iw1 = get_real_coord(std::floor(alphaw) + 1, IW);
int ih0 = get_real_coord(std::floor(alphah) + 0, IH);
int ih1 = get_real_coord(std::floor(alphah) + 1, IH);
alphaw -= floor(alphaw);
alphah -= floor(alphah);
if (bmode != BorderMode::CONSTANT) {
rep(c, C) {
for (int i = 0; i < 4; i++) {
dptr[((oh * C + c) * OW + ow) * 4 + i] = output_converter(
sptr[((ih0 * C + c) * IW + iw0) * 4 + i] * (1.0f - alphaw) *
(1.0f - alphah) +
sptr[((ih0 * C + c) * IW + iw1) * 4 + i] * alphaw *
(1.0f - alphah) +
sptr[((ih1 * C + c) * IW + iw0) * 4 + i] * (1.0f - alphaw) *
alphah +
sptr[((ih1 * C + c) * IW + iw1) * 4 + i] * alphaw * alphah);
}
}
} else {
rep(c, C) {
const float b = border_val;
for (int i = 0; i < 4; i++) {
auto val = (ih0 != -1 && iw0 != -1
? sptr[(((ih0 * C + c) * IW + iw0)) * 4 + i]
: b) *
(1.0f - alphaw) * (1.0f - alphah) +
(ih0 != -1 && iw1 != -1
? sptr[((ih0 * C + c) * IW + iw1) * 4 + i]
: b) *
alphaw * (1.0f - alphah) +
(ih1 != -1 && iw0 != -1
? sptr[((ih1 * C + c) * IW + iw0) * 4 + i]
: b) *
(1.0f - alphaw) * alphah +
(ih1 != -1 && iw1 != -1
? sptr[((ih1 * C + c) * IW + iw1) * 4 + i]
: b) *
alphaw * alphah;
dptr[((oh * C + c) * OW + ow) * 4 + i] =
output_converter(std::isfinite(val) ? val : b);
}
}
}
}
}
template <typename ctype, typename mtype>
void WarpAffineImpl::kern_naive_nhwc(
const KernParam<ctype, mtype>& kern_param, size_t task_id) {
UNPACK_WARP_AFFINE_FWD_KERN_PARAM(kern_param);
MEGDNN_MARK_USED_VAR(N_SRC);
MEGDNN_MARK_USED_VAR(N_MAT);
rounding::RoundingConverter<ctype> output_converter;
auto bmode = param().border_mode;
auto border_val = param().border_val;
size_t n = task_id / OH;
size_t oh = task_id % OH;
mptr += n * 2 * 3;
dptr += n * C * OH * OW;
sptr += n * C * IH * IW;
rep(ow, OW) {
float alphaw = mptr[0] * ow + mptr[1] * oh + mptr[2];
float alphah = mptr[3] * ow + mptr[4] * oh + mptr[5];
int iw0 = get_real_coord(std::floor(alphaw) + 0, IW);
int iw1 = get_real_coord(std::floor(alphaw) + 1, IW);
int ih0 = get_real_coord(std::floor(alphah) + 0, IH);
int ih1 = get_real_coord(std::floor(alphah) + 1, IH);
alphaw -= floor(alphaw);
alphah -= floor(alphah);
if (bmode != BorderMode::CONSTANT) {
rep(c, C) {
dptr[(oh * OW + ow) * C + c] = output_converter(
sptr[(ih0 * IW + iw0) * C + c] * (1.0f - alphaw) *
(1.0f - alphah) +
sptr[(ih0 * IW + iw1) * C + c] * alphaw * (1.0f - alphah) +
sptr[(ih1 * IW + iw0) * C + c] * (1.0f - alphaw) * alphah +
sptr[(ih1 * IW + iw1) * C + c] * alphaw * alphah);
}
} else {
rep(c, C) {
const float b = border_val;
auto val =
(ih0 != -1 && iw0 != -1 ? sptr[(ih0 * IW + iw0) * C + c] : b) *
(1.0f - alphaw) * (1.0f - alphah) +
(ih0 != -1 && iw1 != -1 ? sptr[(ih0 * IW + iw1) * C + c] : b) *
alphaw * (1.0f - alphah) +
(ih1 != -1 && iw0 != -1 ? sptr[(ih1 * IW + iw0) * C + c] : b) *
(1.0f - alphaw) * alphah +
(ih1 != -1 && iw1 != -1 ? sptr[(ih1 * IW + iw1) * C + c] : b) *
alphaw * alphah;
dptr[(oh * OW + ow) * C + c] =
output_converter(std::isfinite(val) ? val : b);
}
}
}
}
/*!
 * \brief Entry point of the naive warp-affine operator.
 *
 * After validating the layouts, the CV-based implementation is used whenever
 * warp::is_cv_available accepts the configuration. Otherwise the work is
 * split into batch * OH tasks (one per output row) and each task is handled
 * by kern_naive, instantiated for the source dtype.
 *
 * \param src input tensor
 * \param mat affine matrices (kern_naive reads 2x3 values per batch item)
 * \param dst output tensor
 * \param workspace scratch buffer; its size is checked by check_exec
 *
 * An unsupported source dtype is reported through megdnn_throw.
 */
void WarpAffineImpl::exec(
        _megdnn_tensor_in src, _megdnn_tensor_in mat, _megdnn_tensor_out dst,
        _megdnn_workspace workspace) {
    check_exec(src.layout, mat.layout, dst.layout, workspace.size);

    const bool use_cv = warp::is_cv_available(
            src.layout, mat.layout, dst.layout, param().imode, param().format);
    if (use_cv) {
        MIDOUT_BEGIN(megdnn_naive_warpaffine, void) {
            warp_affine_cv_exec(
                    src, mat, dst, param().border_val, param().border_mode,
                    param().imode, handle());
        }
        MIDOUT_END();
        return;
    }

    // The output height sits at axis 2 for NCHW and at axis 1 for the other
    // formats.
    const size_t n_batch = dst.layout[0];
    const size_t n_rows =
            param().format == Format::NCHW ? dst.layout[2] : dst.layout[1];
    const size_t n_tasks = n_batch * n_rows;
    megdnn_assert(warp::is_dnn_available(
            src.layout, mat.layout, dst.layout, param().imode, param().format));

// Expands to one switch case: build the typed kernel parameters and dispatch
// one kern_naive call per task index, then leave exec.
#define cb(dt, ct, mct, _midout_iv)                                             \
    case DTypeTrait<dt>::enumv: {                                               \
        auto kparam = KernParam<ct, mct>::from_tensors(                         \
                param().format, src, mat, dst, workspace);                      \
        MIDOUT_BEGIN(megdnn_naive_warpaffine_dtype, midout_iv(_midout_iv)) {    \
            auto run = [kparam, this](size_t index, size_t) {                   \
                kern_naive(kparam, index);                                      \
            };                                                                  \
            MEGDNN_DISPATCH_MULTI_THREAD_CPU_KERN_OPR(run, n_tasks);            \
        }                                                                       \
        MIDOUT_END();                                                           \
        return;                                                                 \
    }

    switch (src.layout.dtype.enumv()) {
        cb(dtype::Float32, float, float, 0);
        DNN_INC_FLOAT16(cb(dtype::Float16, dt_float16, dt_float16, 1));
        cb(dtype::Int8, int8_t, float, 2);
        cb(dtype::QuantizedS8, int8_t, float, 3);
        cb(dtype::Uint8, uint8_t, float, 4);
        cb(dtype::Quantized8Asymm, uint8_t, float, 5);
        default:
            megdnn_throw(ssprintf(
                                 "Unsupported input DType in WarpAffine: %s",
                                 src.layout.dtype.name())
                                 .c_str());
            return;
    }
#undef cb
}