#include "./opr_impl.h"
#include "src/common/cv/common.h"
#include "src/common/cv/helper.h"
#include "src/common/gaussian_blur_helper.h"
#include "src/naive/handle.h"
namespace megdnn {
namespace naive {
/*!
 * \brief Naive Gaussian blur for uint8 data using fixed-point arithmetic.
 *
 * Both tensors are addressed with four coordinates (n, h, w, c) through their
 * layout strides. The two 1-D float kernels produced by
 * gaussian_blur::createGaussianKernels are scaled by 2^8 and truncated to int,
 * so each accumulated product kx * ky * pixel is scaled by 2^16 overall.
 * The accumulated value is converted back to uint8 by a FixedPtCastEx
 * constructed with 2 * 8 (presumably the number of fractional bits to drop --
 * see FixedPtCastEx for the exact rounding/saturation rules).
 *
 * \param src input tensor, read as uint8_t
 * \param dst output tensor, written as uint8_t
 */
template <>
void GaussianBlurImpl::exec_internal<uint8_t>(
        _megdnn_tensor_in src, _megdnn_tensor_out dst) {
    auto N = src.layout.shape[0], IH = src.layout.shape[1], IW = src.layout.shape[2],
         IC = src.layout.shape[3];

    using namespace megcv;

    //! floating-point separable kernels
    Size ksize = Size(param().kernel_height, param().kernel_width);
    Mat<float> kx_(1, ksize.cols(), 1);
    Mat<float> ky_(1, ksize.rows(), 1);
    gaussian_blur::createGaussianKernels<float>(
            kx_, ky_, ksize, param().sigma_x, param().sigma_y);

    uint32_t kernel_height = ky_.width();
    uint32_t kernel_width = kx_.width();

    //! quantize the coefficients to fixed point with `bits` fractional bits
    //! (truncating, as static_cast<int> does)
    Mat<int> kx(1, kernel_width, 1);
    Mat<int> ky(1, kernel_height, 1);
    const uint8_t bits = 8;
    for (size_t i = 0; i < kernel_height; i++) {
        ky.at(0, i, 0) = static_cast<int>(ky_.at(0, i, 0) * (1 << bits));
    }
    for (size_t i = 0; i < kernel_width; i++) {
        kx.at(0, i, 0) = static_cast<int>(kx_.at(0, i, 0) * (1 << bits));
    }

    //! each product carries bits (kx) + bits (ky) of scaling
    FixedPtCastEx<int, uint8_t> cast_op(2 * bits);

    //! loop invariants, fetched once instead of once per kernel tap
    const int half_h = static_cast<int>(kernel_height / 2);
    const int half_w = static_cast<int>(kernel_width / 2);
    const auto border_mode = param().border_mode;
    const auto src_data = src.ptr<uint8_t>();
    const auto dst_data = dst.ptr<uint8_t>();

    rep(n, N) rep(h, IH) rep(w, IW) rep(c, IC) {
        int val = 0;
        rep(iy, kernel_height) {
            //! the tap coordinate may lie above/left of the image, so the
            //! subtraction is done in signed arithmetic rather than relying
            //! on unsigned wraparound followed by a narrowing conversion
            int y = gaussian_blur::border_interpolate(
                    static_cast<int>(h + iy) - half_h, IH, border_mode);
            rep(ix, kernel_width) {
                int x = gaussian_blur::border_interpolate(
                        static_cast<int>(w + ix) - half_w, IW, border_mode);
                //! -1 is treated as "no source pixel for this tap" (inferred
                //! from this guard); such taps contribute nothing
                if (x != -1 && y != -1) {
                    val += kx.at(0, ix, 0) * ky.at(0, iy, 0) *
                           src_data[n * src.layout.stride[0] +
                                    y * src.layout.stride[1] +
                                    x * src.layout.stride[2] +
                                    c * src.layout.stride[3]];
                }
            }
        }
        dst_data[n * dst.layout.stride[0] + h * dst.layout.stride[1] +
                 w * dst.layout.stride[2] + c * dst.layout.stride[3]] = cast_op(val);
    }
}
/*!
 * \brief Generic naive Gaussian blur; accumulates every output element in
 *        double precision and casts the sum to T.
 *
 * Both tensors are addressed with four coordinates through their layout
 * strides. The 2-D filter weight of a tap is the product of the matching
 * entries of the two 1-D float kernels filled in by
 * gaussian_blur::createGaussianKernels.
 *
 * \tparam T element type used to read src and write dst
 * \param src input tensor
 * \param dst output tensor
 */
template <typename T>
void GaussianBlurImpl::exec_internal(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
    using namespace megcv;

    auto batch = src.layout.shape[0], in_h = src.layout.shape[1],
         in_w = src.layout.shape[2], channels = src.layout.shape[3];

    //! horizontal and vertical 1-D kernels
    Size kernel_size = Size(param().kernel_height, param().kernel_width);
    Mat<float> kernel_x(1, kernel_size.cols(), 1);
    Mat<float> kernel_y(1, kernel_size.rows(), 1);
    gaussian_blur::createGaussianKernels<float>(
            kernel_x, kernel_y, kernel_size, param().sigma_x, param().sigma_y);

    uint32_t kh = kernel_y.width();
    uint32_t kw = kernel_x.width();
    //! half the kernel extent: tap 0 sits this far before the output position
    uint32_t anchor_y = kh / 2;
    uint32_t anchor_x = kw / 2;

    rep(n, batch) rep(oh, in_h) rep(ow, in_w) rep(c, channels) {
        double acc = 0;
        rep(ky, kh) {
            int src_y = gaussian_blur::border_interpolate(
                    oh + ky - anchor_y, in_h, param().border_mode);
            rep(kx, kw) {
                int src_x = gaussian_blur::border_interpolate(
                        ow + kx - anchor_x, in_w, param().border_mode);
                //! a coordinate of -1 means this tap is skipped
                if (src_x == -1 || src_y == -1) {
                    continue;
                }
                auto src_off = n * src.layout.stride[0] +
                               src_y * src.layout.stride[1] +
                               src_x * src.layout.stride[2] +
                               c * src.layout.stride[3];
                acc += kernel_x.at(0, kx, 0) * kernel_y.at(0, ky, 0) *
                       src.ptr<T>()[src_off];
            }
        }
        auto dst_off = n * dst.layout.stride[0] + oh * dst.layout.stride[1] +
                       ow * dst.layout.stride[2] + c * dst.layout.stride[3];
        dst.ptr<T>()[dst_off] = static_cast<T>(acc);
    }
}
/*!
 * \brief Operator entry point: selects the exec_internal instantiation that
 *        matches the dtype of \p src and hands it to
 *        MEGDNN_DISPATCH_CPU_KERN_OPR.
 *
 * Only Uint8 and Float32 are handled; any other dtype falls through to
 * megdnn_assert_internal(0).
 *
 * \param src input tensor; its layout dtype drives the dispatch
 * \param dst output tensor, forwarded unchanged to exec_internal
 * The third (workspace) argument is not used.
 */
void GaussianBlurImpl::exec(
        _megdnn_tensor_in src, _megdnn_tensor_in dst, _megdnn_workspace /* unused */) {
//! expands to one dtype test; on a match the kernel is dispatched and exec returns
#define GAUSSIAN_BLUR_DISPATCH(DType_)                                \
    if (src.layout.dtype == DType_()) {                               \
        using ctype = typename DTypeTrait<DType_>::ctype;             \
        MEGDNN_DISPATCH_CPU_KERN_OPR(exec_internal<ctype>(src, dst)); \
        return;                                                       \
    }
    GAUSSIAN_BLUR_DISPATCH(dtype::Uint8);
    GAUSSIAN_BLUR_DISPATCH(dtype::Float32);
#undef GAUSSIAN_BLUR_DISPATCH

    //! no supported dtype matched
    megdnn_assert_internal(0);
}
} }