#include <cstring>
#include "./opr_impl.h"
#include "./rotate.cuh"
#include "src/common/utils.h"
#include "src/cuda/handle.h"
#include "src/cuda/utils.h"
namespace megdnn {
namespace cuda {
namespace rotate_intl {
/*!
 * \brief Select the compile-time rotation direction from a runtime flag.
 *
 * Asserts that the input and output channel counts agree (IC == OC) and then
 * forwards every argument except OC and \p clockwise to
 * rotate::rotate<ctype, true> (clockwise) or rotate::rotate<ctype, false>
 * (otherwise).
 *
 * \param src,dst          input / output element pointers
 * \param N                batch count forwarded to rotate::rotate
 * \param IH,IW,IC         input extents
 * \param istride0..2      input strides
 * \param OH,OW,OC         output extents (OC is only used by the assertion)
 * \param ostride0..2      output strides
 * \param clockwise        picks the boolean template argument of rotate::rotate
 * \param stream           stream handle forwarded to rotate::rotate
 */
template <typename ctype>
void rotate_exec(
        const ctype* src, ctype* dst, size_t N, size_t IH, size_t IW, size_t IC,
        size_t istride0, size_t istride1, size_t istride2, size_t OH, size_t OW,
        size_t OC, size_t ostride0, size_t ostride1, size_t ostride2, bool clockwise,
        cudaStream_t stream) {
    megdnn_assert(IC == OC);

    // Counter-clockwise path handled first; everything below is clockwise.
    if (!clockwise) {
        rotate::rotate<ctype, false>(
                src, dst, N, IH, IW, IC, istride0, istride1, istride2, OH, OW,
                ostride0, ostride1, ostride2, stream);
        return;
    }

    rotate::rotate<ctype, true>(
            src, dst, N, IH, IW, IC, istride0, istride1, istride2, OH, OW, ostride0,
            ostride1, ostride2, stream);
}
}
/*!
 * \brief Operator entry point: validate, then dispatch on the source dtype.
 *
 * After check_exec() has inspected the layouts and workspace size, the batch
 * dimension (src.layout.shape[0]) is walked in chunks of at most
 * max_batch_x_channel_size() entries. For each chunk the src/dst pointers are
 * advanced by `first_batch * stride[0]` elements of their respective layouts
 * and rotate_intl::rotate_exec is invoked for the dtype whose enum value
 * matches src.layout.dtype. If no computing dtype matches, nothing is
 * dispatched.
 */
void RotateImpl::exec(
        _megdnn_tensor_in src, _megdnn_tensor_in dst, _megdnn_workspace workspace) {
    check_exec(src.layout, dst.layout, workspace.size);
    auto stream = cuda_stream(handle());

    const auto& in_layout = src.layout;
    const auto& out_layout = dst.layout;
    const size_t N = in_layout.shape[0];
    const size_t max_batch_x_channel = max_batch_x_channel_size();

// Expanded once per computing dtype by MEGDNN_FOREACH_COMPUTING_DTYPE; reads
// first_batch, in_offset, out_offset and batch_size from the enclosing loop.
#define ROTATE_DISPATCH(DType)                                                  \
    if (in_layout.dtype.enumv() == DTypeTrait<DType>::enumv) {                  \
        using ctype = typename DTypeTrait<DType>::ctype;                        \
        ctype* chunk_src = src.ptr<ctype>() + in_offset;                        \
        ctype* chunk_dst = dst.ptr<ctype>() + out_offset;                       \
        rotate_intl::rotate_exec<ctype>(                                        \
                chunk_src, chunk_dst, batch_size, in_layout.shape[1],           \
                in_layout.shape[2], in_layout.shape[3], in_layout.stride[0],    \
                in_layout.stride[1], in_layout.stride[2], out_layout.shape[1],  \
                out_layout.shape[2], out_layout.shape[3], out_layout.stride[0], \
                out_layout.stride[1], out_layout.stride[2], param().clockwise,  \
                stream);                                                        \
    }

    // A do-while issues exactly one dispatch when N <= max_batch_x_channel
    // (including N == 0) and otherwise keeps going until every batch has been
    // covered.
    size_t first_batch = 0;
    do {
        const auto in_offset = first_batch * in_layout.stride[0];
        const auto out_offset = first_batch * out_layout.stride[0];
        // The last chunk may hold fewer than max_batch_x_channel batches.
        const size_t batch_size =
                std::min<size_t>(N - first_batch, max_batch_x_channel);

        MEGDNN_FOREACH_COMPUTING_DTYPE(ROTATE_DISPATCH)

        first_batch += max_batch_x_channel;
    } while (first_batch < N);

#undef ROTATE_DISPATCH
}
} }