/**
* \file dnn/src/cuda/convpooling/conv_pooling_utils.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include <cuda_runtime_api.h>
#include <math.h>
#include <algorithm>
#include "src/cuda/utils.cuh"
//#include "./helper.cuh"
namespace megdnn {
namespace cuda {
namespace conv_pool {
#define CUDA_CHKERR(call) \
do { \
cudaError_t code = (call); \
megdnn_assert( \
code == cudaSuccess, "cuda err %d: %s (call %s at %s:%s:%d)", \
int(code), cudaGetErrorString(code), #call, __FILE__, __func__, \
__LINE__); \
} while (0)
#define CUDA_CHK_KERN_ERR CUDA_CHKERR(cudaDeviceSynchronize());
static inline int __host__ align_to_warp(int n) {
int x = n / 32 * 32;
if (!x)
x = n;
return x;
}
// --- Nonline ---
struct Relu {
static __device__ float apply(float x) { return x > 0 ? x : 0; }
};
struct Sigmoid {
static __device__ float apply(float x) {
float exp_value = exp((double)-x);
return 1 / (1 + exp_value);
}
};
struct Identity {
static __device__ float apply(float x) { return x; }
};
// --- Static Reduce ---
template <int size, class Op>
struct StaticReduce {
static __device__ float apply(const float* val) {
const int half = size / 2;
return Op::apply(
StaticReduce<half, Op>::apply(val),
StaticReduce<size - half, Op>::apply(val + half));
}
};
template <class Op>
struct StaticReduce<1, Op> {
static __device__ float apply(const float* val) { return val[0]; }
};
template <class Op>
struct StaticReduce<2, Op> {
static __device__ float apply(const float* val) {
return Op::apply(val[0], val[1]);
}
};
struct OpAdd {
static __device__ float apply(float a, float b) { return a + b; }
};
struct OpMax {
static __device__ float apply(float a, float b) { return max(a, b); }
};
struct IdxGetterConvolution {
static inline __device__ int apply(int kern, int i, int p) {
return kern - i - 1 + p;
}
};
struct IdxGetterCorrRel {
static inline __device__ int apply(int kern, int i, int p) { return i - p; }
};
// --- Pooling ---
struct MeanPooler {
template <int pool_shape_h, int pool_shape_w>
static __device__ float apply(const float* val) {
const int size = pool_shape_h * pool_shape_w;
return StaticReduce<size, OpAdd>::apply(val) / size;
}
};
struct MaxPooler {
template <int pool_shape_h, int pool_shape_w>
static __device__ float apply(const float* val) {
return StaticReduce<pool_shape_h * pool_shape_w, OpMax>::apply(val);
}
};
// --- Reader ---
class Tex1DReader {
cudaTextureObject_t m_tex;
int m_base_offset, m_chl_stride, m_row_stride, m_row_offset;
// size_t batch_, chal_, height_, weight_;
public:
// Set attributes of texture Object
/*__device__ void init(cudaTextureObject_t& tex,
size_t batch, size_t chal, size_t height, size_t weight) {
batch_ = batch;
chal_ = chal;
height_ = height;
weight_ = weight;
m_chl_stride = height * weight;
m_row_stride = weight;
}
__device__ void set_pos(cudaTextureObject_t& tex,
// Current position
size_t n, size_t c, size_t h, size_t w) {
m_tex = tex;
m_base_offset = ((n * chal_ + c) * height_ + h) * weight_ + w;
}
*/
__device__ void set_pos(
cudaTextureObject_t& tex,
// Current position
int chal, int height, int weight, int n, int c, int h, int w) {
m_chl_stride = height * weight;
m_row_stride = weight;
m_tex = tex;
m_base_offset = ((n * chal + c) * height + h) * weight + w;
}
__device__ void reset_row() { m_row_offset = m_base_offset; }
__device__ void next_row() { m_row_offset += m_row_stride; }
__device__ void next_channel() { m_base_offset += m_chl_stride; }
__device__ float get(int /*dr*/, int dc) {
return tex1Dfetch<float>(m_tex, dc + m_row_offset);
}
__device__ float get(int idx) {
return tex1Dfetch<float>(m_tex, idx + m_base_offset);
}
};
extern __host__ void create_cuda_tex(
float* input, cudaTextureObject_t& tex, size_t N, size_t IC, size_t IH,
size_t IW);
} // namespace conv_pool
} // namespace cuda
} // namespace megdnn
// vim: syntax=cpp.doxygen