#pragma once
#include <cuda_runtime_api.h>
namespace megdnn {
namespace cuda {
namespace conv_pool {

// ---------------------------------------------------------------------------
// Compile-time constants.
//
// These are preprocessor macros, so they are NOT scoped by the enclosing
// namespaces: every translation unit that includes this header sees them.
// NOTE(review): the per-macro remarks below are inferred from the macro names
// only; the code that consumes them is not part of this header -- confirm
// against the implementation before relying on them.
// ---------------------------------------------------------------------------

// Presumably the number of pixels handled by a single thread -- TODO confirm.
#define NR_PXL_PER_THREAD 4

// Presumably the thread count used per block at launch -- TODO confirm.
#define NR_THREAD_PER_BLOCK 192

// 32768 == 32 * 1024. Presumably a byte budget for shared memory -- TODO
// confirm the unit against the implementation.
#define MAX_SHARED_MEM_SIZE 32768

// 134217728 == 2^27. Presumably an upper bound related to texture objects --
// TODO confirm whether this counts elements or bytes.
#define MAX_TEX_OBJ_SIZE 134217728

// Defined with no replacement text: it can only be queried with
// #ifdef / #if defined(...). What it toggles is not visible in this header.
#define HEIGHT_EQUALS_WITH_WEIGHT

// ---------------------------------------------------------------------------
// Mode selectors.
//
// All three are unscoped enums, so their enumerators (AVERAGE, MAX, RELU, ...)
// are injected directly into namespace conv_pool. The numeric values are
// spelled out explicitly and must stay as they are.
// ---------------------------------------------------------------------------

// Pooling mode selector.
enum PoolModeCu {
    AVERAGE = 0,
    MAX = 1
};

// Convolution mode selector.
enum ConvModeCu {
    CROSS_CORRELATION = 0,
    CONVOLUTION = 1
};

// Non-linearity selector.
enum NonlineModeCu {
    IDENTITY = 0,
    RELU = 1,
    SIGMOID = 2
};

// Declaration only; the definition lives in another translation unit.
//
// Parameters (types are authoritative; the remarks are taken from the
// parameter names and are NOT verified by anything in this header):
//   stream                  CUDA stream handle passed through to the
//                           implementation.
//   input, kernel, output   float buffers; kernel is read-only (const) as far
//                           as this signature is concerned, input and output
//                           are non-const.
//                           NOTE(review): memory space and layout are not
//                           visible here -- confirm against the definition.
//   N, IC, IH, IW           presumably batch size and input
//                           channels/height/width -- TODO confirm.
//   OC, OH, OW              presumably output channels/height/width -- TODO
//                           confirm.
//   FH, FW                  presumably filter height/width -- TODO confirm.
//   (four unnamed size_t)   left unnamed in this declaration; their purpose
//                           cannot be determined from this header. Callers
//                           must still pass four values in these positions.
//   pool_shape_h/_w         presumably the pooling window extents -- TODO
//                           confirm.
//   poolMode, convMode,
//   nonlineMode             mode selectors, see the enums above.
//   bias                    read-only float buffer.
//                           NOTE(review): whether it may be null is not
//                           visible here.
void start_gpu_xcorr_pool_with_texture_obj(
        cudaStream_t stream,
        float* input,
        const float* kernel,
        float* output,
        size_t N,
        size_t IC,
        size_t IH,
        size_t IW,
        size_t OC,
        size_t OH,
        size_t OW,
        size_t FH,
        size_t FW,
        size_t,
        size_t,
        size_t,
        size_t,
        size_t pool_shape_h,
        size_t pool_shape_w,
        PoolModeCu poolMode,
        ConvModeCu convMode,
        NonlineModeCu nonlineMode,
        const float* bias);

}  // namespace conv_pool
}  // namespace cuda
}  // namespace megdnn