/**
* \file dnn/src/cuda/query_blocksize.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
namespace megdnn {
namespace cuda {
struct LaunchConfig {
int grid_size; //!< minimal grid size
int block_size; //!< suggested block size
};
//! get shared mem size given block size
struct SmemGetter {
typedef int (*func_t)(int block_size, void* user_data);
func_t func;
void* user_data;
SmemGetter(func_t func_ = 0, void* user_data_ = 0)
: func(func_), user_data(user_data_) {}
};
/*!
* \brief cudaOccupancyMaxPotentialBlockSize only available when compiled by
* nvcc; so we need to wrap this function and expose it to normal c++
*
* Note that the result is cached for kernel ptr.
*/
LaunchConfig query_launch_config_for_kernel(
const void* kern, const SmemGetter& smem = SmemGetter());
//! return block size only
static inline int query_blocksize_for_kernel(const void* kern) {
return query_launch_config_for_kernel(kern).block_size;
}
template <typename T>
static inline int query_blocksize_for_kernel(T kern) {
return query_blocksize_for_kernel(reinterpret_cast<const void*>(kern));
}
namespace detail {
LaunchConfig query_launch_config_for_kernel_uncached(
const void* kern, const SmemGetter& smem);
}
} // namespace cuda
} // namespace megdnn
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}