Function rcudnn_sys::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags

pub unsafe extern "C" fn cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
    numBlocks: *mut c_int,
    func: *const c_void,
    blockSize: c_int,
    dynamicSMemSize: usize,
    flags: c_uint
) -> cudaError_t
Expand description

Returns occupancy for a device function with the specified flags.

Returns in `*numBlocks` the maximum number of active blocks per streaming multiprocessor for the device function.

The `flags` parameter controls how special cases are handled. Valid flags include:

  • `cudaOccupancyDefault`: keeps the same default behavior as `cudaOccupancyMaxActiveBlocksPerMultiprocessor`.

  • `cudaOccupancyDisableCachingOverride`: This flag suppresses the default behavior on platforms where global caching affects occupancy. On such platforms, if caching is enabled but per-block SM resource usage would result in zero occupancy, the occupancy calculator by default calculates the occupancy as if caching were disabled. Setting this flag makes the occupancy calculator return 0 in such cases. More information about this feature can be found in the “Unified L1/Texture Cache” section of the Maxwell tuning guide.

Parameters:

  • `numBlocks` - Returned occupancy

  • `func` - Kernel function for which occupancy is calculated

  • `blockSize` - Block size the kernel is intended to be launched with

  • `dynamicSMemSize` - Per-block dynamic shared memory usage intended, in bytes

  • `flags` - Requested behavior for the occupancy calculator

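Returns: `cudaSuccess`, `cudaErrorInvalidDevice`, `cudaErrorInvalidDeviceFunction`, `cudaErrorInvalidValue`, `cudaErrorUnknown`.

Notes:

  • This function may also return error codes from previous, asynchronous launches.

  • This function may also return `cudaErrorInitializationError`, `cudaErrorInsufficientDriver` or `cudaErrorNoDevice` if this call tries to initialize internal CUDA runtime state.

  • As specified by `cudaStreamAddCallback`, no CUDA function may be called from a callback. `cudaErrorNotPermitted` may, but is not guaranteed to, be returned as a diagnostic in such a case.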

See also:

  • `cudaOccupancyMaxActiveBlocksPerMultiprocessor`

  • `cudaOccupancyMaxPotentialBlockSize(int*, int*, T, size_t, int)` (C++ API)

  • `cudaOccupancyMaxPotentialBlockSizeWithFlags(int*, int*, T, size_t, int, unsigned int)` (C++ API)

  • `cudaOccupancyMaxPotentialBlockSizeVariableSMem(int*, int*, T, UnaryFunction, int)` (C++ API)

  • `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(int*, int*, T, UnaryFunction, int, unsigned int)` (C++ API)

  • `cudaOccupancyAvailableDynamicSMemPerBlock(size_t*, T, int, int)` (C++ API)

  • `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` (driver API)