use crate::{driver::CuResult, error::CudaError, kernels::CudaKernel};
use std::{os::raw::c_uint, sync::OnceLock};
/// `CuResult` value of zero; going by its name, the status code reported for a
/// successful driver call.
pub(crate) const CUDA_SUCCESS: CuResult = 0;
/// Upper bound (8) for the pinned upload staging pool.
/// NOTE(review): presumably the maximum number of pooled staging buffers —
/// confirm against the pool implementation, which is not visible here.
pub(crate) const PINNED_UPLOAD_STAGING_POOL_MAX: usize = 8;
/// Byte limit of 4 MiB (`4 * 1024 * 1024`) for pooled pinned `i16` uploads.
/// NOTE(review): presumably uploads above this size bypass the pool — confirm
/// at the use site.
pub(crate) const PINNED_POOLED_I16_UPLOAD_MAX_BYTES: usize = 4 * 1024 * 1024;
/// Maximum width (1024) accepted by the DWT 9/7 row-lift path.
/// NOTE(review): the unit is presumably samples per row — confirm against the
/// kernel launch code.
pub(crate) const DWT97_ROW_LIFT_MAX_WIDTH: i32 = 1024;
/// X-dimension thread count (128) for the cooperative DWT 9/7 row-lift launch.
/// NOTE(review): assumed to be the block dimension — confirm at the launch site.
pub(crate) const DWT97_ROW_LIFT_COOP_THREADS_X: c_uint = 128;
/// Number of rows (4) handled per block by the cooperative DWT 9/7 row-lift
/// launch. NOTE(review): inferred from the name — confirm at the launch site.
pub(crate) const DWT97_ROW_LIFT_COOP_ROWS_PER_BLOCK: c_uint = 4;
/// Name of the environment variable related to inverse-DWT tracing. It is not
/// read anywhere in this section; how its value is interpreted is decided by
/// the code that consumes this constant.
pub(crate) const CUDA_IDWT_TRACE_ENV_VAR: &str = "J2K_CUDA_IDWT_TRACE";
/// Name of the environment variable whose mere presence makes
/// [`dwt97_fused_column_quantize_disabled`] return `true`.
pub(crate) const DWT97_FUSED_COLUMN_QUANTIZE_DISABLE_ENV_VAR: &str =
"J2K_CUDA_DISABLE_DWT97_FUSED_COLUMN_QUANTIZE";
/// Process-wide cache for the answer of [`cuda_stage_timings_disabled`];
/// filled on the first call to that function.
pub(crate) static CUDA_STAGE_TIMINGS_DISABLED: OnceLock<bool> = OnceLock::new();
/// Process-wide cache for the answer of
/// [`dwt97_fused_column_quantize_disabled`]; filled on the first call to that
/// function.
pub(crate) static DWT97_FUSED_COLUMN_QUANTIZE_DISABLED: OnceLock<bool> = OnceLock::new();
/// Reports whether the `J2K_CUDA_DISABLE_STAGE_TIMINGS` environment variable
/// is present.
///
/// Only the presence of the variable is tested; its value is never inspected.
/// The environment is consulted on the first call only: the answer is stored
/// in [`CUDA_STAGE_TIMINGS_DISABLED`] and every later call returns that cached
/// value, even if the environment changes afterwards.
pub(crate) fn cuda_stage_timings_disabled() -> bool {
    const ENV_VAR: &str = "J2K_CUDA_DISABLE_STAGE_TIMINGS";

    let probe_environment = || std::env::var_os(ENV_VAR).is_some();
    let cached: &bool = CUDA_STAGE_TIMINGS_DISABLED.get_or_init(probe_environment);
    *cached
}
/// Reports whether the environment variable named by
/// [`DWT97_FUSED_COLUMN_QUANTIZE_DISABLE_ENV_VAR`] is present.
///
/// Presence alone decides the result; the variable's value is ignored. The
/// lookup runs once per process and is memoized in
/// [`DWT97_FUSED_COLUMN_QUANTIZE_DISABLED`], so later changes to the
/// environment have no effect.
pub(crate) fn dwt97_fused_column_quantize_disabled() -> bool {
    let env_var_is_set =
        || std::env::var_os(DWT97_FUSED_COLUMN_QUANTIZE_DISABLE_ENV_VAR).is_some();
    *DWT97_FUSED_COLUMN_QUANTIZE_DISABLED.get_or_init(env_var_is_set)
}
/// Verifies that the PTX module backing `kernel` was compiled from its CUDA
/// source.
///
/// Three kernel families are checked, each against its own build flag:
///
/// * JPEG 2000 encode kernels against [`J2K_ENCODE_PTX_BUILT_FROM_CUDA`],
/// * HTJ2K encode kernels against [`HTJ2K_ENCODE_PTX_BUILT_FROM_CUDA`],
/// * transcode kernels against [`TRANSCODE_PTX_BUILT_FROM_CUDA`].
///
/// Kernels outside these families are always accepted.
///
/// # Errors
///
/// Returns [`CudaError::InvalidArgument`] naming the missing `.cu` source when
/// `kernel` belongs to a family whose build flag is `false`.
pub(crate) fn ensure_kernel_ptx_built(kernel: CudaKernel) -> Result<(), CudaError> {
    // Classify the kernel first; the families are disjoint, so at most one of
    // these flags is true.
    let is_j2k_encode_kernel = matches!(
        kernel,
        CudaKernel::J2kDeinterleaveToF32
            | CudaKernel::J2kForwardRct
            | CudaKernel::J2kForwardIct
            | CudaKernel::J2kForwardDwt53Horizontal
            | CudaKernel::J2kForwardDwt53Vertical
            | CudaKernel::J2kForwardDwt97Horizontal
            | CudaKernel::J2kForwardDwt97Vertical
            | CudaKernel::J2kQuantizeSubband
            | CudaKernel::J2kQuantizeSubbandStrided
    );
    let is_htj2k_encode_kernel = matches!(
        kernel,
        CudaKernel::Htj2kEncodeCodeblock
            | CudaKernel::Htj2kEncodeCodeblocks
            | CudaKernel::Htj2kPacketizeCleanup
    );
    let is_transcode_kernel = matches!(
        kernel,
        CudaKernel::TranscodeReversible53Idct
            | CudaKernel::TranscodeReversible53VerticalLow
            | CudaKernel::TranscodeReversible53VerticalHigh
            | CudaKernel::TranscodeReversible53HorizontalLow
            | CudaKernel::TranscodeReversible53HorizontalHigh
            | CudaKernel::TranscodeDwt97Idct
            | CudaKernel::TranscodeDwt97RowLift
            | CudaKernel::TranscodeDwt97ColumnLift
            | CudaKernel::TranscodeDwt97IdctBatch
            | CudaKernel::TranscodeDwt97RowLiftBatch
            | CudaKernel::TranscodeDwt97ColumnLiftBatch
            | CudaKernel::TranscodeDwt97QuantizeCodeblocks
    );

    // Pick the diagnostic for the family whose PTX is missing, if any.
    let missing_ptx: Option<&'static str> =
        if is_j2k_encode_kernel && !J2K_ENCODE_PTX_BUILT_FROM_CUDA {
            Some("JPEG 2000 encode CUDA PTX was not built from j2k_encode_kernels.cu")
        } else if is_htj2k_encode_kernel && !HTJ2K_ENCODE_PTX_BUILT_FROM_CUDA {
            Some("HTJ2K encode CUDA PTX was not built from htj2k_encode_kernels.cu")
        } else if is_transcode_kernel && !TRANSCODE_PTX_BUILT_FROM_CUDA {
            Some("transcode CUDA PTX was not built from transcode_kernels.cu")
        } else {
            None
        };

    if let Some(reason) = missing_ptx {
        return Err(CudaError::InvalidArgument {
            message: reason.to_string(),
        });
    }
    Ok(())
}
/// Verifies that the cuda-oxide `CopyU8` PTX is available in this build.
///
/// Only compiled when the `cuda-oxide-copy-u8` feature is enabled.
///
/// # Errors
///
/// Returns [`CudaError::InvalidArgument`] when
/// [`CUDA_OXIDE_COPY_U8_PTX_BUILT`] is `false`.
#[cfg(feature = "cuda-oxide-copy-u8")]
pub(crate) fn ensure_cuda_oxide_copy_u8_ptx_built() -> Result<(), CudaError> {
    if !CUDA_OXIDE_COPY_U8_PTX_BUILT {
        return Err(CudaError::InvalidArgument {
            message: String::from(
                "cuda-oxide CopyU8 PTX was not built; set J2K_REQUIRE_CUDA_OXIDE_COPY_U8 on a Linux cuda-oxide host to require it",
            ),
        });
    }
    Ok(())
}
/// `true` when the crate is compiled with the `j2k_cuda_j2k_encode_ptx_built`
/// cfg set. NOTE(review): presumably emitted by the build script after
/// compiling `j2k_encode_kernels.cu` — confirm in the build script.
pub(crate) const J2K_ENCODE_PTX_BUILT_FROM_CUDA: bool = cfg!(j2k_cuda_j2k_encode_ptx_built);
/// `true` when the crate is compiled with the `j2k_cuda_htj2k_encode_ptx_built`
/// cfg set. NOTE(review): presumably emitted by the build script after
/// compiling `htj2k_encode_kernels.cu` — confirm in the build script.
pub(crate) const HTJ2K_ENCODE_PTX_BUILT_FROM_CUDA: bool = cfg!(j2k_cuda_htj2k_encode_ptx_built);
/// `true` when the crate is compiled with the `j2k_cuda_transcode_ptx_built`
/// cfg set. NOTE(review): presumably emitted by the build script after
/// compiling `transcode_kernels.cu` — confirm in the build script.
pub(crate) const TRANSCODE_PTX_BUILT_FROM_CUDA: bool = cfg!(j2k_cuda_transcode_ptx_built);
/// `true` when the crate is compiled with the `j2k_cuda_oxide_copy_u8_built`
/// cfg set. Only defined when the `cuda-oxide-copy-u8` feature is enabled.
#[cfg(feature = "cuda-oxide-copy-u8")]
pub(crate) const CUDA_OXIDE_COPY_U8_PTX_BUILT: bool = cfg!(j2k_cuda_oxide_copy_u8_built);
/// Returns whether the transcode PTX was built for this compilation of the
/// crate.
///
/// This is the public view of [`TRANSCODE_PTX_BUILT_FROM_CUDA`], i.e. whether
/// the `j2k_cuda_transcode_ptx_built` cfg was set at compile time. The value
/// is a compile-time constant and never changes at run time.
#[must_use]
pub fn transcode_kernels_built() -> bool {
TRANSCODE_PTX_BUILT_FROM_CUDA
}