j2k-cuda 0.6.1

NVIDIA CUDA GPU adapter for Rust JPEG 2000 and HTJ2K decode/encode paths
Documentation
// SPDX-License-Identifier: Apache-2.0

use j2k_core::{BackendKind, BackendRequest, PixelFormat};
#[cfg(feature = "cuda-runtime")]
use j2k_cuda_runtime::CudaError;

use crate::surface::Storage;
use crate::{CudaSession, CudaSurfaceStats, Error, Surface, SurfaceResidency};

/// Rejection reason that `wrap_surface` attaches to `Error::UnsupportedCudaRequest` when it is
/// called with `BackendRequest::Cuda`.
const CPU_STAGED_CUDA_REQUIRES_EXPLICIT_API: &str =
    "CPU-staged CUDA upload requires the explicit CPU-staged API; BackendRequest::Cuda only accepts resident CUDA HTJ2K decode";

pub(crate) fn wrap_surface(
    bytes: Vec<u8>,
    dimensions: (u32, u32),
    fmt: PixelFormat,
    backend: BackendRequest,
    session: &mut CudaSession,
) -> Result<Surface, Error> {
    validate_surface_request(backend)?;
    let pitch_bytes = dimensions.0 as usize * fmt.bytes_per_pixel();
    match backend {
        BackendRequest::Cpu | BackendRequest::Auto => {
            if j2k_profile::gpu_route_profile_enabled() {
                let request_s = format!("{backend:?}");
                let fmt_s = format!("{fmt:?}");
                let width_s = dimensions.0.to_string();
                let height_s = dimensions.1.to_string();
                j2k_profile::emit_gpu_route_profile(
                    "j2k",
                    "cuda",
                    &[
                        ("op", "wrap_surface"),
                        ("request", request_s.as_str()),
                        ("fmt", fmt_s.as_str()),
                        ("width", width_s.as_str()),
                        ("height", height_s.as_str()),
                        ("decision", "host_surface"),
                    ],
                );
            }
            Ok(Surface {
                backend: BackendKind::Cpu,
                residency: SurfaceResidency::Host,
                dimensions,
                fmt,
                pitch_bytes,
                stats: CudaSurfaceStats::default(),
                storage: Storage::Host(bytes),
            })
        }
        BackendRequest::Cuda => {
            let _ = (bytes, session);
            Err(Error::UnsupportedCudaRequest {
                reason: CPU_STAGED_CUDA_REQUIRES_EXPLICIT_API,
            })
        }
        BackendRequest::Metal => Err(Error::UnsupportedBackend { request: backend }),
    }
}

/// Builds a surface from host `bytes` by delegating to `wrap_cuda_surface`.
///
/// The pitch passed along is `width * bytes_per_pixel`, where `width` is `dimensions.0`.
///
/// # Errors
///
/// Propagates whatever `wrap_cuda_surface` returns.
pub(crate) fn wrap_cpu_staged_cuda_surface(
    bytes: &[u8],
    dimensions: (u32, u32),
    fmt: PixelFormat,
    session: &mut CudaSession,
) -> Result<Surface, Error> {
    let (width, _height) = dimensions;
    let row_pitch = width as usize * fmt.bytes_per_pixel();
    wrap_cuda_surface(bytes, dimensions, fmt, row_pitch, session)
}

pub(crate) fn validate_surface_request(backend: BackendRequest) -> Result<(), Error> {
    j2k_core::validate_cuda_surface_backend_request(backend)
        .map_err(|request| Error::UnsupportedBackend { request })
}

/// Copies `bytes` through the session's CUDA context and wraps the result in a `Surface`.
///
/// The returned surface reports `BackendKind::Cuda` with
/// `SurfaceResidency::CpuStagedCudaUpload`, and its stats are taken from the dispatch counters
/// returned by the copy. When GPU route profiling is enabled, a `cuda_upload` decision record
/// is emitted before the surface is built.
///
/// # Errors
///
/// Propagates the error from `CudaSession::cuda_context`, and maps a failed copy through
/// `cuda_error`.
#[cfg(feature = "cuda-runtime")]
fn wrap_cuda_surface(
    bytes: &[u8],
    dimensions: (u32, u32),
    fmt: PixelFormat,
    pitch_bytes: usize,
    session: &mut CudaSession,
) -> Result<Surface, Error> {
    // Keep the context bound for the whole function so it is released only on return.
    let context = session.cuda_context()?;
    let (device_buffer, dispatch_stats) = context
        .copy_with_kernel(bytes)
        .map_err(cuda_error)?
        .into_parts();

    if j2k_profile::gpu_route_profile_enabled() {
        let (width, height) = dimensions;
        let pixel_format = format!("{fmt:?}");
        let width_text = width.to_string();
        let height_text = height.to_string();
        let dispatch_text = dispatch_stats.kernel_dispatches().to_string();
        j2k_profile::emit_gpu_route_profile(
            "j2k",
            "cuda",
            &[
                ("op", "wrap_surface"),
                ("request", "Cuda"),
                ("fmt", pixel_format.as_str()),
                ("width", width_text.as_str()),
                ("height", height_text.as_str()),
                ("decision", "cuda_upload"),
                ("kernel_dispatches", dispatch_text.as_str()),
            ],
        );
    }

    let surface_stats = CudaSurfaceStats {
        total: dispatch_stats.kernel_dispatches(),
        copy: dispatch_stats.copy_kernel_dispatches(),
        decode: dispatch_stats.decode_kernel_dispatches(),
    };
    Ok(Surface {
        backend: BackendKind::Cuda,
        residency: SurfaceResidency::CpuStagedCudaUpload,
        dimensions,
        fmt,
        pitch_bytes,
        stats: surface_stats,
        storage: Storage::Cuda(device_buffer),
    })
}

/// Variant compiled when the `cuda-runtime` feature is disabled.
///
/// Emits a `cuda_unavailable` decision record when GPU route profiling is enabled. The byte,
/// pitch and session arguments are ignored.
///
/// # Errors
///
/// Always returns `Error::CudaUnavailable`.
#[cfg(not(feature = "cuda-runtime"))]
fn wrap_cuda_surface(
    _bytes: &[u8],
    dimensions: (u32, u32),
    fmt: PixelFormat,
    _pitch_bytes: usize,
    _session: &mut CudaSession,
) -> Result<Surface, Error> {
    if j2k_profile::gpu_route_profile_enabled() {
        let (width, height) = dimensions;
        let pixel_format = format!("{fmt:?}");
        let width_text = width.to_string();
        let height_text = height.to_string();
        j2k_profile::emit_gpu_route_profile(
            "j2k",
            "cuda",
            &[
                ("op", "wrap_surface"),
                ("request", "Cuda"),
                ("fmt", pixel_format.as_str()),
                ("width", width_text.as_str()),
                ("height", height_text.as_str()),
                ("decision", "cuda_unavailable"),
            ],
        );
    }

    Err(Error::CudaUnavailable)
}

/// Converts a runtime `CudaError` into this crate's `Error`.
///
/// Errors for which `is_unavailable()` is true become `Error::CudaUnavailable`; every other
/// error becomes `Error::CudaRuntime` carrying the error's `to_string()` text.
#[cfg(feature = "cuda-runtime")]
// Taken by value so the function can be passed directly to `map_err`.
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn cuda_error(error: CudaError) -> Error {
    if error.is_unavailable() {
        return Error::CudaUnavailable;
    }

    Error::CudaRuntime {
        message: error.to_string(),
    }
}