Skip to main content

j2k_jpeg_cuda/
codec.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use j2k_core::{
4    submit_ready_device, BackendRequest, Downscale, ImageCodec, PixelFormat, ReadySubmission, Rect,
5    TileBatchDecodeDevice, TileBatchDecodeManyDevice, TileBatchDecodeSubmit,
6};
7#[cfg(feature = "cuda-runtime")]
8use j2k_cuda_runtime::CudaDeviceBuffer;
9use j2k_jpeg::{
10    decode_tile_into_in_context, decode_tile_region_into_in_context,
11    decode_tile_region_scaled_into_in_context, decode_tile_scaled_into_in_context,
12    Decoder as CpuDecoder, DecoderContext as CpuDecoderContext, JpegDecodeOp, JpegDecodeRequest,
13    JpegResolvedDecode, JpegResolvedDecodePath, ScratchPool as CpuScratchPool,
14    Warning as CpuWarning,
15};
16
17use crate::owned_decode::decode_owned_cuda_rgb8;
18#[cfg(feature = "cuda-runtime")]
19use crate::owned_decode::decode_owned_cuda_rgb8_into;
20use crate::runtime::{validate_surface_request, wrap_surface};
21use crate::{CudaSession, Error, Surface};
22
/// JPEG codec marker used by J2K's generic CUDA decode traits.
///
/// The type carries no data; it exists only so the decode traits can be
/// implemented on it and selected by callers at the type level.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct Codec;
26
27impl ImageCodec for Codec {
28    type Error = Error;
29    type Warning = CpuWarning;
30    type Pool = CpuScratchPool;
31}
32
33fn rejected_decode_path_error(backend: BackendRequest, reason: &'static str) -> Error {
34    match backend {
35        BackendRequest::Cuda => Error::UnsupportedCudaRequest { reason },
36        other => Error::UnsupportedBackend { request: other },
37    }
38}
39
40impl Codec {
41    #[cfg(feature = "cuda-runtime")]
42    /// Run experimental chunked JPEG entropy self-sync diagnostics for a 4:2:0 RGB8 tile.
43    ///
44    /// This does not decode pixels and does not affect production CUDA routing.
45    pub fn diagnose_tile_rgb8_chunked_entropy_with_session(
46        input: &[u8],
47        config: j2k_cuda_runtime::CudaJpegChunkedEntropyConfig,
48        session: &mut CudaSession,
49    ) -> Result<j2k_cuda_runtime::CudaJpegChunkedEntropyReport, Error> {
50        crate::owned_decode::diagnose_owned_cuda_420_entropy(input, config, session)
51    }
52
53    #[cfg(feature = "cuda-runtime")]
54    /// Decode one full JPEG tile to caller-owned CUDA RGB8 memory using a session.
55    ///
56    /// This is a strict J2K-owned CUDA-kernel path and currently supports
57    /// full-tile RGB8 fast 4:2:0, 4:2:2, and 4:4:4 YCbCr JPEG inputs.
58    pub fn decode_tile_rgb8_into_cuda_buffer_with_session(
59        input: &[u8],
60        output: &CudaDeviceBuffer,
61        pitch_bytes: usize,
62        session: &mut CudaSession,
63    ) -> Result<crate::CudaSurfaceStats, Error> {
64        let dimensions = CpuDecoder::inspect(input)?.dimensions;
65        decode_owned_cuda_rgb8_into(input, dimensions, session, output, pitch_bytes)
66    }
67
68    /// Decode many JPEG tiles to J2K surfaces using a caller-owned CUDA session.
69    pub fn decode_tiles_to_device_with_session(
70        inputs: &[&[u8]],
71        fmt: PixelFormat,
72        backend: BackendRequest,
73        session: &mut CudaSession,
74    ) -> Result<Vec<Surface>, Error> {
75        let mut ctx = j2k_core::DecoderContext::<CpuDecoderContext>::new();
76        let mut pool = CpuScratchPool::new();
77        Self::decode_tiles_to_device_with_session_in_context(
78            &mut ctx, &mut pool, inputs, fmt, backend, session,
79        )
80    }
81
82    fn decode_tiles_to_device_with_session_in_context(
83        ctx: &mut j2k_core::DecoderContext<CpuDecoderContext>,
84        pool: &mut CpuScratchPool,
85        inputs: &[&[u8]],
86        fmt: PixelFormat,
87        backend: BackendRequest,
88        session: &mut CudaSession,
89    ) -> Result<Vec<Surface>, Error> {
90        validate_surface_request(backend)?;
91        if inputs.is_empty() {
92            return Ok(Vec::new());
93        }
94
95        inputs
96            .iter()
97            .map(|input| Self::decode_tile_to_surface_impl(ctx, session, pool, input, fmt, backend))
98            .collect()
99    }
100
101    fn decode_tile_to_surface_impl(
102        ctx: &mut j2k_core::DecoderContext<CpuDecoderContext>,
103        session: &mut CudaSession,
104        pool: &mut CpuScratchPool,
105        input: &[u8],
106        fmt: PixelFormat,
107        backend: BackendRequest,
108    ) -> Result<Surface, Error> {
109        validate_surface_request(backend)?;
110        let resolved = JpegResolvedDecode::inspect(
111            input,
112            JpegDecodeRequest {
113                backend,
114                fmt,
115                op: JpegDecodeOp::Full,
116            },
117        )?;
118        if resolved.path == JpegResolvedDecodePath::OwnedCudaRgb8 {
119            return decode_owned_cuda_rgb8(input, resolved.capabilities.info.dimensions, session);
120        }
121        if let JpegResolvedDecodePath::Rejected { backend, reason } = resolved.path {
122            return Err(rejected_decode_path_error(backend, reason));
123        }
124        let dims = (resolved.output_rect.w, resolved.output_rect.h);
125        let stride = dims.0 as usize * fmt.bytes_per_pixel();
126        let mut out = vec![0u8; stride * dims.1 as usize];
127        decode_tile_into_in_context(input, ctx.codec_mut(), pool, &mut out, stride, fmt)?;
128        wrap_surface(out, dims, fmt, backend, session)
129    }
130
131    fn decode_tile_region_to_surface_impl(
132        ctx: &mut j2k_core::DecoderContext<CpuDecoderContext>,
133        session: &mut CudaSession,
134        pool: &mut CpuScratchPool,
135        input: &[u8],
136        fmt: PixelFormat,
137        roi: Rect,
138        backend: BackendRequest,
139    ) -> Result<Surface, Error> {
140        validate_surface_request(backend)?;
141        if backend == BackendRequest::Cuda {
142            return Err(Error::UnsupportedCudaRequest {
143                reason: "J2K CUDA JPEG owned decode does not support region output",
144            });
145        }
146        let dims = (roi.w, roi.h);
147        let stride = dims.0 as usize * fmt.bytes_per_pixel();
148        let mut out = vec![0u8; stride * dims.1 as usize];
149        decode_tile_region_into_in_context(
150            input,
151            ctx.codec_mut(),
152            pool,
153            &mut out,
154            stride,
155            fmt,
156            roi.into(),
157        )?;
158        wrap_surface(out, dims, fmt, backend, session)
159    }
160
161    fn decode_tile_scaled_to_surface_impl(
162        ctx: &mut j2k_core::DecoderContext<CpuDecoderContext>,
163        session: &mut CudaSession,
164        pool: &mut CpuScratchPool,
165        input: &[u8],
166        fmt: PixelFormat,
167        scale: Downscale,
168        backend: BackendRequest,
169    ) -> Result<Surface, Error> {
170        validate_surface_request(backend)?;
171        if backend == BackendRequest::Cuda {
172            return Err(Error::UnsupportedCudaRequest {
173                reason: "J2K CUDA JPEG owned decode does not support scaled output",
174            });
175        }
176        let source_dims = CpuDecoder::inspect(input)?.dimensions;
177        let dims = (
178            source_dims.0.div_ceil(scale.denominator()),
179            source_dims.1.div_ceil(scale.denominator()),
180        );
181        let stride = dims.0 as usize * fmt.bytes_per_pixel();
182        let mut out = vec![0u8; stride * dims.1 as usize];
183        decode_tile_scaled_into_in_context(
184            input,
185            ctx.codec_mut(),
186            pool,
187            &mut out,
188            stride,
189            fmt,
190            scale,
191        )?;
192        wrap_surface(out, dims, fmt, backend, session)
193    }
194
195    #[allow(clippy::too_many_arguments)]
196    fn decode_tile_region_scaled_to_surface_impl(
197        ctx: &mut j2k_core::DecoderContext<CpuDecoderContext>,
198        session: &mut CudaSession,
199        pool: &mut CpuScratchPool,
200        input: &[u8],
201        fmt: PixelFormat,
202        roi: Rect,
203        scale: Downscale,
204        backend: BackendRequest,
205    ) -> Result<Surface, Error> {
206        validate_surface_request(backend)?;
207        if backend == BackendRequest::Cuda {
208            return Err(Error::UnsupportedCudaRequest {
209                reason: "J2K CUDA JPEG owned decode does not support scaled region output",
210            });
211        }
212        let dims = {
213            let scaled = roi.scaled_covering(scale);
214            (scaled.w, scaled.h)
215        };
216        let stride = dims.0 as usize * fmt.bytes_per_pixel();
217        let mut out = vec![0u8; stride * dims.1 as usize];
218        decode_tile_region_scaled_into_in_context(
219            input,
220            ctx.codec_mut(),
221            pool,
222            &mut out,
223            stride,
224            fmt,
225            roi.into(),
226            scale,
227        )?;
228        wrap_surface(out, dims, fmt, backend, session)
229    }
230}
231
232impl TileBatchDecodeSubmit for Codec {
233    type Context = CpuDecoderContext;
234    type Session = CudaSession;
235    type DeviceSurface = Surface;
236    type SubmittedSurface = ReadySubmission<Surface, Error>;
237
238    fn submit_tile_to_device(
239        ctx: &mut j2k_core::DecoderContext<Self::Context>,
240        session: &mut Self::Session,
241        pool: &mut Self::Pool,
242        input: &[u8],
243        fmt: PixelFormat,
244        backend: BackendRequest,
245    ) -> Result<Self::SubmittedSurface, Self::Error> {
246        validate_surface_request(backend)?;
247        Ok(submit_ready_device(session, |session| {
248            Self::decode_tile_to_surface_impl(ctx, session, pool, input, fmt, backend)
249        }))
250    }
251
252    fn submit_tile_region_to_device(
253        ctx: &mut j2k_core::DecoderContext<Self::Context>,
254        session: &mut Self::Session,
255        pool: &mut Self::Pool,
256        input: &[u8],
257        fmt: PixelFormat,
258        roi: Rect,
259        backend: BackendRequest,
260    ) -> Result<Self::SubmittedSurface, Self::Error> {
261        validate_surface_request(backend)?;
262        Ok(submit_ready_device(session, |session| {
263            Self::decode_tile_region_to_surface_impl(ctx, session, pool, input, fmt, roi, backend)
264        }))
265    }
266
267    fn submit_tile_scaled_to_device(
268        ctx: &mut j2k_core::DecoderContext<Self::Context>,
269        session: &mut Self::Session,
270        pool: &mut Self::Pool,
271        input: &[u8],
272        fmt: PixelFormat,
273        scale: Downscale,
274        backend: BackendRequest,
275    ) -> Result<Self::SubmittedSurface, Self::Error> {
276        validate_surface_request(backend)?;
277        Ok(submit_ready_device(session, |session| {
278            Self::decode_tile_scaled_to_surface_impl(ctx, session, pool, input, fmt, scale, backend)
279        }))
280    }
281
282    fn submit_tile_region_scaled_to_device(
283        ctx: &mut j2k_core::DecoderContext<Self::Context>,
284        session: &mut Self::Session,
285        pool: &mut Self::Pool,
286        input: &[u8],
287        fmt: PixelFormat,
288        roi: Rect,
289        scale: Downscale,
290        backend: BackendRequest,
291    ) -> Result<Self::SubmittedSurface, Self::Error> {
292        validate_surface_request(backend)?;
293        Ok(submit_ready_device(session, |session| {
294            Self::decode_tile_region_scaled_to_surface_impl(
295                ctx, session, pool, input, fmt, roi, scale, backend,
296            )
297        }))
298    }
299}
300
301impl TileBatchDecodeDevice for Codec {
302    type Context = CpuDecoderContext;
303    type DeviceSurface = Surface;
304}
305
306impl TileBatchDecodeManyDevice for Codec {
307    type Context = CpuDecoderContext;
308    type DeviceSurface = Surface;
309
310    fn decode_tiles_to_device(
311        ctx: &mut j2k_core::DecoderContext<Self::Context>,
312        pool: &mut Self::Pool,
313        inputs: &[&[u8]],
314        fmt: PixelFormat,
315        backend: BackendRequest,
316    ) -> Result<Vec<Self::DeviceSurface>, Self::Error> {
317        let mut session = CudaSession::default();
318        Self::decode_tiles_to_device_with_session_in_context(
319            ctx,
320            pool,
321            inputs,
322            fmt,
323            backend,
324            &mut session,
325        )
326    }
327}