Skip to main content

rpdfium_codec/jpx/
decoder.rs

1// Derived from PDFium's jpx codec
2// Original: Copyright 2014 The PDFium Authors
3// Licensed under BSD-3-Clause / Apache-2.0
4// See pdfium-upstream/LICENSE for the original license.
5
6//! JPXDecode (JPEG 2000) filter — decodes JPEG 2000 codestreams and JP2 containers
7//! using the `hayro-jpeg2000` crate.
8
9use crate::error::DecodeError;
10
/// Upper bound on the number of resolution levels a decode may skip.
///
/// Mirrors `CJPX_Decoder::kMaxResolutionsToSkip` from upstream PDFium.
pub const MAX_RESOLUTIONS_TO_SKIP: u8 = 32;
15
/// Selects how colour-space information is applied while decoding JPEG 2000 data.
///
/// Mirrors `CJPX_Decoder::ColorSpaceOption` from upstream PDFium.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum JpxColorSpaceOption {
    /// Ignore the JP2 colour-space description and treat the image as raw components.
    None,
    /// Regular colour-space handling. This is the default variant.
    #[default]
    Normal,
    /// Indexed (palette based) colour space: palette indices are resolved to colours.
    Indexed,
}
29
30/// Options for JPEG 2000 decoding.
31///
32/// Maps to upstream PDFium's `CJPX_Decoder::Create()` parameters.
33#[derive(Debug, Clone, Default)]
34pub struct JpxDecodeOptions {
35    /// Target resolution as (width, height).
36    ///
37    /// When set, the decoder may decode at a lower resolution level that
38    /// best matches the target, avoiding full-resolution decode for thumbnail
39    /// generation. Maps to upstream's `resolution_levels_to_skip` parameter
40    /// via hayro-jpeg2000's `target_resolution` setting.
41    pub target_resolution: Option<(u32, u32)>,
42
43    /// Color space handling option.
44    pub color_space_option: JpxColorSpaceOption,
45
46    /// Enable strict decoding mode.
47    ///
48    /// When enabled, the decoder rejects non-conforming streams that would
49    /// otherwise be decoded with best-effort. Defaults to `false`.
50    pub strict: bool,
51}
52
/// Metadata describing the result of a JPEG 2000 decode.
#[derive(Debug, Clone)]
pub struct Jpeg2000Info {
    /// Width of the decoded image, in pixels.
    pub width: u32,
    /// Height of the decoded image, in pixels.
    pub height: u32,
    /// `true` when the decoded image carries an alpha channel.
    pub has_alpha: bool,
    /// Bit depth the image had before it was normalized to 8 bits.
    pub original_bit_depth: u8,
    /// Count of colour components, not counting alpha
    /// (1 = Gray, 3 = RGB, 4 = CMYK).
    pub num_components: u8,
}
68
69/// Build hayro-jpeg2000 `DecodeSettings` from our `JpxDecodeOptions`.
70fn build_decode_settings(options: &JpxDecodeOptions) -> hayro_jpeg2000::DecodeSettings {
71    let mut settings = hayro_jpeg2000::DecodeSettings::default();
72    // target_resolution: not available in hayro-jpeg2000 0.1.x.
73    // Will be forwarded when the dependency is upgraded to a version
74    // that exposes this setting (available in 0.3+).
75    let _ = options.target_resolution;
76    settings.strict = options.strict;
77    settings.resolve_palette_indices = match options.color_space_option {
78        JpxColorSpaceOption::None => false,
79        JpxColorSpaceOption::Normal => true,
80        JpxColorSpaceOption::Indexed => true,
81    };
82    settings
83}
84
85/// Decode JPEG 2000 (JPX) compressed data.
86///
87/// Accepts both raw JPEG 2000 codestreams (`.j2c`) and JP2-wrapped images.
88/// Returns interleaved 8-bit pixel data.
89///
90/// The hayro-jpeg2000 library automatically normalizes component precision to 8 bits,
91/// so no manual precision adjustment is needed.
92pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
93    let bitmap = hayro_jpeg2000::decode(input, &hayro_jpeg2000::DecodeSettings::default())
94        .map_err(|e| DecodeError::InvalidInput(format!("JPEG2000: {e}")))?;
95    Ok(bitmap.data)
96}
97
98/// Decode JPEG 2000 data with custom options.
99///
100/// Allows specifying target resolution for thumbnail generation and
101/// color space handling options. See [`JpxDecodeOptions`] for details.
102pub fn decode_with_options(
103    input: &[u8],
104    options: &JpxDecodeOptions,
105) -> Result<Vec<u8>, DecodeError> {
106    let settings = build_decode_settings(options);
107    let bitmap = hayro_jpeg2000::decode(input, &settings)
108        .map_err(|e| DecodeError::InvalidInput(format!("JPEG2000: {e}")))?;
109    Ok(bitmap.data)
110}
111
112/// Decode JPEG 2000 data and return both pixel data and image info.
113///
114/// This provides access to the original bit depth and other metadata
115/// that may be needed for color space interpretation.
116pub fn decode_with_info(input: &[u8]) -> Result<(Vec<u8>, Jpeg2000Info), DecodeError> {
117    let bitmap = hayro_jpeg2000::decode(input, &hayro_jpeg2000::DecodeSettings::default())
118        .map_err(|e| DecodeError::InvalidInput(format!("JPEG2000: {e}")))?;
119
120    // num_components should exclude alpha (per Jpeg2000Info doc comment).
121    // hayro-jpeg2000's color_space.num_channels() already excludes alpha,
122    // so we use it directly.
123    let num_components = bitmap.color_space.num_channels();
124
125    let info = Jpeg2000Info {
126        width: bitmap.width,
127        height: bitmap.height,
128        has_alpha: bitmap.has_alpha,
129        original_bit_depth: bitmap.original_bit_depth,
130        num_components,
131    };
132
133    Ok((bitmap.data, info))
134}
135
136/// Decode JPEG 2000 data with custom options and return both pixel data and image info.
137pub fn decode_with_info_and_options(
138    input: &[u8],
139    options: &JpxDecodeOptions,
140) -> Result<(Vec<u8>, Jpeg2000Info), DecodeError> {
141    let settings = build_decode_settings(options);
142    let bitmap = hayro_jpeg2000::decode(input, &settings)
143        .map_err(|e| DecodeError::InvalidInput(format!("JPEG2000: {e}")))?;
144
145    let num_components = bitmap.color_space.num_channels();
146    let info = Jpeg2000Info {
147        width: bitmap.width,
148        height: bitmap.height,
149        has_alpha: bitmap.has_alpha,
150        original_bit_depth: bitmap.original_bit_depth,
151        num_components,
152    };
153
154    Ok((bitmap.data, info))
155}
156
/// Precision of a single image component, as read from JP2 or codestream headers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ComponentPrecision {
    /// Bit depth of the component (the spec allows 1-38).
    pub bits: u8,
    /// `true` when the component stores signed sample values.
    pub is_signed: bool,
}
165
166/// Parse component precision info from a JP2 Image Header Box (ihdr).
167///
168/// Scans for the JP2 Image Header Box to extract precision and signedness.
169/// Returns `None` if the data is not a JP2 container or the ihdr box is not found.
170pub fn parse_jp2_precision(data: &[u8]) -> Option<Vec<ComponentPrecision>> {
171    // JP2 files start with the signature box: 12 bytes
172    // \x00\x00\x00\x0C jP  \x20\x20 \x0D\x0A\x87\x0A
173    if data.len() < 12 {
174        return None;
175    }
176    if &data[4..8] != b"jP  " && &data[4..8] != b"jP\x1a\x1a" {
177        // Not a JP2 container — might be a raw codestream
178        return parse_codestream_precision(data);
179    }
180
181    // Scan JP2 boxes looking for ihdr (Image Header Box) inside jp2h (JP2 Header)
182    let mut pos = 0;
183    while pos + 8 <= data.len() {
184        let box_len =
185            u32::from_be_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
186        let box_type = &data[pos + 4..pos + 8];
187
188        if box_len < 8 || pos + box_len > data.len() {
189            break;
190        }
191
192        if box_type == b"jp2h" {
193            // Search inside jp2h for ihdr
194            return parse_ihdr_in_jp2h(&data[pos + 8..pos + box_len]);
195        }
196
197        pos += box_len;
198    }
199
200    None
201}
202
203/// Parse ihdr box inside a jp2h superbox.
204fn parse_ihdr_in_jp2h(data: &[u8]) -> Option<Vec<ComponentPrecision>> {
205    let mut pos = 0;
206    while pos + 8 <= data.len() {
207        let box_len =
208            u32::from_be_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
209        let box_type = &data[pos + 4..pos + 8];
210
211        if box_len < 8 || pos + box_len > data.len() {
212            break;
213        }
214
215        if box_type == b"ihdr" {
216            // ihdr: 4(height) + 4(width) + 2(num_components) + 1(bpc) + ...
217            let content = &data[pos + 8..pos + box_len];
218            if content.len() >= 11 {
219                let num_components = u16::from_be_bytes([content[8], content[9]]) as usize;
220                let bpc_byte = content[10];
221
222                if bpc_byte == 0xFF {
223                    // bpc varies per component — would need bpcc box
224                    // For now return None to indicate we can't determine from ihdr alone
225                    return None;
226                }
227
228                let bits = (bpc_byte & 0x7F) + 1;
229                let is_signed = (bpc_byte & 0x80) != 0;
230                let precision = ComponentPrecision { bits, is_signed };
231                return Some(vec![precision; num_components]);
232            }
233        }
234
235        pos += box_len;
236    }
237
238    None
239}
240
241/// Parse component precision from a raw JPEG 2000 codestream SIZ marker.
242fn parse_codestream_precision(data: &[u8]) -> Option<Vec<ComponentPrecision>> {
243    // SOC marker: 0xFF4F, then SIZ marker: 0xFF51
244    if data.len() < 4 || data[0] != 0xFF || data[1] != 0x4F {
245        return None;
246    }
247    if data[2] != 0xFF || data[3] != 0x51 {
248        return None;
249    }
250
251    // SIZ marker: length(2) + Rsiz(2) + Xsiz(4) + Ysiz(4) + XOsiz(4) + YOsiz(4)
252    //             + XTsiz(4) + YTsiz(4) + XTOsiz(4) + YTOsiz(4) + Csiz(2)
253    //             + per-component: Ssiz(1) + XRsiz(1) + YRsiz(1)
254    if data.len() < 6 {
255        return None;
256    }
257    let siz_len = u16::from_be_bytes([data[4], data[5]]) as usize;
258    if data.len() < 4 + siz_len {
259        return None;
260    }
261
262    // Offset within SIZ data (after length field)
263    let siz_data = &data[6..4 + siz_len];
264    // Rsiz(2) + Xsiz(4) + Ysiz(4) + XOsiz(4) + YOsiz(4)
265    // + XTsiz(4) + YTsiz(4) + XTOsiz(4) + YTOsiz(4) + Csiz(2) = 36 bytes
266    if siz_data.len() < 36 {
267        return None;
268    }
269
270    let csiz = u16::from_be_bytes([siz_data[34], siz_data[35]]) as usize;
271    let comp_start = 36;
272
273    let mut result = Vec::with_capacity(csiz);
274    for i in 0..csiz {
275        let offset = comp_start + i * 3;
276        if offset >= siz_data.len() {
277            break;
278        }
279        let ssiz = siz_data[offset];
280        let bits = (ssiz & 0x7F) + 1;
281        let is_signed = (ssiz & 0x80) != 0;
282        result.push(ComponentPrecision { bits, is_signed });
283    }
284
285    if result.is_empty() {
286        None
287    } else {
288        Some(result)
289    }
290}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that an error's display text mentions the `JPEG2000` prefix.
    fn assert_mentions_jpeg2000(err: &impl std::fmt::Display) {
        assert!(
            err.to_string().contains("JPEG2000"),
            "error should mention JPEG2000: {err}"
        );
    }

    /// Builds SOC + SIZ for a 100x200 single-tile image with one component per
    /// `Ssiz` byte given (sub-sampling factors fixed at 1).
    fn build_siz_codestream(ssiz_values: &[u8]) -> Vec<u8> {
        // Lsiz = 2 (self) + 36 (fixed fields) + 3 per component.
        let lsiz = (2 + 36 + 3 * ssiz_values.len()) as u16;

        // SOC, then the SIZ marker.
        let mut bytes = vec![0xFF, 0x4F, 0xFF, 0x51];
        bytes.extend_from_slice(&lsiz.to_be_bytes());
        // Rsiz
        bytes.extend_from_slice(&0u16.to_be_bytes());
        // Xsiz, Ysiz, XOsiz, YOsiz, XTsiz, YTsiz, XTOsiz, YTOsiz
        for field in &[100u32, 200, 0, 0, 100, 200, 0, 0] {
            bytes.extend_from_slice(&field.to_be_bytes());
        }
        // Csiz
        bytes.extend_from_slice(&(ssiz_values.len() as u16).to_be_bytes());
        // Per component: Ssiz, XRsiz = 1, YRsiz = 1
        for &ssiz in ssiz_values {
            bytes.extend_from_slice(&[ssiz, 0x01, 0x01]);
        }
        bytes
    }

    /// Builds a minimal JP2 file: signature box, file type box, and a `jp2h`
    /// superbox containing only an `ihdr` box (480 rows x 640 columns).
    fn build_jp2_with_ihdr(num_components: u16, bpc: u8) -> Vec<u8> {
        let mut bytes = Vec::new();

        // Signature box (12 bytes)
        bytes.extend_from_slice(&12u32.to_be_bytes());
        bytes.extend_from_slice(b"jP  ");
        bytes.extend_from_slice(&[0x0D, 0x0A, 0x87, 0x0A]);

        // File type box (20 bytes)
        bytes.extend_from_slice(&20u32.to_be_bytes());
        bytes.extend_from_slice(b"ftyp");
        bytes.extend_from_slice(b"jp2 ");
        bytes.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]);
        bytes.extend_from_slice(b"jp2 ");

        // ihdr: 8 (box header) + 14 (content) = 22 bytes,
        // jp2h: 8 (box header) + 22 (ihdr) = 30 bytes.
        let ihdr_len: u32 = 22;
        let jp2h_len: u32 = 8 + ihdr_len;
        bytes.extend_from_slice(&jp2h_len.to_be_bytes());
        bytes.extend_from_slice(b"jp2h");
        bytes.extend_from_slice(&ihdr_len.to_be_bytes());
        bytes.extend_from_slice(b"ihdr");
        bytes.extend_from_slice(&480u32.to_be_bytes()); // Height
        bytes.extend_from_slice(&640u32.to_be_bytes()); // Width
        bytes.extend_from_slice(&num_components.to_be_bytes());
        // BPC, compression type, UnkC, IPR
        bytes.extend_from_slice(&[bpc, 0x07, 0x00, 0x00]);
        bytes
    }

    #[test]
    fn test_decode_invalid_data_returns_error() {
        let err = decode(&[1, 2, 3]).expect_err("garbage input must not decode");
        assert_mentions_jpeg2000(&err);
    }

    #[test]
    fn test_decode_empty_data_returns_error() {
        let err = decode(&[]).expect_err("empty input must not decode");
        assert_mentions_jpeg2000(&err);
    }

    #[test]
    fn test_decode_truncated_jp2_header_returns_error() {
        // Starts like a JP2 signature box but stops short.
        let truncated = b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A";
        assert!(decode(truncated).is_err());
    }

    #[test]
    fn test_decode_truncated_codestream_returns_error() {
        // SOC and SIZ markers followed by a zero length and nothing else.
        let truncated = b"\xFF\x4F\xFF\x51\x00\x00";
        assert!(decode(truncated).is_err());
    }

    #[test]
    fn test_parse_codestream_precision_basic() {
        // Component 0: Ssiz=0x07 (8-bit unsigned); component 1: Ssiz=0x8F (16-bit signed).
        let data = build_siz_codestream(&[0x07, 0x8F]);

        let result = parse_jp2_precision(&data).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].bits, 8);
        assert!(!result[0].is_signed);
        assert_eq!(result[1].bits, 16);
        assert!(result[1].is_signed);
    }

    #[test]
    fn test_parse_jp2_precision_with_ihdr() {
        // Three components, BPC 0x07 = 8-bit unsigned (bits-1=7, sign=0).
        let data = build_jp2_with_ihdr(3, 0x07);

        let result = parse_jp2_precision(&data).unwrap();
        assert_eq!(result.len(), 3);
        for comp in &result {
            assert_eq!(comp.bits, 8);
            assert!(!comp.is_signed);
        }
    }

    #[test]
    fn test_parse_jp2_precision_not_jp2() {
        // Arbitrary bytes that are neither a JP2 container nor a codestream.
        let data = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05];
        assert!(parse_jp2_precision(&data).is_none());
    }

    #[test]
    fn test_parse_jp2_precision_empty() {
        assert!(parse_jp2_precision(&[]).is_none());
    }

    #[test]
    fn test_component_precision_eq() {
        let a = ComponentPrecision {
            bits: 8,
            is_signed: false,
        };
        let b = ComponentPrecision {
            bits: 8,
            is_signed: false,
        };
        assert_eq!(a, b);
    }

    // -----------------------------------------------------------------------
    // JpxDecodeOptions tests (R3 + R4)
    // -----------------------------------------------------------------------

    #[test]
    fn test_jpx_decode_options_default() {
        let opts = JpxDecodeOptions::default();
        assert_eq!(opts.target_resolution, None);
        assert_eq!(opts.color_space_option, JpxColorSpaceOption::Normal);
        assert!(!opts.strict);
    }

    #[test]
    fn test_jpx_color_space_option_default() {
        assert_eq!(JpxColorSpaceOption::default(), JpxColorSpaceOption::Normal);
    }

    #[test]
    fn test_build_settings_default() {
        let settings = build_decode_settings(&JpxDecodeOptions::default());
        assert!(!settings.strict);
        assert!(settings.resolve_palette_indices);
    }

    #[test]
    fn test_build_settings_with_target_resolution() {
        // Our API accepts target_resolution, but it is not yet forwarded to
        // hayro-jpeg2000 v0.1 (which lacks the field). Check that building
        // settings with it set does not panic and still yields valid settings.
        let opts = JpxDecodeOptions {
            target_resolution: Some((320, 240)),
            ..Default::default()
        };
        let settings = build_decode_settings(&opts);
        assert!(!settings.strict);
    }

    #[test]
    fn test_build_settings_color_space_none() {
        let opts = JpxDecodeOptions {
            color_space_option: JpxColorSpaceOption::None,
            ..Default::default()
        };
        assert!(!build_decode_settings(&opts).resolve_palette_indices);
    }

    #[test]
    fn test_build_settings_color_space_indexed() {
        let opts = JpxDecodeOptions {
            color_space_option: JpxColorSpaceOption::Indexed,
            ..Default::default()
        };
        assert!(build_decode_settings(&opts).resolve_palette_indices);
    }

    #[test]
    fn test_build_settings_strict() {
        let opts = JpxDecodeOptions {
            strict: true,
            ..Default::default()
        };
        assert!(build_decode_settings(&opts).strict);
    }

    #[test]
    fn test_decode_with_options_invalid_data() {
        let opts = JpxDecodeOptions::default();
        assert!(decode_with_options(&[1, 2, 3], &opts).is_err());
    }

    #[test]
    fn test_decode_with_info_and_options_invalid_data() {
        let opts = JpxDecodeOptions {
            target_resolution: Some((100, 100)),
            ..Default::default()
        };
        assert!(decode_with_info_and_options(&[1, 2, 3], &opts).is_err());
    }

    #[test]
    fn test_max_resolutions_to_skip_constant() {
        assert_eq!(MAX_RESOLUTIONS_TO_SKIP, 32);
    }

    // --- Upstream ports: jpx_unittest.cpp ---
    //
    // Upstream's C++ tests drive the OpenJPEG stream adapter callbacks
    // (DecodeData read/skip/seek) and the YUV420ToRGB conversion. rpdfium
    // hands decoding to hayro-jpeg2000, which owns stream I/O, so the tests
    // below check the equivalent error handling at the Rust API boundary.

    /// Upstream: TEST(fxcodec, DecodeDataNullDecodeData)
    ///
    /// Null/missing data should produce an error, not a crash.
    #[test]
    fn test_jpx_decode_null_data() {
        // An empty slice is the closest Rust equivalent of null data.
        assert!(decode(&[]).is_err());
        assert!(decode_with_options(&[], &JpxDecodeOptions::default()).is_err());
    }

    /// Upstream: TEST(fxcodec, DecodeDataNullStream)
    ///
    /// Zero-length reads on null stream should error.
    #[test]
    fn test_jpx_decode_null_stream_equivalent() {
        let err = decode_with_info(&[]).expect_err("empty input must not decode");
        assert_mentions_jpeg2000(&err);
    }

    /// Upstream: TEST(fxcodec, DecodeDataZeroSize)
    ///
    /// Zero-size data should error.
    #[test]
    fn test_jpx_decode_zero_size_data() {
        assert!(decode_with_info_and_options(&[], &JpxDecodeOptions::default()).is_err());
    }

    /// Upstream: TEST(fxcodec, DecodeDataReadInBounds)
    #[test]
    #[ignore = "requires OpenJPEG stream adapter internals not exposed in hayro-jpeg2000"]
    fn test_jpx_decode_data_read_in_bounds() {
        // Upstream exercises opj_read_from_memory with exact-sized reads;
        // hayro-jpeg2000 manages stream I/O internally.
    }

    /// Upstream: TEST(fxcodec, DecodeDataReadBeyondBounds)
    #[test]
    #[ignore = "requires OpenJPEG stream adapter internals not exposed in hayro-jpeg2000"]
    fn test_jpx_decode_data_read_beyond_bounds() {
        // Upstream exercises opj_read_from_memory past the end of the data.
    }

    /// Upstream: TEST(fxcodec, DecodeDataSkip)
    #[test]
    #[ignore = "requires OpenJPEG stream adapter internals not exposed in hayro-jpeg2000"]
    fn test_jpx_decode_data_skip() {
        // Upstream exercises opj_skip_from_memory with various offsets.
    }

    /// Upstream: TEST(fxcodec, DecodeDataSeek)
    #[test]
    #[ignore = "requires OpenJPEG stream adapter internals not exposed in hayro-jpeg2000"]
    fn test_jpx_decode_data_seek() {
        // Upstream exercises opj_seek_from_memory with various positions.
    }

    /// Upstream: TEST(fxcodec, YUV420ToRGB)
    #[test]
    #[ignore = "requires CJPX_Decoder::Sycc420ToRgbForTesting internals not exposed in hayro-jpeg2000"]
    fn test_jpx_yuv420_to_rgb() {
        // Upstream checks YUV420 -> RGB conversion across several image widths;
        // hayro-jpeg2000 handles colour conversion internally.
    }
}