Skip to main content

rpdfium_codec/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = "Image codecs and stream decode filters for rpdfium — a faithful Rust port of PDFium."]
3//!
4//! This crate implements the PDF stream decode filters:
5//!
6//! - **ASCII85Decode** — base-85 decoding
7//! - **ASCIIHexDecode** — hexadecimal decoding
8//! - **RunLengthDecode** — run-length decoding
9//! - **LZWDecode** — LZW decompression (PDF variant)
10//! - **FlateDecode** — zlib/deflate decompression with predictor support
11//! - **DCTDecode** — JPEG decompression
12//! - **CCITTFaxDecode** — Group 3 and Group 4 fax decompression
13//! - **JBIG2Decode** — JBIG2 decompression via `hayro-jbig2`
14//! - **JPXDecode** — JPEG 2000 decompression via `hayro-jpeg2000`
15//!
16//! The [`apply_filter_chain`] function applies multiple filters in sequence,
17//! enforcing security limits on chain length and output size.
18//!
19//! # Design Principles
20//!
21//! - `#![forbid(unsafe_code)]`
22//! - All deep operations are **iterative** (explicit stacks), never recursive.
23//! - Security limits enforced: `MAX_FILTER_CHAIN_LENGTH`, decompressed size limits.
24
25// --- Codec subdirectories (mirroring upstream core/fxcodec/) ---
26pub mod basic;
27pub mod fax;
28pub mod flate;
29pub mod jbig2;
30pub mod jpeg;
31pub mod jpx;
32
33// --- Root infrastructure ---
34pub mod error;
35pub mod pipeline;
36pub mod scanline;
37pub mod streaming;
38
39pub use error::DecodeError;
40pub use scanline::{
41    DctScanlineDecoder, FlateScanlineDecoder, RandomAccessDecoder, ScanlineDecoder,
42};
43pub use streaming::{StreamingDecoder, StreamingDecoderType, create_streaming_decoder};
44
/// The PDF stream decode filters this crate knows how to apply.
///
/// Each variant is named after the corresponding PDF filter with the
/// `Decode` suffix dropped.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum DecodeFilter {
    /// `FlateDecode`: zlib/deflate decompression.
    Flate,
    /// `LZWDecode`: LZW decompression.
    LZW,
    /// `ASCII85Decode`: base-85 decoding.
    ASCII85,
    /// `ASCIIHexDecode`: hexadecimal decoding.
    ASCIIHex,
    /// `RunLengthDecode`: run-length decoding.
    RunLength,
    /// `DCTDecode`: JPEG decompression.
    DCT,
    /// `CCITTFaxDecode`: fax decompression.
    CCITTFax,
    /// `JBIG2Decode`: JBIG2 decompression.
    JBIG2,
    /// `JPXDecode`: JPEG 2000 decompression.
    JPX,
}
67
/// Optional decode parameters shared by all filters.
///
/// Every field is optional; a filter reads only the fields that concern it
/// and ignores the rest. `Default` leaves every field as `None`.
#[derive(Clone, Debug, Default)]
pub struct FilterParams {
    /// Flate/LZW predictor selector (1 = none, 2 = TIFF, 10-15 = PNG).
    pub predictor: Option<i32>,
    /// Columns per row; used by predictors and by CCITT.
    pub columns: Option<i32>,
    /// Color components per sample; used by predictors.
    pub colors: Option<i32>,
    /// Bits per color component; used by predictors.
    pub bits_per_component: Option<i32>,
    /// LZW early code-size change (the PDF spec default is `true`).
    pub early_change: Option<bool>,
    /// CCITT `/K` parameter: 0 = Group 3, negative = Group 4, positive = mixed.
    pub k: Option<i32>,
    /// CCITT row count.
    pub rows: Option<i32>,
    /// CCITT: whether end-of-line markers are present.
    pub end_of_line: Option<bool>,
    /// CCITT: whether the encoding is byte-aligned.
    pub encoded_byte_align: Option<bool>,
    /// CCITT: black pixel representation.
    pub black_is_1: Option<bool>,
    /// JBIG2 global segments (decoded bytes of the `/JBIG2Globals` stream).
    pub jbig2_globals: Option<Vec<u8>>,
}
96
97/// Decode data using a single filter with the given parameters.
98pub fn decode(
99    filter: DecodeFilter,
100    data: &[u8],
101    params: &FilterParams,
102) -> Result<Vec<u8>, DecodeError> {
103    match filter {
104        DecodeFilter::Flate => flate::decode(
105            data,
106            params.predictor,
107            params.columns,
108            params.colors,
109            params.bits_per_component,
110        ),
111        DecodeFilter::LZW => {
112            let early_change = params.early_change.unwrap_or(true);
113            let raw = flate::lzw::decode(data, early_change)?;
114            let predictor_val = params.predictor.unwrap_or(1);
115            if predictor_val <= 1 {
116                Ok(raw)
117            } else {
118                flate::predictor::apply_predictor(
119                    &raw,
120                    predictor_val,
121                    params.columns.unwrap_or(1),
122                    params.colors.unwrap_or(1),
123                    params.bits_per_component.unwrap_or(8),
124                )
125            }
126        }
127        DecodeFilter::ASCII85 => basic::ascii85::decode(data),
128        DecodeFilter::ASCIIHex => basic::ascii_hex::decode(data),
129        DecodeFilter::RunLength => basic::run_length::decode(data),
130        DecodeFilter::DCT => jpeg::decode(data),
131        DecodeFilter::CCITTFax => {
132            let fax_params = fax::CcittParams {
133                k: params.k.unwrap_or(0),
134                columns: params.columns.unwrap_or(1728),
135                rows: params.rows.unwrap_or(0),
136                end_of_line: params.end_of_line.unwrap_or(false),
137                encoded_byte_align: params.encoded_byte_align.unwrap_or(false),
138                black_is_1: params.black_is_1.unwrap_or(false),
139            };
140            fax::decode(data, &fax_params)
141        }
142        DecodeFilter::JBIG2 => jbig2::decode(data, params.jbig2_globals.as_deref()),
143        DecodeFilter::JPX => jpx::decode(data),
144    }
145}
146
147/// Apply a chain of decode filters in sequence.
148///
149/// See [`pipeline::apply_filter_chain`] for details.
150#[inline]
151pub fn apply_filter_chain(
152    data: &[u8],
153    filters: &[(DecodeFilter, FilterParams)],
154) -> Result<Vec<u8>, DecodeError> {
155    pipeline::apply_filter_chain(data, filters)
156}
157
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `input` through `filter` using all-default [`FilterParams`].
    fn decode_with_defaults(filter: DecodeFilter, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
        decode(filter, input, &FilterParams::default())
    }

    // --- Basic filters through the public entry point ---

    #[test]
    fn test_decode_ascii_hex_via_public_api() {
        let decoded = decode_with_defaults(DecodeFilter::ASCIIHex, b"48656C6C6F>").unwrap();
        assert_eq!(decoded, b"Hello");
    }

    #[test]
    fn test_decode_ascii85_via_public_api() {
        let decoded = decode_with_defaults(DecodeFilter::ASCII85, b"9jqo^~>").unwrap();
        assert_eq!(decoded, b"Man ");
    }

    #[test]
    fn test_decode_run_length_via_public_api() {
        let encoded = [253, b'X', 128];
        let decoded = decode_with_defaults(DecodeFilter::RunLength, &encoded).unwrap();
        assert_eq!(decoded, b"XXXX");
    }

    // --- Group A: JBIG2 decode tests ---

    #[test]
    fn test_decode_jbig2_invalid_input() {
        // `unwrap_err` fails the test if decoding unexpectedly succeeds.
        let failure = decode_with_defaults(DecodeFilter::JBIG2, &[1]).unwrap_err();
        assert!(failure.to_string().contains("JBIG2"));
    }

    #[test]
    fn test_jbig2_decode_empty_returns_error() {
        // An empty stream has to come back as Err rather than panic.
        let outcome = decode_with_defaults(DecodeFilter::JBIG2, &[]);
        assert!(outcome.is_err(), "expected Err for empty JBIG2 input");
    }

    #[test]
    fn test_jbig2_decode_with_globals_returns_error_for_invalid_data() {
        // Supplying globals must not rescue an invalid stream: the result is
        // still Err (no panic, no garbage pixels).
        let with_globals = FilterParams {
            jbig2_globals: Some(vec![0xAA, 0xBB, 0xCC]),
            ..FilterParams::default()
        };
        let outcome = decode(DecodeFilter::JBIG2, &[0x00, 0x01, 0x02], &with_globals);
        assert!(
            outcome.is_err(),
            "expected Err for invalid JBIG2 with globals"
        );
        assert!(outcome.unwrap_err().to_string().contains("JBIG2"));
    }

    // --- Group B: JPEG (DCT) decode tests ---

    #[test]
    fn test_jpeg_decode_empty_returns_error() {
        // No bytes means no SOI marker, so decoding has to fail.
        let outcome = decode_with_defaults(DecodeFilter::DCT, &[]);
        assert!(outcome.is_err(), "expected Err for empty JPEG input");
    }

    #[test]
    fn test_jpeg_decode_invalid_returns_error() {
        // Bytes that are not a JPEG at all: Err, never a panic.
        let outcome = decode_with_defaults(DecodeFilter::DCT, b"not a jpeg");
        assert!(outcome.is_err(), "expected Err for non-JPEG bytes");
    }

    #[test]
    fn test_jpeg_decode_truncated_soi_returns_error() {
        // A SOI marker followed by an immediate cut-off must fail gracefully.
        let truncated = [0xFF, 0xD8, 0xFF];
        let outcome = decode_with_defaults(DecodeFilter::DCT, &truncated);
        assert!(outcome.is_err(), "expected Err for truncated JPEG");
    }

    // --- Group C: JPEG 2000 (JPX) decode tests ---

    #[test]
    fn test_decode_jpx_invalid_input() {
        // `unwrap_err` fails the test if decoding unexpectedly succeeds.
        let failure = decode_with_defaults(DecodeFilter::JPX, &[1]).unwrap_err();
        assert!(failure.to_string().contains("JPEG2000"));
    }

    #[test]
    fn test_jpeg2000_decode_empty_returns_error() {
        // An empty stream has to come back as Err rather than panic.
        let outcome = decode_with_defaults(DecodeFilter::JPX, &[]);
        assert!(outcome.is_err(), "expected Err for empty JPEG2000 input");
        let err = outcome.unwrap_err();
        assert!(
            err.to_string().contains("JPEG2000"),
            "error should mention JPEG2000, got: {err}"
        );
    }

    #[test]
    fn test_jpeg2000_decode_truncated_jp2_header_returns_error() {
        // Starts like a JP2 signature box but is cut short: Err, not a panic.
        let truncated = b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A";
        let outcome = decode_with_defaults(DecodeFilter::JPX, truncated);
        assert!(outcome.is_err(), "expected Err for truncated JP2 header");
    }

    // --- Type-level checks ---

    #[test]
    fn test_decode_filter_is_clone_and_eq() {
        let original = DecodeFilter::Flate;
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_filter_params_default() {
        // Destructuring without `..` keeps this test in sync with the struct:
        // adding a field forces it to be checked here too.
        let FilterParams {
            predictor,
            columns,
            colors,
            bits_per_component,
            early_change,
            k,
            rows,
            end_of_line,
            encoded_byte_align,
            black_is_1,
            jbig2_globals,
        } = FilterParams::default();

        assert!(predictor.is_none());
        assert!(columns.is_none());
        assert!(colors.is_none());
        assert!(bits_per_component.is_none());
        assert!(early_change.is_none());
        assert!(k.is_none());
        assert!(rows.is_none());
        assert!(end_of_line.is_none());
        assert!(encoded_byte_align.is_none());
        assert!(black_is_1.is_none());
        assert!(jbig2_globals.is_none());
    }
}