Skip to main content

rpdfium_codec/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = "Image codecs and stream decode filters for rpdfium — a faithful Rust port of PDFium."]
3//!
4//! This crate implements the PDF stream decode filters:
5//!
6//! - **ASCII85Decode** — base-85 decoding
7//! - **ASCIIHexDecode** — hexadecimal decoding
8//! - **RunLengthDecode** — run-length decoding
9//! - **LZWDecode** — LZW decompression (PDF variant)
10//! - **FlateDecode** — zlib/deflate decompression with predictor support
11//! - **DCTDecode** — JPEG decompression
12//! - **CCITTFaxDecode** — Group 3 and Group 4 fax decompression
13//! - **JBIG2Decode** — JBIG2 decompression via `hayro-jbig2`
14//! - **JPXDecode** — JPEG 2000 decompression via `hayro-jpeg2000`
15//!
16//! The [`apply_filter_chain`] function applies multiple filters in sequence,
17//! enforcing security limits on chain length and output size.
18//!
19//! # Design Principles
20//!
21//! - `#![forbid(unsafe_code)]`
22//! - All deep operations are **iterative** (explicit stacks), never recursive.
23//! - Security limits enforced: `MAX_FILTER_CHAIN_LENGTH`, decompressed size limits.
24
25// --- Codec subdirectories (mirroring upstream core/fxcodec/) ---
26pub mod basic;
27pub mod fax;
28pub mod flate;
29pub mod jbig2;
30pub mod jpeg;
31pub mod jpx;
32
33// --- Root infrastructure ---
34pub mod error;
35pub mod pipeline;
36pub mod scanline;
37pub mod streaming;
38
39pub use error::DecodeError;
40pub use scanline::{
41    DctScanlineDecoder, FlateScanlineDecoder, RandomAccessDecoder, ScanlineDecoder,
42};
43pub use streaming::{StreamingDecoder, StreamingDecoderType, create_streaming_decoder};
44
/// The set of PDF stream decode filters this crate knows how to apply.
///
/// Each variant corresponds to one filter name as it appears in a PDF
/// stream dictionary; pass a variant to [`decode`] to run that filter.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DecodeFilter {
    /// `FlateDecode`: zlib/deflate decompression.
    Flate,
    /// `LZWDecode`: LZW decompression.
    LZW,
    /// `ASCII85Decode`: base-85 text-to-binary decoding.
    ASCII85,
    /// `ASCIIHexDecode`: hexadecimal text-to-binary decoding.
    ASCIIHex,
    /// `RunLengthDecode`: run-length decoding.
    RunLength,
    /// `DCTDecode`: JPEG decompression.
    DCT,
    /// `CCITTFaxDecode`: fax decompression.
    CCITTFax,
    /// `JBIG2Decode`: JBIG2 decompression.
    JBIG2,
    /// `JPXDecode`: JPEG 2000 decompression.
    JPX,
}
67
/// Optional decode parameters shared by every filter.
///
/// A single struct carries the parameters for all filters; each filter reads
/// only the fields relevant to it and ignores the rest. Every field is
/// optional, and `FilterParams::default()` leaves all of them unset.
#[derive(Debug, Clone, Default)]
pub struct FilterParams {
    /// Flate/LZW predictor selector: 1 = none, 2 = TIFF, 10-15 = PNG.
    ///
    /// For LZW, [`decode`] treats an unset value as 1 (no predictor).
    pub predictor: Option<i32>,
    /// Columns per row, used by predictors and by CCITT.
    ///
    /// When unset, [`decode`] falls back to 1 for the LZW predictor and to
    /// 1728 for CCITT.
    pub columns: Option<i32>,
    /// Color components per sample, used by predictors.
    ///
    /// When unset, [`decode`] falls back to 1 for the LZW predictor.
    pub colors: Option<i32>,
    /// Bits per color component, used by predictors.
    ///
    /// When unset, [`decode`] falls back to 8 for the LZW predictor.
    pub bits_per_component: Option<i32>,
    /// Whether LZW switches code size early (`true` when unset, per PDF spec).
    pub early_change: Option<bool>,
    /// CCITT `/K` scheme selector: 0 = Group 3, negative = Group 4,
    /// positive = mixed. Treated as 0 when unset.
    pub k: Option<i32>,
    /// CCITT row count. Treated as 0 when unset.
    pub rows: Option<i32>,
    /// Whether CCITT end-of-line markers are present (`false` when unset).
    pub end_of_line: Option<bool>,
    /// Whether CCITT encoding is byte-aligned (`false` when unset).
    pub encoded_byte_align: Option<bool>,
    /// CCITT black pixel representation (`false` when unset).
    pub black_is_1: Option<bool>,
    /// Decoded bytes of the `/JBIG2Globals` stream holding the JBIG2 global
    /// segments, if any.
    pub jbig2_globals: Option<Vec<u8>>,
}
96
97/// Decode data using a single filter with the given parameters.
98pub fn decode(
99    filter: DecodeFilter,
100    data: &[u8],
101    params: &FilterParams,
102) -> Result<Vec<u8>, DecodeError> {
103    match filter {
104        DecodeFilter::Flate => flate::decode(
105            data,
106            params.predictor,
107            params.columns,
108            params.colors,
109            params.bits_per_component,
110        ),
111        DecodeFilter::LZW => {
112            let early_change = params.early_change.unwrap_or(true);
113            let raw = flate::lzw::decode(data, early_change)?;
114            let predictor_val = params.predictor.unwrap_or(1);
115            if predictor_val <= 1 {
116                Ok(raw)
117            } else {
118                flate::predictor::apply_predictor(
119                    &raw,
120                    predictor_val,
121                    params.columns.unwrap_or(1),
122                    params.colors.unwrap_or(1),
123                    params.bits_per_component.unwrap_or(8),
124                )
125            }
126        }
127        DecodeFilter::ASCII85 => basic::ascii85::decode(data),
128        DecodeFilter::ASCIIHex => basic::ascii_hex::decode(data),
129        DecodeFilter::RunLength => basic::run_length::decode(data),
130        DecodeFilter::DCT => jpeg::decode(data),
131        DecodeFilter::CCITTFax => {
132            let fax_params = fax::CcittParams {
133                k: params.k.unwrap_or(0),
134                columns: params.columns.unwrap_or(1728),
135                rows: params.rows.unwrap_or(0),
136                end_of_line: params.end_of_line.unwrap_or(false),
137                encoded_byte_align: params.encoded_byte_align.unwrap_or(false),
138                black_is_1: params.black_is_1.unwrap_or(false),
139            };
140            fax::decode(data, &fax_params)
141        }
142        DecodeFilter::JBIG2 => jbig2::decode(data, params.jbig2_globals.as_deref()),
143        DecodeFilter::JPX => jpx::decode(data),
144    }
145}
146
147/// Apply a chain of decode filters in sequence.
148///
149/// See [`pipeline::apply_filter_chain`] for details.
150pub fn apply_filter_chain(
151    data: &[u8],
152    filters: &[(DecodeFilter, FilterParams)],
153) -> Result<Vec<u8>, DecodeError> {
154    pipeline::apply_filter_chain(data, filters)
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs [`decode`] with every filter parameter left unset.
    fn decode_with_defaults(filter: DecodeFilter, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
        decode(filter, input, &FilterParams::default())
    }

    #[test]
    fn decode_ascii_hex_via_public_api() {
        let decoded = decode_with_defaults(DecodeFilter::ASCIIHex, b"48656C6C6F>")
            .expect("valid ASCIIHex input must decode");
        assert_eq!(decoded, b"Hello");
    }

    #[test]
    fn decode_ascii85_via_public_api() {
        let decoded = decode_with_defaults(DecodeFilter::ASCII85, b"9jqo^~>")
            .expect("valid ASCII85 input must decode");
        assert_eq!(decoded, b"Man ");
    }

    #[test]
    fn decode_run_length_via_public_api() {
        // 253 => repeat the following byte 4 times; 128 => end of data.
        let encoded = [253, b'X', 128];
        let decoded = decode_with_defaults(DecodeFilter::RunLength, &encoded)
            .expect("valid run-length input must decode");
        assert_eq!(decoded, b"XXXX");
    }

    #[test]
    fn decode_jbig2_invalid_input() {
        let failure = decode_with_defaults(DecodeFilter::JBIG2, &[1])
            .expect_err("a single byte is not a valid JBIG2 stream");
        assert!(failure.to_string().contains("JBIG2"));
    }

    #[test]
    fn decode_jpx_invalid_input() {
        let failure = decode_with_defaults(DecodeFilter::JPX, &[1])
            .expect_err("a single byte is not a valid JPEG 2000 stream");
        assert!(failure.to_string().contains("JPEG2000"));
    }

    #[test]
    fn decode_filter_is_clone_and_eq() {
        let original = DecodeFilter::Flate;
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    #[test]
    fn filter_params_default() {
        // Exhaustive destructuring: a newly added field fails to compile here
        // until this test is updated to cover it.
        let FilterParams {
            predictor,
            columns,
            colors,
            bits_per_component,
            early_change,
            k,
            rows,
            end_of_line,
            encoded_byte_align,
            black_is_1,
            jbig2_globals,
        } = FilterParams::default();

        assert!(predictor.is_none());
        assert!(columns.is_none());
        assert!(colors.is_none());
        assert!(bits_per_component.is_none());
        assert!(early_change.is_none());
        assert!(k.is_none());
        assert!(rows.is_none());
        assert!(end_of_line.is_none());
        assert!(encoded_byte_align.is_none());
        assert!(black_is_1.is_none());
        assert!(jbig2_globals.is_none());
    }
}