Skip to main content

pdf_objects/
stream.rs

1use std::io::{Read, Write};
2
3use flate2::Compression;
4use flate2::read::ZlibDecoder;
5use flate2::write::ZlibEncoder;
6
7use crate::error::{PdfError, PdfResult};
8use crate::types::{PdfStream, PdfValue};
9
10/// Compress `data` with FlateDecode (zlib / deflate) at the default
11/// compression level. Used by the writer when re-emitting rewritten content
12/// streams so the saved PDF does not bloat with plaintext content bytes.
13pub fn flate_encode(data: &[u8]) -> PdfResult<Vec<u8>> {
14    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
15    encoder
16        .write_all(data)
17        .map_err(|error| PdfError::Corrupt(format!("flate encode failed: {error}")))?;
18    encoder
19        .finish()
20        .map_err(|error| PdfError::Corrupt(format!("flate encode finalize failed: {error}")))
21}
22
23pub fn decode_stream(stream: &PdfStream) -> PdfResult<Vec<u8>> {
24    let inflated = match stream.dict.get("Filter") {
25        None => stream.data.clone(),
26        Some(PdfValue::Name(name)) if name == "FlateDecode" => inflate(stream.data.as_slice())?,
27        Some(PdfValue::Array(filters)) if filters.len() == 1 => match filters.first() {
28            Some(PdfValue::Name(name)) if name == "FlateDecode" => inflate(stream.data.as_slice())?,
29            _ => {
30                return Err(PdfError::Unsupported(
31                    "only a single FlateDecode filter is supported".to_string(),
32                ));
33            }
34        },
35        Some(_) => {
36            return Err(PdfError::Unsupported(
37                "unsupported stream filter configuration".to_string(),
38            ));
39        }
40    };
41
42    apply_predictor(&inflated, stream.dict.get("DecodeParms"))
43}
44
45/// Maximum decompressed stream size (256 MiB). Prevents decompression bombs from
46/// exhausting memory in WASM or native contexts.
47const MAX_DECOMPRESSED_SIZE: u64 = 256 * 1024 * 1024;
48
49fn inflate(data: &[u8]) -> PdfResult<Vec<u8>> {
50    let decoder = ZlibDecoder::new(data);
51    let mut output = Vec::new();
52    decoder
53        .take(MAX_DECOMPRESSED_SIZE + 1)
54        .read_to_end(&mut output)
55        .map_err(|error| PdfError::Corrupt(format!("failed to decode flate stream: {error}")))?;
56    if output.len() as u64 > MAX_DECOMPRESSED_SIZE {
57        return Err(PdfError::Corrupt(
58            "decompressed stream exceeds maximum allowed size".to_string(),
59        ));
60    }
61    Ok(output)
62}
63
64fn apply_predictor(data: &[u8], decode_parms: Option<&PdfValue>) -> PdfResult<Vec<u8>> {
65    let parms = match decode_parms {
66        None => return Ok(data.to_vec()),
67        Some(PdfValue::Dictionary(dict)) => dict,
68        Some(PdfValue::Null) => return Ok(data.to_vec()),
69        Some(PdfValue::Array(_)) => {
70            // Per-filter DecodeParms arrays are legal when multiple filters are
71            // chained. We only support a single FlateDecode filter today so any
72            // array-valued DecodeParms is unexpected.
73            return Err(PdfError::Unsupported(
74                "per-filter DecodeParms arrays are not supported".to_string(),
75            ));
76        }
77        Some(_) => {
78            return Err(PdfError::Corrupt(
79                "DecodeParms is not a dictionary".to_string(),
80            ));
81        }
82    };
83
84    let predictor = parms
85        .get("Predictor")
86        .and_then(PdfValue::as_integer)
87        .unwrap_or(1);
88    match predictor {
89        1 => Ok(data.to_vec()),
90        2 => tiff_predictor_decode(data, parms),
91        10..=15 => png_predictor_decode(data, parms),
92        other => Err(PdfError::Unsupported(format!(
93            "predictor {other} is not supported"
94        ))),
95    }
96}
97
98fn tiff_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
99    let columns = parms
100        .get("Columns")
101        .and_then(PdfValue::as_integer)
102        .unwrap_or(1) as usize;
103    let colors = parms
104        .get("Colors")
105        .and_then(PdfValue::as_integer)
106        .unwrap_or(1) as usize;
107    let bits_per_component = parms
108        .get("BitsPerComponent")
109        .and_then(PdfValue::as_integer)
110        .unwrap_or(8) as usize;
111
112    if bits_per_component != 8 {
113        return Err(PdfError::Unsupported(format!(
114            "TIFF predictor with BitsPerComponent {bits_per_component} is not supported"
115        )));
116    }
117    if columns == 0 || colors == 0 {
118        return Err(PdfError::Corrupt(
119            "TIFF predictor Columns/Colors must be positive".to_string(),
120        ));
121    }
122    let row_stride = columns * colors;
123    if data.len() % row_stride != 0 {
124        return Err(PdfError::Corrupt(format!(
125            "TIFF predictor row length mismatch: data={} stride={row_stride}",
126            data.len()
127        )));
128    }
129    let mut output = Vec::with_capacity(data.len());
130    for row in data.chunks_exact(row_stride) {
131        for (component_index, byte) in row.iter().enumerate() {
132            if component_index < colors {
133                // First pixel in a row is stored as-is per component.
134                output.push(*byte);
135            } else {
136                let previous = output[output.len() - colors];
137                output.push(previous.wrapping_add(*byte));
138            }
139        }
140    }
141    Ok(output)
142}
143
144fn png_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
145    let columns = parms
146        .get("Columns")
147        .and_then(PdfValue::as_integer)
148        .unwrap_or(1) as usize;
149    let colors = parms
150        .get("Colors")
151        .and_then(PdfValue::as_integer)
152        .unwrap_or(1) as usize;
153    let bits_per_component = parms
154        .get("BitsPerComponent")
155        .and_then(PdfValue::as_integer)
156        .unwrap_or(8) as usize;
157
158    if bits_per_component != 8 {
159        return Err(PdfError::Unsupported(format!(
160            "PNG predictor with BitsPerComponent {bits_per_component} is not supported"
161        )));
162    }
163    if columns == 0 || colors == 0 {
164        return Err(PdfError::Corrupt(
165            "PNG predictor Columns/Colors must be positive".to_string(),
166        ));
167    }
168    let bytes_per_pixel = colors; // bits_per_component == 8
169    let row_data_len = columns * bytes_per_pixel;
170    let row_stride = row_data_len + 1; // leading filter byte
171
172    if data.len() % row_stride != 0 {
173        return Err(PdfError::Corrupt(format!(
174            "PNG predictor row length mismatch: data={} stride={row_stride}",
175            data.len()
176        )));
177    }
178    let row_count = data.len() / row_stride;
179    let mut output = Vec::with_capacity(row_count * row_data_len);
180    let mut prev_row = vec![0u8; row_data_len];
181    let mut row = vec![0u8; row_data_len];
182
183    for r in 0..row_count {
184        let base = r * row_stride;
185        let filter = data[base];
186        let src = &data[base + 1..base + row_stride];
187        row.copy_from_slice(src);
188        match filter {
189            0 => {} // None
190            1 => {
191                // Sub
192                for i in 0..row_data_len {
193                    let left = if i >= bytes_per_pixel {
194                        row[i - bytes_per_pixel]
195                    } else {
196                        0
197                    };
198                    row[i] = row[i].wrapping_add(left);
199                }
200            }
201            2 => {
202                // Up
203                for i in 0..row_data_len {
204                    row[i] = row[i].wrapping_add(prev_row[i]);
205                }
206            }
207            3 => {
208                // Average
209                for i in 0..row_data_len {
210                    let left = if i >= bytes_per_pixel {
211                        row[i - bytes_per_pixel]
212                    } else {
213                        0
214                    };
215                    let up = prev_row[i];
216                    let avg = ((left as u16 + up as u16) / 2) as u8;
217                    row[i] = row[i].wrapping_add(avg);
218                }
219            }
220            4 => {
221                // Paeth
222                for i in 0..row_data_len {
223                    let left = if i >= bytes_per_pixel {
224                        row[i - bytes_per_pixel]
225                    } else {
226                        0
227                    };
228                    let up = prev_row[i];
229                    let up_left = if i >= bytes_per_pixel {
230                        prev_row[i - bytes_per_pixel]
231                    } else {
232                        0
233                    };
234                    row[i] = row[i].wrapping_add(paeth(left, up, up_left));
235                }
236            }
237            other => {
238                return Err(PdfError::Corrupt(format!(
239                    "unknown PNG row filter type {other}"
240                )));
241            }
242        }
243        output.extend_from_slice(&row);
244        prev_row.copy_from_slice(&row);
245    }
246
247    Ok(output)
248}
249
/// Paeth predictor used by PNG filter type 4.
///
/// `a` is the byte to the left, `b` the byte above and `c` the byte above
/// and to the left. Returns whichever of the three is closest to the linear
/// estimate `a + b - c`; ties are resolved in the order `a`, `b`, `c`.
fn paeth(a: u8, b: u8, c: u8) -> u8 {
    let (left, above, diagonal) = (i32::from(a), i32::from(b), i32::from(c));
    let estimate = left + above - diagonal;

    let dist_left = (estimate - left).abs();
    let dist_above = (estimate - above).abs();
    let dist_diagonal = (estimate - diagonal).abs();

    if dist_left <= dist_above && dist_left <= dist_diagonal {
        return a;
    }
    if dist_above <= dist_diagonal { b } else { c }
}
263
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{PdfDictionary, PdfStream, PdfValue};
    use flate2::{Compression, write::ZlibEncoder};
    use std::io::Write;

    /// Zlib-compress `bytes` at the default level.
    fn deflate(bytes: &[u8]) -> Vec<u8> {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(bytes).unwrap();
        encoder.finish().unwrap()
    }

    /// Build a `FlateDecode` stream whose data is `payload` compressed,
    /// attaching `parms` as `DecodeParms` when given.
    fn flate_stream(payload: &[u8], parms: Option<PdfDictionary>) -> PdfStream {
        let mut dict = PdfDictionary::new();
        dict.insert("Filter".to_string(), PdfValue::Name("FlateDecode".into()));
        if let Some(parms) = parms {
            dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));
        }
        PdfStream {
            dict,
            data: deflate(payload),
        }
    }

    #[test]
    fn passthrough_when_no_filter() {
        let stream = PdfStream {
            dict: PdfDictionary::new(),
            data: vec![1, 2, 3],
        };
        assert_eq!(decode_stream(&stream).unwrap(), vec![1, 2, 3]);
    }

    #[test]
    fn inflates_flate_decode() {
        let raw = b"hello world";
        let stream = flate_stream(raw, None);
        assert_eq!(decode_stream(&stream).unwrap(), raw.to_vec());
    }

    #[test]
    fn applies_png_up_predictor() {
        // Two rows of four bytes each.
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let (top, bottom) = original.split_at(4);

        // Row 0 uses filter 0 (None); row 1 uses filter 2 (Up), i.e. each
        // byte is stored as its difference from the byte above.
        let mut encoded = vec![0];
        encoded.extend_from_slice(top);
        encoded.push(2);
        encoded.extend(
            bottom
                .iter()
                .zip(top)
                .map(|(value, above)| value.wrapping_sub(*above)),
        );

        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(12));
        parms.insert("Columns".to_string(), PdfValue::Integer(4));

        let stream = flate_stream(&encoded, Some(parms));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn applies_tiff_predictor() {
        // Two rows of four bytes each, 1 color, 8 bits per component.
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];

        // The TIFF predictor works row by row: the first byte is verbatim and
        // each later byte is (current - previous). There is no filter byte.
        let mut encoded = Vec::with_capacity(original.len());
        for row in original.chunks(4) {
            encoded.push(row[0]);
            encoded.extend(row.windows(2).map(|pair| pair[1].wrapping_sub(pair[0])));
        }

        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(2));
        parms.insert("Columns".to_string(), PdfValue::Integer(4));

        let stream = flate_stream(&encoded, Some(parms));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn rejects_unsupported_predictor() {
        // No Filter entry: the predictor is still consulted on the raw data.
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(3));
        let mut dict = PdfDictionary::new();
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));
        let stream = PdfStream {
            dict,
            data: vec![0, 0, 0, 0],
        };

        let outcome = decode_stream(&stream);
        match outcome {
            Err(PdfError::Unsupported(msg)) => {
                assert!(msg.contains("predictor"), "got: {msg}")
            }
            other => panic!("expected Unsupported, got: {other:?}"),
        }
    }
}