Skip to main content

pdf_objects/
stream.rs

1use std::io::{Read, Write};
2
3use flate2::Compression;
4use flate2::read::ZlibDecoder;
5use flate2::write::ZlibEncoder;
6
7use crate::error::{PdfError, PdfResult};
8use crate::types::{PdfStream, PdfValue};
9
10/// Compress `data` with FlateDecode (zlib / deflate) at the default
11/// compression level. Used by the writer when re-emitting rewritten content
12/// streams so the saved PDF does not bloat with plaintext content bytes.
13pub fn flate_encode(data: &[u8]) -> PdfResult<Vec<u8>> {
14    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
15    encoder
16        .write_all(data)
17        .map_err(|error| PdfError::Corrupt(format!("flate encode failed: {error}")))?;
18    encoder
19        .finish()
20        .map_err(|error| PdfError::Corrupt(format!("flate encode finalize failed: {error}")))
21}
22
23pub fn decode_stream(stream: &PdfStream) -> PdfResult<Vec<u8>> {
24    let inflated = match stream.dict.get("Filter") {
25        None => stream.data.clone(),
26        Some(PdfValue::Name(name)) if name == "FlateDecode" => inflate(stream.data.as_slice())?,
27        Some(PdfValue::Array(filters)) if filters.len() == 1 => match filters.first() {
28            Some(PdfValue::Name(name)) if name == "FlateDecode" => {
29                inflate(stream.data.as_slice())?
30            }
31            _ => {
32                return Err(PdfError::Unsupported(
33                    "only a single FlateDecode filter is supported".to_string(),
34                ));
35            }
36        },
37        Some(_) => {
38            return Err(PdfError::Unsupported(
39                "unsupported stream filter configuration".to_string(),
40            ));
41        }
42    };
43
44    apply_predictor(&inflated, stream.dict.get("DecodeParms"))
45}
46
/// Upper bound, in bytes, on the output of a single stream decompression:
/// 256 MiB. Guards against decompression bombs exhausting memory in WASM or
/// native contexts.
const MAX_DECOMPRESSED_SIZE: u64 = 256 << 20;
50
51fn inflate(data: &[u8]) -> PdfResult<Vec<u8>> {
52    let decoder = ZlibDecoder::new(data);
53    let mut output = Vec::new();
54    decoder
55        .take(MAX_DECOMPRESSED_SIZE + 1)
56        .read_to_end(&mut output)
57        .map_err(|error| PdfError::Corrupt(format!("failed to decode flate stream: {error}")))?;
58    if output.len() as u64 > MAX_DECOMPRESSED_SIZE {
59        return Err(PdfError::Corrupt(
60            "decompressed stream exceeds maximum allowed size".to_string(),
61        ));
62    }
63    Ok(output)
64}
65
66fn apply_predictor(data: &[u8], decode_parms: Option<&PdfValue>) -> PdfResult<Vec<u8>> {
67    let parms = match decode_parms {
68        None => return Ok(data.to_vec()),
69        Some(PdfValue::Dictionary(dict)) => dict,
70        Some(PdfValue::Null) => return Ok(data.to_vec()),
71        Some(PdfValue::Array(_)) => {
72            // Per-filter DecodeParms arrays are legal when multiple filters are
73            // chained. We only support a single FlateDecode filter today so any
74            // array-valued DecodeParms is unexpected.
75            return Err(PdfError::Unsupported(
76                "per-filter DecodeParms arrays are not supported".to_string(),
77            ));
78        }
79        Some(_) => {
80            return Err(PdfError::Corrupt(
81                "DecodeParms is not a dictionary".to_string(),
82            ));
83        }
84    };
85
86    let predictor = parms
87        .get("Predictor")
88        .and_then(PdfValue::as_integer)
89        .unwrap_or(1);
90    match predictor {
91        1 => Ok(data.to_vec()),
92        2 => tiff_predictor_decode(data, parms),
93        10..=15 => png_predictor_decode(data, parms),
94        other => Err(PdfError::Unsupported(format!(
95            "predictor {other} is not supported"
96        ))),
97    }
98}
99
100fn tiff_predictor_decode(
101    data: &[u8],
102    parms: &crate::types::PdfDictionary,
103) -> PdfResult<Vec<u8>> {
104    let columns = parms
105        .get("Columns")
106        .and_then(PdfValue::as_integer)
107        .unwrap_or(1) as usize;
108    let colors = parms
109        .get("Colors")
110        .and_then(PdfValue::as_integer)
111        .unwrap_or(1) as usize;
112    let bits_per_component = parms
113        .get("BitsPerComponent")
114        .and_then(PdfValue::as_integer)
115        .unwrap_or(8) as usize;
116
117    if bits_per_component != 8 {
118        return Err(PdfError::Unsupported(format!(
119            "TIFF predictor with BitsPerComponent {bits_per_component} is not supported"
120        )));
121    }
122    if columns == 0 || colors == 0 {
123        return Err(PdfError::Corrupt(
124            "TIFF predictor Columns/Colors must be positive".to_string(),
125        ));
126    }
127    let row_stride = columns * colors;
128    if data.len() % row_stride != 0 {
129        return Err(PdfError::Corrupt(format!(
130            "TIFF predictor row length mismatch: data={} stride={row_stride}",
131            data.len()
132        )));
133    }
134    let mut output = Vec::with_capacity(data.len());
135    for row in data.chunks_exact(row_stride) {
136        for (component_index, byte) in row.iter().enumerate() {
137            if component_index < colors {
138                // First pixel in a row is stored as-is per component.
139                output.push(*byte);
140            } else {
141                let previous = output[output.len() - colors];
142                output.push(previous.wrapping_add(*byte));
143            }
144        }
145    }
146    Ok(output)
147}
148
149fn png_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
150    let columns = parms
151        .get("Columns")
152        .and_then(PdfValue::as_integer)
153        .unwrap_or(1) as usize;
154    let colors = parms
155        .get("Colors")
156        .and_then(PdfValue::as_integer)
157        .unwrap_or(1) as usize;
158    let bits_per_component = parms
159        .get("BitsPerComponent")
160        .and_then(PdfValue::as_integer)
161        .unwrap_or(8) as usize;
162
163    if bits_per_component != 8 {
164        return Err(PdfError::Unsupported(format!(
165            "PNG predictor with BitsPerComponent {bits_per_component} is not supported"
166        )));
167    }
168    if columns == 0 || colors == 0 {
169        return Err(PdfError::Corrupt(
170            "PNG predictor Columns/Colors must be positive".to_string(),
171        ));
172    }
173    let bytes_per_pixel = colors; // bits_per_component == 8
174    let row_data_len = columns * bytes_per_pixel;
175    let row_stride = row_data_len + 1; // leading filter byte
176
177    if data.len() % row_stride != 0 {
178        return Err(PdfError::Corrupt(format!(
179            "PNG predictor row length mismatch: data={} stride={row_stride}",
180            data.len()
181        )));
182    }
183    let row_count = data.len() / row_stride;
184    let mut output = Vec::with_capacity(row_count * row_data_len);
185    let mut prev_row = vec![0u8; row_data_len];
186    let mut row = vec![0u8; row_data_len];
187
188    for r in 0..row_count {
189        let base = r * row_stride;
190        let filter = data[base];
191        let src = &data[base + 1..base + row_stride];
192        row.copy_from_slice(src);
193        match filter {
194            0 => {} // None
195            1 => {
196                // Sub
197                for i in 0..row_data_len {
198                    let left = if i >= bytes_per_pixel {
199                        row[i - bytes_per_pixel]
200                    } else {
201                        0
202                    };
203                    row[i] = row[i].wrapping_add(left);
204                }
205            }
206            2 => {
207                // Up
208                for i in 0..row_data_len {
209                    row[i] = row[i].wrapping_add(prev_row[i]);
210                }
211            }
212            3 => {
213                // Average
214                for i in 0..row_data_len {
215                    let left = if i >= bytes_per_pixel {
216                        row[i - bytes_per_pixel]
217                    } else {
218                        0
219                    };
220                    let up = prev_row[i];
221                    let avg = ((left as u16 + up as u16) / 2) as u8;
222                    row[i] = row[i].wrapping_add(avg);
223                }
224            }
225            4 => {
226                // Paeth
227                for i in 0..row_data_len {
228                    let left = if i >= bytes_per_pixel {
229                        row[i - bytes_per_pixel]
230                    } else {
231                        0
232                    };
233                    let up = prev_row[i];
234                    let up_left = if i >= bytes_per_pixel {
235                        prev_row[i - bytes_per_pixel]
236                    } else {
237                        0
238                    };
239                    row[i] = row[i].wrapping_add(paeth(left, up, up_left));
240                }
241            }
242            other => {
243                return Err(PdfError::Corrupt(format!(
244                    "unknown PNG row filter type {other}"
245                )));
246            }
247        }
248        output.extend_from_slice(&row);
249        prev_row.copy_from_slice(&row);
250    }
251
252    Ok(output)
253}
254
/// Paeth predictor: returns whichever of `a`, `b` or `c` is closest to
/// `a + b - c`, preferring `a`, then `b`, then `c` when distances tie.
fn paeth(a: u8, b: u8, c: u8) -> u8 {
    let (first, second, third) = (i32::from(a), i32::from(b), i32::from(c));
    let estimate = first + second - third;

    let dist_a = (estimate - first).abs();
    let dist_b = (estimate - second).abs();
    let dist_c = (estimate - third).abs();

    if dist_a <= dist_b && dist_a <= dist_c {
        return a;
    }
    if dist_b <= dist_c { b } else { c }
}
268
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{PdfDictionary, PdfStream, PdfValue};
    use flate2::{Compression, write::ZlibEncoder};
    use std::io::Write;

    /// Bundles a dictionary and raw payload into a stream.
    fn make_stream(dict: PdfDictionary, data: Vec<u8>) -> PdfStream {
        PdfStream { dict, data }
    }

    /// Zlib-compresses `bytes` at the default level.
    fn deflate(bytes: &[u8]) -> Vec<u8> {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(bytes).unwrap();
        encoder.finish().unwrap()
    }

    /// Stream dictionary whose only entry is `Filter = FlateDecode`.
    fn flate_dict() -> PdfDictionary {
        let mut dict = PdfDictionary::new();
        dict.insert("Filter".to_string(), PdfValue::Name("FlateDecode".into()));
        dict
    }

    #[test]
    fn passthrough_when_no_filter() {
        let stream = make_stream(PdfDictionary::new(), vec![1, 2, 3]);
        assert_eq!(decode_stream(&stream).unwrap(), vec![1, 2, 3]);
    }

    #[test]
    fn inflates_flate_decode() {
        let raw = b"hello world";
        let stream = make_stream(flate_dict(), deflate(raw));
        assert_eq!(decode_stream(&stream).unwrap(), raw.to_vec());
    }

    #[test]
    fn applies_png_up_predictor() {
        // Original 2 rows of 4 bytes each.
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let (top, bottom) = original.split_at(4);

        // Row 0 uses filter type 0 (None); row 1 uses filter type 2 (Up),
        // i.e. each byte is stored as its difference from the byte above.
        let mut encoded = vec![0u8];
        encoded.extend_from_slice(top);
        encoded.push(2);
        encoded.extend(
            bottom
                .iter()
                .zip(top)
                .map(|(value, above)| value.wrapping_sub(*above)),
        );

        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(12));
        parms.insert("Columns".to_string(), PdfValue::Integer(4));
        let mut dict = flate_dict();
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));

        let stream = make_stream(dict, deflate(&encoded));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn applies_tiff_predictor() {
        // Original 2 rows of 4 bytes each, 1 color, 8 bits per component.
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];

        // Each row is encoded independently: the first byte verbatim, every
        // following byte as (current - previous). There is no filter byte.
        let encoded: Vec<u8> = original
            .chunks(4)
            .flat_map(|row| {
                std::iter::once(row[0])
                    .chain(row.windows(2).map(|pair| pair[1].wrapping_sub(pair[0])))
            })
            .collect();

        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(2));
        parms.insert("Columns".to_string(), PdfValue::Integer(4));
        let mut dict = flate_dict();
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));

        let stream = make_stream(dict, deflate(&encoded));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn rejects_unsupported_predictor() {
        // No Filter entry: the payload is used as-is and only the predictor
        // lookup is exercised.
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(3));
        let mut dict = PdfDictionary::new();
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));

        let outcome = decode_stream(&make_stream(dict, vec![0, 0, 0, 0]));
        match outcome {
            Err(PdfError::Unsupported(msg)) => {
                assert!(msg.contains("predictor"), "got: {msg}")
            }
            other => panic!("expected Unsupported, got: {other:?}"),
        }
    }
}