1use std::io::{Read, Write};
2
3use flate2::Compression;
4use flate2::read::ZlibDecoder;
5use flate2::write::ZlibEncoder;
6
7use crate::error::{PdfError, PdfResult};
8use crate::types::{PdfStream, PdfValue};
9
10pub fn flate_encode(data: &[u8]) -> PdfResult<Vec<u8>> {
14 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
15 encoder
16 .write_all(data)
17 .map_err(|error| PdfError::Corrupt(format!("flate encode failed: {error}")))?;
18 encoder
19 .finish()
20 .map_err(|error| PdfError::Corrupt(format!("flate encode finalize failed: {error}")))
21}
22
23pub fn decode_stream(stream: &PdfStream) -> PdfResult<Vec<u8>> {
24 let inflated = match stream.dict.get("Filter") {
25 None => stream.data.clone(),
26 Some(PdfValue::Name(name)) if name == "FlateDecode" => inflate(stream.data.as_slice())?,
27 Some(PdfValue::Array(filters)) if filters.len() == 1 => match filters.first() {
28 Some(PdfValue::Name(name)) if name == "FlateDecode" => {
29 inflate(stream.data.as_slice())?
30 }
31 _ => {
32 return Err(PdfError::Unsupported(
33 "only a single FlateDecode filter is supported".to_string(),
34 ));
35 }
36 },
37 Some(_) => {
38 return Err(PdfError::Unsupported(
39 "unsupported stream filter configuration".to_string(),
40 ));
41 }
42 };
43
44 apply_predictor(&inflated, stream.dict.get("DecodeParms"))
45}
46
/// Largest inflated size, in bytes, that `inflate` accepts for one flate stream (256 MiB).
///
/// `inflate` reads at most one byte more than this limit and reports an error when the
/// output exceeds it, so a small compressed input cannot expand without bound.
const MAX_DECOMPRESSED_SIZE: u64 = 256 * 1024 * 1024;
50
51fn inflate(data: &[u8]) -> PdfResult<Vec<u8>> {
52 let decoder = ZlibDecoder::new(data);
53 let mut output = Vec::new();
54 decoder
55 .take(MAX_DECOMPRESSED_SIZE + 1)
56 .read_to_end(&mut output)
57 .map_err(|error| PdfError::Corrupt(format!("failed to decode flate stream: {error}")))?;
58 if output.len() as u64 > MAX_DECOMPRESSED_SIZE {
59 return Err(PdfError::Corrupt(
60 "decompressed stream exceeds maximum allowed size".to_string(),
61 ));
62 }
63 Ok(output)
64}
65
66fn apply_predictor(data: &[u8], decode_parms: Option<&PdfValue>) -> PdfResult<Vec<u8>> {
67 let parms = match decode_parms {
68 None => return Ok(data.to_vec()),
69 Some(PdfValue::Dictionary(dict)) => dict,
70 Some(PdfValue::Null) => return Ok(data.to_vec()),
71 Some(PdfValue::Array(_)) => {
72 return Err(PdfError::Unsupported(
76 "per-filter DecodeParms arrays are not supported".to_string(),
77 ));
78 }
79 Some(_) => {
80 return Err(PdfError::Corrupt(
81 "DecodeParms is not a dictionary".to_string(),
82 ));
83 }
84 };
85
86 let predictor = parms
87 .get("Predictor")
88 .and_then(PdfValue::as_integer)
89 .unwrap_or(1);
90 match predictor {
91 1 => Ok(data.to_vec()),
92 2 => tiff_predictor_decode(data, parms),
93 10..=15 => png_predictor_decode(data, parms),
94 other => Err(PdfError::Unsupported(format!(
95 "predictor {other} is not supported"
96 ))),
97 }
98}
99
100fn tiff_predictor_decode(
101 data: &[u8],
102 parms: &crate::types::PdfDictionary,
103) -> PdfResult<Vec<u8>> {
104 let columns = parms
105 .get("Columns")
106 .and_then(PdfValue::as_integer)
107 .unwrap_or(1) as usize;
108 let colors = parms
109 .get("Colors")
110 .and_then(PdfValue::as_integer)
111 .unwrap_or(1) as usize;
112 let bits_per_component = parms
113 .get("BitsPerComponent")
114 .and_then(PdfValue::as_integer)
115 .unwrap_or(8) as usize;
116
117 if bits_per_component != 8 {
118 return Err(PdfError::Unsupported(format!(
119 "TIFF predictor with BitsPerComponent {bits_per_component} is not supported"
120 )));
121 }
122 if columns == 0 || colors == 0 {
123 return Err(PdfError::Corrupt(
124 "TIFF predictor Columns/Colors must be positive".to_string(),
125 ));
126 }
127 let row_stride = columns * colors;
128 if data.len() % row_stride != 0 {
129 return Err(PdfError::Corrupt(format!(
130 "TIFF predictor row length mismatch: data={} stride={row_stride}",
131 data.len()
132 )));
133 }
134 let mut output = Vec::with_capacity(data.len());
135 for row in data.chunks_exact(row_stride) {
136 for (component_index, byte) in row.iter().enumerate() {
137 if component_index < colors {
138 output.push(*byte);
140 } else {
141 let previous = output[output.len() - colors];
142 output.push(previous.wrapping_add(*byte));
143 }
144 }
145 }
146 Ok(output)
147}
148
149fn png_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
150 let columns = parms
151 .get("Columns")
152 .and_then(PdfValue::as_integer)
153 .unwrap_or(1) as usize;
154 let colors = parms
155 .get("Colors")
156 .and_then(PdfValue::as_integer)
157 .unwrap_or(1) as usize;
158 let bits_per_component = parms
159 .get("BitsPerComponent")
160 .and_then(PdfValue::as_integer)
161 .unwrap_or(8) as usize;
162
163 if bits_per_component != 8 {
164 return Err(PdfError::Unsupported(format!(
165 "PNG predictor with BitsPerComponent {bits_per_component} is not supported"
166 )));
167 }
168 if columns == 0 || colors == 0 {
169 return Err(PdfError::Corrupt(
170 "PNG predictor Columns/Colors must be positive".to_string(),
171 ));
172 }
173 let bytes_per_pixel = colors; let row_data_len = columns * bytes_per_pixel;
175 let row_stride = row_data_len + 1; if data.len() % row_stride != 0 {
178 return Err(PdfError::Corrupt(format!(
179 "PNG predictor row length mismatch: data={} stride={row_stride}",
180 data.len()
181 )));
182 }
183 let row_count = data.len() / row_stride;
184 let mut output = Vec::with_capacity(row_count * row_data_len);
185 let mut prev_row = vec![0u8; row_data_len];
186 let mut row = vec![0u8; row_data_len];
187
188 for r in 0..row_count {
189 let base = r * row_stride;
190 let filter = data[base];
191 let src = &data[base + 1..base + row_stride];
192 row.copy_from_slice(src);
193 match filter {
194 0 => {} 1 => {
196 for i in 0..row_data_len {
198 let left = if i >= bytes_per_pixel {
199 row[i - bytes_per_pixel]
200 } else {
201 0
202 };
203 row[i] = row[i].wrapping_add(left);
204 }
205 }
206 2 => {
207 for i in 0..row_data_len {
209 row[i] = row[i].wrapping_add(prev_row[i]);
210 }
211 }
212 3 => {
213 for i in 0..row_data_len {
215 let left = if i >= bytes_per_pixel {
216 row[i - bytes_per_pixel]
217 } else {
218 0
219 };
220 let up = prev_row[i];
221 let avg = ((left as u16 + up as u16) / 2) as u8;
222 row[i] = row[i].wrapping_add(avg);
223 }
224 }
225 4 => {
226 for i in 0..row_data_len {
228 let left = if i >= bytes_per_pixel {
229 row[i - bytes_per_pixel]
230 } else {
231 0
232 };
233 let up = prev_row[i];
234 let up_left = if i >= bytes_per_pixel {
235 prev_row[i - bytes_per_pixel]
236 } else {
237 0
238 };
239 row[i] = row[i].wrapping_add(paeth(left, up, up_left));
240 }
241 }
242 other => {
243 return Err(PdfError::Corrupt(format!(
244 "unknown PNG row filter type {other}"
245 )));
246 }
247 }
248 output.extend_from_slice(&row);
249 prev_row.copy_from_slice(&row);
250 }
251
252 Ok(output)
253}
254
/// Paeth predictor: picks whichever of `a` (left), `b` (above) and `c` (upper-left) is
/// closest to the linear estimate `a + b - c`.
///
/// Ties are resolved in the order `a`, then `b`, then `c`.
fn paeth(a: u8, b: u8, c: u8) -> u8 {
    // Work in a signed type wide enough that the estimate can neither overflow nor
    // underflow.
    let estimate = i32::from(a) + i32::from(b) - i32::from(c);
    let distance = |candidate: u8| (estimate - i32::from(candidate)).abs();

    let (to_left, to_up, to_up_left) = (distance(a), distance(b), distance(c));

    if to_left <= to_up && to_left <= to_up_left {
        return a;
    }
    if to_up <= to_up_left {
        return b;
    }
    c
}
268
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{PdfDictionary, PdfStream, PdfValue};
    use flate2::{Compression, write::ZlibEncoder};
    use std::io::Write;

    /// Bundles a dictionary and raw payload into a stream object.
    fn make_stream(dict: PdfDictionary, data: Vec<u8>) -> PdfStream {
        PdfStream { dict, data }
    }

    /// Zlib-compresses `bytes` at the default compression level.
    fn deflate(bytes: &[u8]) -> Vec<u8> {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(bytes).unwrap();
        encoder.finish().unwrap()
    }

    /// Builds a `FlateDecode` stream whose payload is `plain` compressed, optionally
    /// carrying `decode_parms` as its `DecodeParms` dictionary.
    fn flate_stream(decode_parms: Option<PdfDictionary>, plain: &[u8]) -> PdfStream {
        let compressed = deflate(plain);
        let mut dict = PdfDictionary::new();
        dict.insert("Filter".to_string(), PdfValue::Name("FlateDecode".into()));
        if let Some(parms) = decode_parms {
            dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));
        }
        make_stream(dict, compressed)
    }

    #[test]
    fn passthrough_when_no_filter() {
        let stream = make_stream(PdfDictionary::new(), vec![1, 2, 3]);
        let decoded = decode_stream(&stream).unwrap();
        assert_eq!(decoded, vec![1, 2, 3]);
    }

    #[test]
    fn inflates_flate_decode() {
        let raw = b"hello world";
        let stream = flate_stream(None, raw);
        assert_eq!(decode_stream(&stream).unwrap(), raw.to_vec());
    }

    #[test]
    fn applies_png_up_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let (first_row, second_row) = original.split_at(4);

        // Row one uses filter 0 (None); row two uses filter 2 (Up), i.e. it stores the
        // wrapping difference to the row above.
        let mut encoded = vec![0u8];
        encoded.extend_from_slice(first_row);
        encoded.push(2);
        encoded.extend(
            second_row
                .iter()
                .zip(first_row)
                .map(|(value, up)| value.wrapping_sub(*up)),
        );

        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(12));
        parms.insert("Columns".to_string(), PdfValue::Integer(4));

        let stream = flate_stream(Some(parms), &encoded);
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn applies_tiff_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];

        // Each four-byte row keeps its first byte and stores every later byte as the
        // wrapping difference to its left neighbour.
        let mut encoded = Vec::new();
        for row in original.chunks(4) {
            encoded.push(row[0]);
            encoded.extend(row.windows(2).map(|pair| pair[1].wrapping_sub(pair[0])));
        }

        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(2));
        parms.insert("Columns".to_string(), PdfValue::Integer(4));

        let stream = flate_stream(Some(parms), &encoded);
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn rejects_unsupported_predictor() {
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(3));
        let mut dict = PdfDictionary::new();
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));

        let outcome = decode_stream(&make_stream(dict, vec![0, 0, 0, 0]));
        match outcome {
            Err(PdfError::Unsupported(msg)) => {
                assert!(msg.contains("predictor"), "got: {msg}")
            }
            other => panic!("expected Unsupported, got: {other:?}"),
        }
    }
}