1use std::io::{Read, Write};
2
3use flate2::Compression;
4use flate2::read::ZlibDecoder;
5use flate2::write::ZlibEncoder;
6
7use crate::error::{PdfError, PdfResult};
8use crate::types::{PdfStream, PdfValue};
9
10pub fn flate_encode(data: &[u8]) -> PdfResult<Vec<u8>> {
14 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
15 encoder
16 .write_all(data)
17 .map_err(|error| PdfError::Corrupt(format!("flate encode failed: {error}")))?;
18 encoder
19 .finish()
20 .map_err(|error| PdfError::Corrupt(format!("flate encode finalize failed: {error}")))
21}
22
23pub fn decode_stream(stream: &PdfStream) -> PdfResult<Vec<u8>> {
24 let filter_names = normalize_filter_list(stream.dict.get("Filter"))?;
25 let mut decoded = stream.data.clone();
26 for filter_name in &filter_names {
27 decoded = apply_filter(filter_name, &decoded)?;
28 }
29 apply_predictor(&decoded, stream.dict.get("DecodeParms"))
30}
31
32fn normalize_filter_list(value: Option<&PdfValue>) -> PdfResult<Vec<String>> {
36 match value {
37 None => Ok(Vec::new()),
38 Some(PdfValue::Null) => Ok(Vec::new()),
39 Some(PdfValue::Name(name)) => Ok(vec![name.clone()]),
40 Some(PdfValue::Array(items)) => {
41 let mut names = Vec::with_capacity(items.len());
42 for item in items {
43 match item {
44 PdfValue::Name(name) => names.push(name.clone()),
45 _ => {
46 return Err(PdfError::Corrupt(
47 "stream /Filter array contains a non-name entry".to_string(),
48 ));
49 }
50 }
51 }
52 Ok(names)
53 }
54 Some(_) => Err(PdfError::Corrupt(
55 "stream /Filter is neither a name nor an array of names".to_string(),
56 )),
57 }
58}
59
60fn apply_filter(filter: &str, data: &[u8]) -> PdfResult<Vec<u8>> {
61 match filter {
62 "FlateDecode" | "Fl" => inflate(data),
63 "ASCII85Decode" | "A85" => ascii85_decode(data),
64 "ASCIIHexDecode" | "AHx" => ascii_hex_decode(data),
65 other => Err(PdfError::Unsupported(format!(
66 "stream filter /{other} is not supported"
67 ))),
68 }
69}
70
71const MAX_DECOMPRESSED_SIZE: u64 = 256 * 1024 * 1024;
74
75fn inflate(data: &[u8]) -> PdfResult<Vec<u8>> {
76 let decoder = ZlibDecoder::new(data);
77 let mut output = Vec::new();
78 decoder
79 .take(MAX_DECOMPRESSED_SIZE + 1)
80 .read_to_end(&mut output)
81 .map_err(|error| PdfError::Corrupt(format!("failed to decode flate stream: {error}")))?;
82 if output.len() as u64 > MAX_DECOMPRESSED_SIZE {
83 return Err(PdfError::Corrupt(
84 "decompressed stream exceeds maximum allowed size".to_string(),
85 ));
86 }
87 Ok(output)
88}
89
90fn ascii85_decode(data: &[u8]) -> PdfResult<Vec<u8>> {
95 let mut output = Vec::with_capacity(data.len());
96 let mut group = [0u8; 5];
97 let mut group_len = 0usize;
98
99 for &byte in data {
100 if byte == b'~' {
101 break; }
103 if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | 0x0C) {
104 continue;
105 }
106 if byte == b'z' {
107 if group_len != 0 {
108 return Err(PdfError::Corrupt(
109 "ASCII85 'z' shortcut inside a partial group".to_string(),
110 ));
111 }
112 output.extend_from_slice(&[0u8; 4]);
113 continue;
114 }
115 if !(b'!'..=b'u').contains(&byte) {
116 return Err(PdfError::Corrupt(format!(
117 "invalid ASCII85 byte 0x{byte:02X}"
118 )));
119 }
120 group[group_len] = byte - b'!';
121 group_len += 1;
122 if group_len == 5 {
123 let value = (group[0] as u64) * 85u64.pow(4)
124 + (group[1] as u64) * 85u64.pow(3)
125 + (group[2] as u64) * 85u64.pow(2)
126 + (group[3] as u64) * 85
127 + (group[4] as u64);
128 if value > u32::MAX as u64 {
129 return Err(PdfError::Corrupt(
130 "ASCII85 group value exceeds 32 bits".to_string(),
131 ));
132 }
133 output.extend_from_slice(&(value as u32).to_be_bytes());
134 group_len = 0;
135 }
136 }
137
138 if group_len > 0 {
139 if group_len == 1 {
140 return Err(PdfError::Corrupt(
141 "ASCII85 final group contains a single byte".to_string(),
142 ));
143 }
144 for entry in group.iter_mut().skip(group_len) {
146 *entry = 84;
147 }
148 let value = (group[0] as u64) * 85u64.pow(4)
149 + (group[1] as u64) * 85u64.pow(3)
150 + (group[2] as u64) * 85u64.pow(2)
151 + (group[3] as u64) * 85
152 + (group[4] as u64);
153 let bytes = (value as u32).to_be_bytes();
154 output.extend_from_slice(&bytes[..group_len - 1]);
155 }
156
157 Ok(output)
158}
159
160fn ascii_hex_decode(data: &[u8]) -> PdfResult<Vec<u8>> {
164 let mut output = Vec::with_capacity(data.len() / 2 + 1);
165 let mut high: Option<u8> = None;
166 for &byte in data {
167 if byte == b'>' {
168 break;
169 }
170 if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | 0x0C) {
171 continue;
172 }
173 let nibble = match byte {
174 b'0'..=b'9' => byte - b'0',
175 b'a'..=b'f' => byte - b'a' + 10,
176 b'A'..=b'F' => byte - b'A' + 10,
177 _ => {
178 return Err(PdfError::Corrupt(format!(
179 "invalid ASCIIHex byte 0x{byte:02X}"
180 )));
181 }
182 };
183 match high.take() {
184 None => high = Some(nibble),
185 Some(h) => output.push((h << 4) | nibble),
186 }
187 }
188 if let Some(h) = high {
189 output.push(h << 4);
190 }
191 Ok(output)
192}
193
194fn apply_predictor(data: &[u8], decode_parms: Option<&PdfValue>) -> PdfResult<Vec<u8>> {
195 let parms = match decode_parms {
196 None => return Ok(data.to_vec()),
197 Some(PdfValue::Dictionary(dict)) => dict,
198 Some(PdfValue::Null) => return Ok(data.to_vec()),
199 Some(PdfValue::Array(_)) => {
200 return Err(PdfError::Unsupported(
204 "per-filter DecodeParms arrays are not supported".to_string(),
205 ));
206 }
207 Some(_) => {
208 return Err(PdfError::Corrupt(
209 "DecodeParms is not a dictionary".to_string(),
210 ));
211 }
212 };
213
214 let predictor = parms
215 .get("Predictor")
216 .and_then(PdfValue::as_integer)
217 .unwrap_or(1);
218 match predictor {
219 1 => Ok(data.to_vec()),
220 2 => tiff_predictor_decode(data, parms),
221 10..=15 => png_predictor_decode(data, parms),
222 other => Err(PdfError::Unsupported(format!(
223 "predictor {other} is not supported"
224 ))),
225 }
226}
227
228fn tiff_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
229 let columns = parms
230 .get("Columns")
231 .and_then(PdfValue::as_integer)
232 .unwrap_or(1) as usize;
233 let colors = parms
234 .get("Colors")
235 .and_then(PdfValue::as_integer)
236 .unwrap_or(1) as usize;
237 let bits_per_component = parms
238 .get("BitsPerComponent")
239 .and_then(PdfValue::as_integer)
240 .unwrap_or(8) as usize;
241
242 if bits_per_component != 8 {
243 return Err(PdfError::Unsupported(format!(
244 "TIFF predictor with BitsPerComponent {bits_per_component} is not supported"
245 )));
246 }
247 if columns == 0 || colors == 0 {
248 return Err(PdfError::Corrupt(
249 "TIFF predictor Columns/Colors must be positive".to_string(),
250 ));
251 }
252 let row_stride = columns * colors;
253 if data.len() % row_stride != 0 {
254 return Err(PdfError::Corrupt(format!(
255 "TIFF predictor row length mismatch: data={} stride={row_stride}",
256 data.len()
257 )));
258 }
259 let mut output = Vec::with_capacity(data.len());
260 for row in data.chunks_exact(row_stride) {
261 for (component_index, byte) in row.iter().enumerate() {
262 if component_index < colors {
263 output.push(*byte);
265 } else {
266 let previous = output[output.len() - colors];
267 output.push(previous.wrapping_add(*byte));
268 }
269 }
270 }
271 Ok(output)
272}
273
274fn png_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
275 let columns = parms
276 .get("Columns")
277 .and_then(PdfValue::as_integer)
278 .unwrap_or(1) as usize;
279 let colors = parms
280 .get("Colors")
281 .and_then(PdfValue::as_integer)
282 .unwrap_or(1) as usize;
283 let bits_per_component = parms
284 .get("BitsPerComponent")
285 .and_then(PdfValue::as_integer)
286 .unwrap_or(8) as usize;
287
288 if bits_per_component != 8 {
289 return Err(PdfError::Unsupported(format!(
290 "PNG predictor with BitsPerComponent {bits_per_component} is not supported"
291 )));
292 }
293 if columns == 0 || colors == 0 {
294 return Err(PdfError::Corrupt(
295 "PNG predictor Columns/Colors must be positive".to_string(),
296 ));
297 }
298 let bytes_per_pixel = colors; let row_data_len = columns * bytes_per_pixel;
300 let row_stride = row_data_len + 1; if data.len() % row_stride != 0 {
303 return Err(PdfError::Corrupt(format!(
304 "PNG predictor row length mismatch: data={} stride={row_stride}",
305 data.len()
306 )));
307 }
308 let row_count = data.len() / row_stride;
309 let mut output = Vec::with_capacity(row_count * row_data_len);
310 let mut prev_row = vec![0u8; row_data_len];
311 let mut row = vec![0u8; row_data_len];
312
313 for r in 0..row_count {
314 let base = r * row_stride;
315 let filter = data[base];
316 let src = &data[base + 1..base + row_stride];
317 row.copy_from_slice(src);
318 match filter {
319 0 => {} 1 => {
321 for i in 0..row_data_len {
323 let left = if i >= bytes_per_pixel {
324 row[i - bytes_per_pixel]
325 } else {
326 0
327 };
328 row[i] = row[i].wrapping_add(left);
329 }
330 }
331 2 => {
332 for i in 0..row_data_len {
334 row[i] = row[i].wrapping_add(prev_row[i]);
335 }
336 }
337 3 => {
338 for i in 0..row_data_len {
340 let left = if i >= bytes_per_pixel {
341 row[i - bytes_per_pixel]
342 } else {
343 0
344 };
345 let up = prev_row[i];
346 let avg = ((left as u16 + up as u16) / 2) as u8;
347 row[i] = row[i].wrapping_add(avg);
348 }
349 }
350 4 => {
351 for i in 0..row_data_len {
353 let left = if i >= bytes_per_pixel {
354 row[i - bytes_per_pixel]
355 } else {
356 0
357 };
358 let up = prev_row[i];
359 let up_left = if i >= bytes_per_pixel {
360 prev_row[i - bytes_per_pixel]
361 } else {
362 0
363 };
364 row[i] = row[i].wrapping_add(paeth(left, up, up_left));
365 }
366 }
367 other => {
368 return Err(PdfError::Corrupt(format!(
369 "unknown PNG row filter type {other}"
370 )));
371 }
372 }
373 output.extend_from_slice(&row);
374 prev_row.copy_from_slice(&row);
375 }
376
377 Ok(output)
378}
379
/// Picks whichever of `a`, `b`, `c` is closest to `a + b - c`.
///
/// Ties are resolved in the order `a`, then `b`, then `c`.
fn paeth(a: u8, b: u8, c: u8) -> u8 {
    let (first, second, third) = (i32::from(a), i32::from(b), i32::from(c));
    let estimate = first + second - third;
    let distance_a = (estimate - first).abs();
    let distance_b = (estimate - second).abs();
    let distance_c = (estimate - third).abs();

    if distance_a <= distance_b && distance_a <= distance_c {
        return a;
    }
    if distance_b <= distance_c {
        b
    } else {
        c
    }
}
393
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{PdfDictionary, PdfStream, PdfValue};
    use flate2::{Compression, write::ZlibEncoder};
    use std::io::Write;

    /// Wraps a dictionary and its raw bytes into a stream.
    fn make_stream(dict: PdfDictionary, data: Vec<u8>) -> PdfStream {
        PdfStream { dict, data }
    }

    /// Zlib-compresses `bytes` with the default compression level.
    fn zlib(bytes: &[u8]) -> Vec<u8> {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(bytes).unwrap();
        encoder.finish().unwrap()
    }

    /// Dictionary whose only entry is the given `Filter` value.
    fn dict_with_filter(filter: PdfValue) -> PdfDictionary {
        let mut dict = PdfDictionary::new();
        dict.insert("Filter".to_string(), filter);
        dict
    }

    /// FlateDecode dictionary carrying `DecodeParms` with the given predictor
    /// and four columns.
    fn flate_dict_with_predictor(predictor: PdfValue) -> PdfDictionary {
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), predictor);
        parms.insert("Columns".to_string(), PdfValue::Integer(4));
        let mut dict = dict_with_filter(PdfValue::Name("FlateDecode".into()));
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));
        dict
    }

    /// Minimal ASCII85 encoder used to build test input, `~>` marker included.
    fn to_ascii85(bytes: &[u8]) -> String {
        let mut ascii85 = String::new();
        for chunk in bytes.chunks(4) {
            let mut word = [0u8; 4];
            word[..chunk.len()].copy_from_slice(chunk);
            let value = u32::from_be_bytes(word);
            if chunk.len() == 4 && value == 0 {
                ascii85.push('z');
                continue;
            }
            let mut digits = [0u8; 5];
            let mut remaining = value;
            for slot in digits.iter_mut().rev() {
                *slot = (remaining % 85) as u8 + b'!';
                remaining /= 85;
            }
            // A chunk of n bytes is written as its first n + 1 digits.
            ascii85.extend(digits[..chunk.len() + 1].iter().map(|&digit| digit as char));
        }
        ascii85.push_str("~>");
        ascii85
    }

    #[test]
    fn passthrough_when_no_filter() {
        let stream = make_stream(PdfDictionary::new(), vec![1, 2, 3]);
        assert_eq!(decode_stream(&stream).unwrap(), vec![1, 2, 3]);
    }

    #[test]
    fn inflates_flate_decode() {
        let raw = b"hello world";
        let dict = dict_with_filter(PdfValue::Name("FlateDecode".into()));
        let stream = make_stream(dict, zlib(raw));
        assert_eq!(decode_stream(&stream).unwrap(), raw.to_vec());
    }

    #[test]
    fn applies_png_up_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let (first_row, second_row) = original.split_at(4);

        // Row one uses filter type 0; row two uses filter type 2 and stores
        // differences against row one.
        let mut encoded = vec![0u8];
        encoded.extend_from_slice(first_row);
        encoded.push(2);
        encoded.extend(
            second_row
                .iter()
                .zip(first_row)
                .map(|(value, up)| value.wrapping_sub(*up)),
        );

        let dict = flate_dict_with_predictor(PdfValue::Integer(12));
        let stream = make_stream(dict, zlib(&encoded));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn applies_tiff_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];

        // Each row keeps its first byte and stores left-neighbour differences
        // for the rest.
        let mut encoded = Vec::new();
        for row in original.chunks(4) {
            encoded.push(row[0]);
            encoded.extend(row.windows(2).map(|pair| pair[1].wrapping_sub(pair[0])));
        }

        let dict = flate_dict_with_predictor(PdfValue::Integer(2));
        let stream = make_stream(dict, zlib(&encoded));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn decodes_ascii85_full_group() {
        let dict = dict_with_filter(PdfValue::Name("ASCII85Decode".into()));
        let stream = make_stream(dict, b"9jqo^~>".to_vec());
        assert_eq!(decode_stream(&stream).unwrap(), b"Man ".to_vec());
    }

    #[test]
    fn decodes_ascii85_z_shortcut() {
        let dict = dict_with_filter(PdfValue::Name("ASCII85Decode".into()));
        let stream = make_stream(dict, b"z~>".to_vec());
        assert_eq!(decode_stream(&stream).unwrap(), vec![0, 0, 0, 0]);
    }

    #[test]
    fn decodes_filter_chain_ascii85_then_flate() {
        let plaintext = b"PdfStreamFilterChainTest".to_vec();
        let ascii85 = to_ascii85(&zlib(&plaintext));

        let dict = dict_with_filter(PdfValue::Array(vec![
            PdfValue::Name("ASCII85Decode".into()),
            PdfValue::Name("FlateDecode".into()),
        ]));
        let stream = make_stream(dict, ascii85.into_bytes());
        assert_eq!(decode_stream(&stream).unwrap(), plaintext);
    }

    #[test]
    fn decodes_ascii_hex() {
        let dict = dict_with_filter(PdfValue::Name("ASCIIHexDecode".into()));
        let stream = make_stream(dict, b"48656C6C6F>".to_vec());
        assert_eq!(decode_stream(&stream).unwrap(), b"Hello".to_vec());
    }

    #[test]
    fn rejects_unsupported_predictor() {
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(3));
        let mut dict = PdfDictionary::new();
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));
        let stream = make_stream(dict, vec![0, 0, 0, 0]);
        match decode_stream(&stream) {
            Err(PdfError::Unsupported(msg)) => {
                assert!(msg.contains("predictor"), "got: {msg}")
            }
            other => panic!("expected Unsupported, got: {other:?}"),
        }
    }
}