1use std::io::{Read, Write};
2
3use flate2::Compression;
4use flate2::read::ZlibDecoder;
5use flate2::write::ZlibEncoder;
6
7use crate::error::{PdfError, PdfResult};
8use crate::types::{PdfStream, PdfValue};
9
10pub fn flate_encode(data: &[u8]) -> PdfResult<Vec<u8>> {
14 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
15 encoder
16 .write_all(data)
17 .map_err(|error| PdfError::Corrupt(format!("flate encode failed: {error}")))?;
18 encoder
19 .finish()
20 .map_err(|error| PdfError::Corrupt(format!("flate encode finalize failed: {error}")))
21}
22
23pub fn decode_stream(stream: &PdfStream) -> PdfResult<Vec<u8>> {
24 let filter_names = normalize_filter_list(stream.dict.get("Filter"))?;
25 let decode_parms = stream.dict.get("DecodeParms");
26 let mut decoded = stream.data.clone();
27 for (index, filter_name) in filter_names.iter().enumerate() {
28 let is_last = index + 1 == filter_names.len();
29 decoded = match filter_name.as_str() {
30 "LZWDecode" | "LZW" => {
33 let early_change = if is_last {
34 lzw_early_change(decode_parms)?
35 } else {
36 true
37 };
38 lzw_decode(&decoded, early_change)?
39 }
40 _ => apply_filter(filter_name, &decoded)?,
41 };
42 }
43 apply_predictor(&decoded, decode_parms)
44}
45
46fn normalize_filter_list(value: Option<&PdfValue>) -> PdfResult<Vec<String>> {
50 match value {
51 None => Ok(Vec::new()),
52 Some(PdfValue::Null) => Ok(Vec::new()),
53 Some(PdfValue::Name(name)) => Ok(vec![name.clone()]),
54 Some(PdfValue::Array(items)) => {
55 let mut names = Vec::with_capacity(items.len());
56 for item in items {
57 match item {
58 PdfValue::Name(name) => names.push(name.clone()),
59 _ => {
60 return Err(PdfError::Corrupt(
61 "stream /Filter array contains a non-name entry".to_string(),
62 ));
63 }
64 }
65 }
66 Ok(names)
67 }
68 Some(_) => Err(PdfError::Corrupt(
69 "stream /Filter is neither a name nor an array of names".to_string(),
70 )),
71 }
72}
73
74fn apply_filter(filter: &str, data: &[u8]) -> PdfResult<Vec<u8>> {
75 match filter {
76 "FlateDecode" | "Fl" => inflate(data),
77 "ASCII85Decode" | "A85" => ascii85_decode(data),
78 "ASCIIHexDecode" | "AHx" => ascii_hex_decode(data),
79 "LZWDecode" | "LZW" => lzw_decode(data, true),
80 "RunLengthDecode" | "RL" => run_length_decode(data),
81 other => Err(PdfError::Unsupported(format!(
82 "stream filter /{other} is not supported"
83 ))),
84 }
85}
86
87fn lzw_early_change(decode_parms: Option<&PdfValue>) -> PdfResult<bool> {
93 let Some(value) = decode_parms else {
94 return Ok(true);
95 };
96 let dict = match value {
97 PdfValue::Dictionary(dict) => dict,
98 PdfValue::Null => return Ok(true),
99 PdfValue::Array(_) => {
100 return Err(PdfError::Unsupported(
101 "per-filter DecodeParms arrays are not supported".to_string(),
102 ));
103 }
104 _ => {
105 return Err(PdfError::Corrupt(
106 "DecodeParms is not a dictionary".to_string(),
107 ));
108 }
109 };
110 match dict.get("EarlyChange").and_then(PdfValue::as_integer) {
111 None => Ok(true),
112 Some(1) => Ok(true),
113 Some(0) => Ok(false),
114 Some(other) => Err(PdfError::Corrupt(format!(
115 "unsupported LZW EarlyChange value {other}"
116 ))),
117 }
118}
119
/// Largest decoded output, in bytes, that the decoders in this file will
/// produce before giving up with an error: 256 MiB.
const MAX_DECOMPRESSED_SIZE: u64 = 256 << 20;
123
124fn inflate(data: &[u8]) -> PdfResult<Vec<u8>> {
125 let decoder = ZlibDecoder::new(data);
126 let mut output = Vec::new();
127 decoder
128 .take(MAX_DECOMPRESSED_SIZE + 1)
129 .read_to_end(&mut output)
130 .map_err(|error| PdfError::Corrupt(format!("failed to decode flate stream: {error}")))?;
131 if output.len() as u64 > MAX_DECOMPRESSED_SIZE {
132 return Err(PdfError::Corrupt(
133 "decompressed stream exceeds maximum allowed size".to_string(),
134 ));
135 }
136 Ok(output)
137}
138
139fn lzw_decode(data: &[u8], early_change: bool) -> PdfResult<Vec<u8>> {
146 const CLEAR: u32 = 256;
147 const EOD: u32 = 257;
148 const MAX_WIDTH: u32 = 12;
149 let width_threshold = |width: u32| {
150 if early_change {
151 (1u32 << width) - 1
152 } else {
153 1u32 << width
154 }
155 };
156
157 let mut reader = BitReader::new(data);
158 let mut dict: Vec<Vec<u8>> = Vec::with_capacity(1 << MAX_WIDTH);
159 let reset_dict = |dict: &mut Vec<Vec<u8>>| {
160 dict.clear();
161 for byte in 0u32..256 {
162 dict.push(vec![byte as u8]);
163 }
164 dict.push(Vec::new()); dict.push(Vec::new()); };
167 reset_dict(&mut dict);
168
169 let mut output: Vec<u8> = Vec::new();
170 let mut code_width: u32 = 9;
171 let mut previous: Option<Vec<u8>> = None;
172 loop {
173 let Some(code) = reader.read_bits(code_width) else {
174 break;
175 };
176 if code == EOD {
177 break;
178 }
179 if code == CLEAR {
180 reset_dict(&mut dict);
181 code_width = 9;
182 previous = None;
183 continue;
184 }
185 let entry = if (code as usize) < dict.len() {
186 let entry = dict[code as usize].clone();
187 if entry.is_empty() {
188 return Err(PdfError::Corrupt(format!(
189 "LZW code {code} references placeholder entry"
190 )));
191 }
192 entry
193 } else if code as usize == dict.len() {
194 let prev = previous
198 .clone()
199 .ok_or_else(|| PdfError::Corrupt("LZW code out of sequence".to_string()))?;
200 let first = *prev
201 .first()
202 .ok_or_else(|| PdfError::Corrupt("LZW previous entry was empty".to_string()))?;
203 let mut entry = prev;
204 entry.push(first);
205 entry
206 } else {
207 return Err(PdfError::Corrupt(format!(
208 "LZW code {code} outside dictionary"
209 )));
210 };
211 if output.len() + entry.len() > MAX_DECOMPRESSED_SIZE as usize {
212 return Err(PdfError::Corrupt(
213 "decompressed stream exceeds maximum allowed size".to_string(),
214 ));
215 }
216 output.extend_from_slice(&entry);
217 if let Some(prev_entry) = previous.take() {
218 let mut new_entry = prev_entry;
219 new_entry.push(entry[0]);
220 if dict.len() < (1 << MAX_WIDTH) {
221 dict.push(new_entry);
222 }
223 if (dict.len() as u32).saturating_add(1) >= width_threshold(code_width)
230 && code_width < MAX_WIDTH
231 {
232 code_width += 1;
233 }
234 }
235 previous = Some(entry);
236 }
237 Ok(output)
238}
239
/// Cursor over a byte slice that hands out fixed-width, MSB-first bit groups.
struct BitReader<'a> {
    /// Bytes being read.
    data: &'a [u8],
    /// Index of the next byte to pull into `bit_buffer`.
    byte_index: usize,
    /// Bits pulled from `data` but not yet returned, right-aligned.
    bit_buffer: u32,
    /// Number of valid bits currently held in `bit_buffer`.
    bit_count: u32,
}

impl<'a> BitReader<'a> {
    /// Creates a reader positioned at the first bit of `data`.
    fn new(data: &'a [u8]) -> Self {
        Self {
            data,
            byte_index: 0,
            bit_buffer: 0,
            bit_count: 0,
        }
    }

    /// Returns the next `width` bits as an integer.
    ///
    /// Returns `None` once the input is exhausted and no bits are pending.
    /// When the input ends with fewer than `width` bits pending, those bits are
    /// returned once, padded on the right with zeros.
    fn read_bits(&mut self, width: u32) -> Option<u32> {
        while self.bit_count < width {
            match self.data.get(self.byte_index) {
                Some(&byte) => {
                    self.bit_buffer = (self.bit_buffer << 8) | u32::from(byte);
                    self.byte_index += 1;
                    self.bit_count += 8;
                }
                None if self.bit_count == 0 => return None,
                None => {
                    // Out of bytes with a partial code pending: shift the
                    // leftovers up and let zeros fill the low bits.
                    let shift = width - self.bit_count;
                    self.bit_buffer <<= shift;
                    let padded = self.bit_buffer & ((1u32 << width) - 1);
                    self.bit_count = 0;
                    self.bit_buffer = 0;
                    return Some(padded);
                }
            }
        }

        self.bit_count -= width;
        let code = (self.bit_buffer >> self.bit_count) & ((1u32 << width) - 1);
        // Keep only the bits that have not been handed out yet.
        self.bit_buffer &= (1u32 << self.bit_count) - 1;
        Some(code)
    }
}
289
290fn ascii85_decode(data: &[u8]) -> PdfResult<Vec<u8>> {
295 let mut output = Vec::with_capacity(data.len());
296 let mut group = [0u8; 5];
297 let mut group_len = 0usize;
298
299 for &byte in data {
300 if byte == b'~' {
301 break; }
303 if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | 0x0C) {
304 continue;
305 }
306 if byte == b'z' {
307 if group_len != 0 {
308 return Err(PdfError::Corrupt(
309 "ASCII85 'z' shortcut inside a partial group".to_string(),
310 ));
311 }
312 output.extend_from_slice(&[0u8; 4]);
313 continue;
314 }
315 if !(b'!'..=b'u').contains(&byte) {
316 return Err(PdfError::Corrupt(format!(
317 "invalid ASCII85 byte 0x{byte:02X}"
318 )));
319 }
320 group[group_len] = byte - b'!';
321 group_len += 1;
322 if group_len == 5 {
323 let value = (group[0] as u64) * 85u64.pow(4)
324 + (group[1] as u64) * 85u64.pow(3)
325 + (group[2] as u64) * 85u64.pow(2)
326 + (group[3] as u64) * 85
327 + (group[4] as u64);
328 if value > u32::MAX as u64 {
329 return Err(PdfError::Corrupt(
330 "ASCII85 group value exceeds 32 bits".to_string(),
331 ));
332 }
333 output.extend_from_slice(&(value as u32).to_be_bytes());
334 group_len = 0;
335 }
336 }
337
338 if group_len > 0 {
339 if group_len == 1 {
340 return Err(PdfError::Corrupt(
341 "ASCII85 final group contains a single byte".to_string(),
342 ));
343 }
344 for entry in group.iter_mut().skip(group_len) {
346 *entry = 84;
347 }
348 let value = (group[0] as u64) * 85u64.pow(4)
349 + (group[1] as u64) * 85u64.pow(3)
350 + (group[2] as u64) * 85u64.pow(2)
351 + (group[3] as u64) * 85
352 + (group[4] as u64);
353 let bytes = (value as u32).to_be_bytes();
354 output.extend_from_slice(&bytes[..group_len - 1]);
355 }
356
357 Ok(output)
358}
359
360fn ascii_hex_decode(data: &[u8]) -> PdfResult<Vec<u8>> {
364 let mut output = Vec::with_capacity(data.len() / 2 + 1);
365 let mut high: Option<u8> = None;
366 for &byte in data {
367 if byte == b'>' {
368 break;
369 }
370 if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | 0x0C) {
371 continue;
372 }
373 let nibble = match byte {
374 b'0'..=b'9' => byte - b'0',
375 b'a'..=b'f' => byte - b'a' + 10,
376 b'A'..=b'F' => byte - b'A' + 10,
377 _ => {
378 return Err(PdfError::Corrupt(format!(
379 "invalid ASCIIHex byte 0x{byte:02X}"
380 )));
381 }
382 };
383 match high.take() {
384 None => high = Some(nibble),
385 Some(h) => output.push((h << 4) | nibble),
386 }
387 }
388 if let Some(h) = high {
389 output.push(h << 4);
390 }
391 Ok(output)
392}
393
394fn run_length_decode(data: &[u8]) -> PdfResult<Vec<u8>> {
401 let mut output: Vec<u8> = Vec::with_capacity(data.len());
402 let mut index = 0usize;
403 while index < data.len() {
404 let length_byte = data[index];
405 index += 1;
406 if length_byte == 128 {
407 return Ok(output);
408 }
409 if length_byte < 128 {
410 let run_len = usize::from(length_byte) + 1;
411 let end = index
412 .checked_add(run_len)
413 .ok_or_else(|| PdfError::Corrupt("RunLengthDecode index overflow".to_string()))?;
414 if end > data.len() {
415 return Err(PdfError::Corrupt(
416 "RunLengthDecode literal run runs past end of stream".to_string(),
417 ));
418 }
419 output.extend_from_slice(&data[index..end]);
420 index = end;
421 } else {
422 let repeat = 257usize - usize::from(length_byte);
423 if index >= data.len() {
424 return Err(PdfError::Corrupt(
425 "RunLengthDecode repeat run is missing its payload byte".to_string(),
426 ));
427 }
428 let byte = data[index];
429 index += 1;
430 output.extend(std::iter::repeat_n(byte, repeat));
431 }
432 if output.len() as u64 > MAX_DECOMPRESSED_SIZE {
433 return Err(PdfError::Corrupt(
434 "decompressed stream exceeds maximum allowed size".to_string(),
435 ));
436 }
437 }
438 Ok(output)
439}
440
441fn apply_predictor(data: &[u8], decode_parms: Option<&PdfValue>) -> PdfResult<Vec<u8>> {
442 let parms = match decode_parms {
443 None => return Ok(data.to_vec()),
444 Some(PdfValue::Dictionary(dict)) => dict,
445 Some(PdfValue::Null) => return Ok(data.to_vec()),
446 Some(PdfValue::Array(_)) => {
447 return Err(PdfError::Unsupported(
451 "per-filter DecodeParms arrays are not supported".to_string(),
452 ));
453 }
454 Some(_) => {
455 return Err(PdfError::Corrupt(
456 "DecodeParms is not a dictionary".to_string(),
457 ));
458 }
459 };
460
461 let predictor = parms
462 .get("Predictor")
463 .and_then(PdfValue::as_integer)
464 .unwrap_or(1);
465 match predictor {
466 1 => Ok(data.to_vec()),
467 2 => tiff_predictor_decode(data, parms),
468 10..=15 => png_predictor_decode(data, parms),
469 other => Err(PdfError::Unsupported(format!(
470 "predictor {other} is not supported"
471 ))),
472 }
473}
474
475fn tiff_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
476 let columns = parms
477 .get("Columns")
478 .and_then(PdfValue::as_integer)
479 .unwrap_or(1) as usize;
480 let colors = parms
481 .get("Colors")
482 .and_then(PdfValue::as_integer)
483 .unwrap_or(1) as usize;
484 let bits_per_component = parms
485 .get("BitsPerComponent")
486 .and_then(PdfValue::as_integer)
487 .unwrap_or(8) as usize;
488
489 if bits_per_component != 8 {
490 return Err(PdfError::Unsupported(format!(
491 "TIFF predictor with BitsPerComponent {bits_per_component} is not supported"
492 )));
493 }
494 if columns == 0 || colors == 0 {
495 return Err(PdfError::Corrupt(
496 "TIFF predictor Columns/Colors must be positive".to_string(),
497 ));
498 }
499 let row_stride = columns * colors;
500 if data.len() % row_stride != 0 {
501 return Err(PdfError::Corrupt(format!(
502 "TIFF predictor row length mismatch: data={} stride={row_stride}",
503 data.len()
504 )));
505 }
506 let mut output = Vec::with_capacity(data.len());
507 for row in data.chunks_exact(row_stride) {
508 for (component_index, byte) in row.iter().enumerate() {
509 if component_index < colors {
510 output.push(*byte);
512 } else {
513 let previous = output[output.len() - colors];
514 output.push(previous.wrapping_add(*byte));
515 }
516 }
517 }
518 Ok(output)
519}
520
521fn png_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
522 let columns = parms
523 .get("Columns")
524 .and_then(PdfValue::as_integer)
525 .unwrap_or(1) as usize;
526 let colors = parms
527 .get("Colors")
528 .and_then(PdfValue::as_integer)
529 .unwrap_or(1) as usize;
530 let bits_per_component = parms
531 .get("BitsPerComponent")
532 .and_then(PdfValue::as_integer)
533 .unwrap_or(8) as usize;
534
535 if bits_per_component != 8 {
536 return Err(PdfError::Unsupported(format!(
537 "PNG predictor with BitsPerComponent {bits_per_component} is not supported"
538 )));
539 }
540 if columns == 0 || colors == 0 {
541 return Err(PdfError::Corrupt(
542 "PNG predictor Columns/Colors must be positive".to_string(),
543 ));
544 }
545 let bytes_per_pixel = colors; let row_data_len = columns * bytes_per_pixel;
547 let row_stride = row_data_len + 1; if data.len() % row_stride != 0 {
550 return Err(PdfError::Corrupt(format!(
551 "PNG predictor row length mismatch: data={} stride={row_stride}",
552 data.len()
553 )));
554 }
555 let row_count = data.len() / row_stride;
556 let mut output = Vec::with_capacity(row_count * row_data_len);
557 let mut prev_row = vec![0u8; row_data_len];
558 let mut row = vec![0u8; row_data_len];
559
560 for r in 0..row_count {
561 let base = r * row_stride;
562 let filter = data[base];
563 let src = &data[base + 1..base + row_stride];
564 row.copy_from_slice(src);
565 match filter {
566 0 => {} 1 => {
568 for i in 0..row_data_len {
570 let left = if i >= bytes_per_pixel {
571 row[i - bytes_per_pixel]
572 } else {
573 0
574 };
575 row[i] = row[i].wrapping_add(left);
576 }
577 }
578 2 => {
579 for i in 0..row_data_len {
581 row[i] = row[i].wrapping_add(prev_row[i]);
582 }
583 }
584 3 => {
585 for i in 0..row_data_len {
587 let left = if i >= bytes_per_pixel {
588 row[i - bytes_per_pixel]
589 } else {
590 0
591 };
592 let up = prev_row[i];
593 let avg = ((left as u16 + up as u16) / 2) as u8;
594 row[i] = row[i].wrapping_add(avg);
595 }
596 }
597 4 => {
598 for i in 0..row_data_len {
600 let left = if i >= bytes_per_pixel {
601 row[i - bytes_per_pixel]
602 } else {
603 0
604 };
605 let up = prev_row[i];
606 let up_left = if i >= bytes_per_pixel {
607 prev_row[i - bytes_per_pixel]
608 } else {
609 0
610 };
611 row[i] = row[i].wrapping_add(paeth(left, up, up_left));
612 }
613 }
614 other => {
615 return Err(PdfError::Corrupt(format!(
616 "unknown PNG row filter type {other}"
617 )));
618 }
619 }
620 output.extend_from_slice(&row);
621 prev_row.copy_from_slice(&row);
622 }
623
624 Ok(output)
625}
626
/// Paeth predictor: returns whichever of `a` (left), `b` (up) or `c`
/// (upper-left) lies closest to `a + b - c`, preferring `a`, then `b`, then
/// `c` on ties.
fn paeth(a: u8, b: u8, c: u8) -> u8 {
    let (left, above, diagonal) = (i32::from(a), i32::from(b), i32::from(c));
    let estimate = left + above - diagonal;
    let distance = |candidate: i32| (estimate - candidate).abs();

    let to_left = distance(left);
    let to_above = distance(above);
    let to_diagonal = distance(diagonal);

    if to_left <= to_above && to_left <= to_diagonal {
        return a;
    }
    if to_above <= to_diagonal {
        b
    } else {
        c
    }
}
640
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{PdfDictionary, PdfStream, PdfValue};
    use flate2::{Compression, write::ZlibEncoder};
    use std::io::Write;

    /// Wraps a dictionary and payload into a stream.
    fn make_stream(dict: PdfDictionary, data: Vec<u8>) -> PdfStream {
        PdfStream { dict, data }
    }

    /// Builds a stream whose `Filter` entry is omitted (no names), a single
    /// name (one name) or an array of names (several), optionally with a
    /// `DecodeParms` dictionary.
    fn stream_with(filters: &[&str], parms: Option<PdfDictionary>, data: Vec<u8>) -> PdfStream {
        let mut dict = PdfDictionary::new();
        match filters {
            [] => {}
            [single] => {
                dict.insert("Filter".to_string(), PdfValue::Name((*single).into()));
            }
            many => {
                let names = many
                    .iter()
                    .map(|name| PdfValue::Name((*name).into()))
                    .collect();
                dict.insert("Filter".to_string(), PdfValue::Array(names));
            }
        }
        if let Some(parms) = parms {
            dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));
        }
        make_stream(dict, data)
    }

    /// Builds a `DecodeParms` dictionary holding `Predictor` and `Columns`.
    fn predictor_parms(predictor: PdfValue, columns: PdfValue) -> PdfDictionary {
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), predictor);
        parms.insert("Columns".to_string(), columns);
        parms
    }

    /// Compresses `raw` into a zlib stream.
    fn zlib(raw: &[u8]) -> Vec<u8> {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(raw).unwrap();
        encoder.finish().unwrap()
    }

    /// Applies horizontal differencing to rows of `row_len` single-byte pixels.
    fn tiff_difference_rows(pixels: &[u8], row_len: usize) -> Vec<u8> {
        let mut encoded = Vec::with_capacity(pixels.len());
        for row in pixels.chunks(row_len) {
            encoded.push(row[0]);
            encoded.extend(row.windows(2).map(|pair| pair[1].wrapping_sub(pair[0])));
        }
        encoded
    }

    /// Minimal ASCII85 encoder, terminated with `~>`.
    fn ascii85_encode(raw: &[u8]) -> Vec<u8> {
        let mut text = Vec::new();
        for chunk in raw.chunks(4) {
            let mut word = [0u8; 4];
            word[..chunk.len()].copy_from_slice(chunk);
            let mut value = u64::from(u32::from_be_bytes(word));
            if chunk.len() == 4 && value == 0 {
                text.push(b'z');
                continue;
            }
            let mut digits = [b'!'; 5];
            for digit in digits.iter_mut().rev() {
                *digit += (value % 85) as u8;
                value /= 85;
            }
            // A chunk of n bytes is written as n + 1 characters.
            text.extend_from_slice(&digits[..=chunk.len()]);
        }
        text.extend_from_slice(b"~>");
        text
    }

    /// MSB-first bit packer used to hand-build LZW fixtures.
    #[derive(Default)]
    struct BitWriter {
        bytes: Vec<u8>,
        buffer: u64,
        pending: u32,
    }

    impl BitWriter {
        /// Appends the low `width` bits of `code`.
        fn push(&mut self, code: u32, width: u32) {
            self.buffer = (self.buffer << width) | u64::from(code);
            self.pending += width;
            while self.pending >= 8 {
                self.pending -= 8;
                self.bytes.push(((self.buffer >> self.pending) & 0xFF) as u8);
                self.buffer &= (1u64 << self.pending) - 1;
            }
        }

        /// Flushes any partial byte (zero-padded) and returns the bytes.
        fn finish(mut self) -> Vec<u8> {
            if self.pending > 0 {
                self.bytes
                    .push(((self.buffer << (8 - self.pending)) & 0xFF) as u8);
            }
            self.bytes
        }
    }

    /// Reference LZW encoder: emits a leading clear code, grows the code
    /// width according to `early_change`, and ends with the end-of-data code.
    fn encode_lzw(input: &[u8], early_change: bool) -> Vec<u8> {
        use std::collections::HashMap;

        let mut writer = BitWriter::default();
        writer.push(256, 9);

        let mut table: HashMap<Vec<u8>, u32> =
            (0u32..256).map(|byte| (vec![byte as u8], byte)).collect();
        let mut next_code: u32 = 258;
        let mut width: u32 = 9;

        let mut run: Vec<u8> = Vec::new();
        for &byte in input {
            run.push(byte);
            if table.contains_key(&run) {
                continue;
            }
            // Everything except the byte just appended is already in the table.
            let code = table[&run[..run.len() - 1]];
            writer.push(code, width);
            table.insert(run.clone(), next_code);
            next_code += 1;
            let threshold = if early_change {
                (1u32 << width) - 1
            } else {
                1u32 << width
            };
            if next_code >= threshold && width < 12 {
                width += 1;
            }
            run.clear();
            run.push(byte);
        }
        if !run.is_empty() {
            writer.push(table[&run], width);
        }
        writer.push(257, width);
        writer.finish()
    }

    #[test]
    fn passthrough_when_no_filter() {
        let stream = stream_with(&[], None, vec![1, 2, 3]);
        assert_eq!(decode_stream(&stream).unwrap(), vec![1, 2, 3]);
    }

    #[test]
    fn inflates_flate_decode() {
        let raw = b"hello world";
        let stream = stream_with(&["FlateDecode"], None, zlib(raw));
        assert_eq!(decode_stream(&stream).unwrap(), raw.to_vec());
    }

    #[test]
    fn applies_png_up_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let (top, bottom) = original.split_at(4);

        // Row one uses filter 0 (none); row two uses filter 2 (up).
        let mut encoded = vec![0u8];
        encoded.extend_from_slice(top);
        encoded.push(2);
        encoded.extend(
            bottom
                .iter()
                .zip(top.iter())
                .map(|(value, up)| value.wrapping_sub(*up)),
        );

        let parms = predictor_parms(PdfValue::Integer(12), PdfValue::Integer(4));
        let stream = stream_with(&["FlateDecode"], Some(parms), zlib(&encoded));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn applies_tiff_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let encoded = tiff_difference_rows(&original, 4);

        let parms = predictor_parms(PdfValue::Integer(2), PdfValue::Integer(4));
        let stream = stream_with(&["FlateDecode"], Some(parms), zlib(&encoded));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn decodes_ascii85_full_group() {
        let stream = stream_with(&["ASCII85Decode"], None, b"9jqo^~>".to_vec());
        assert_eq!(decode_stream(&stream).unwrap(), b"Man ".to_vec());
    }

    #[test]
    fn decodes_ascii85_z_shortcut() {
        let stream = stream_with(&["ASCII85Decode"], None, b"z~>".to_vec());
        assert_eq!(decode_stream(&stream).unwrap(), vec![0, 0, 0, 0]);
    }

    #[test]
    fn decodes_filter_chain_ascii85_then_flate() {
        let plaintext = b"PdfStreamFilterChainTest".to_vec();
        let ascii85 = ascii85_encode(&zlib(&plaintext));

        let stream = stream_with(&["ASCII85Decode", "FlateDecode"], None, ascii85);
        assert_eq!(decode_stream(&stream).unwrap(), plaintext);
    }

    #[test]
    fn decodes_ascii_hex() {
        let stream = stream_with(&["ASCIIHexDecode"], None, b"48656C6C6F>".to_vec());
        assert_eq!(decode_stream(&stream).unwrap(), b"Hello".to_vec());
    }

    #[test]
    fn rejects_unsupported_predictor() {
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(3));
        let stream = stream_with(&[], Some(parms), vec![0, 0, 0, 0]);
        match decode_stream(&stream) {
            Err(PdfError::Unsupported(msg)) => {
                assert!(msg.contains("predictor"), "got: {msg}")
            }
            other => panic!("expected Unsupported, got: {other:?}"),
        }
    }

    #[test]
    fn decodes_lzw_spec_example() {
        let data = vec![0x80, 0x0B, 0x60, 0x50, 0x22, 0x0C, 0x0C, 0x85, 0x01];
        let stream = stream_with(&["LZWDecode"], None, data);
        assert_eq!(decode_stream(&stream).unwrap(), b"-----A---B".to_vec());
    }

    #[test]
    fn decodes_lzw_roundtrip_default_early_change() {
        let plaintext = b"the quick brown fox jumps over the lazy dog".to_vec();
        let stream = stream_with(&["LZWDecode"], None, encode_lzw(&plaintext, true));
        assert_eq!(decode_stream(&stream).unwrap(), plaintext);
    }

    #[test]
    fn decodes_lzw_roundtrip_early_change_zero() {
        let plaintext = b"the quick brown fox jumps over the lazy dog".to_vec();
        let mut parms = PdfDictionary::new();
        parms.insert("EarlyChange".to_string(), PdfValue::Integer(0));
        let stream = stream_with(&["LZWDecode"], Some(parms), encode_lzw(&plaintext, false));
        assert_eq!(decode_stream(&stream).unwrap(), plaintext);
    }

    #[test]
    fn decodes_lzw_with_tiff_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let lzw_bytes = encode_lzw(&tiff_difference_rows(&original, 4), true);

        let parms = predictor_parms(PdfValue::Integer(2), PdfValue::Integer(4));
        let stream = stream_with(&["LZWDecode"], Some(parms), lzw_bytes);
        assert_eq!(decode_stream(&stream).unwrap(), original.to_vec());
    }

    #[test]
    fn decodes_lzw_exercises_code_width_transitions() {
        let mut plaintext = Vec::new();
        for i in 0u16..1200 {
            plaintext.extend_from_slice(&[
                b'a' + (i % 26) as u8,
                b'A' + (i % 26) as u8,
                b'0' + (i % 10) as u8,
            ]);
        }
        let stream = stream_with(&["LZWDecode"], None, encode_lzw(&plaintext, true));
        assert_eq!(decode_stream(&stream).unwrap(), plaintext);
    }

    #[test]
    fn decodes_run_length_literal_runs() {
        let encoded = vec![2, b'A', b'B', b'C', 128];
        let stream = stream_with(&["RunLengthDecode"], None, encoded);
        assert_eq!(decode_stream(&stream).unwrap(), b"ABC".to_vec());
    }

    #[test]
    fn decodes_run_length_repeat_runs() {
        let encoded = vec![0xFF, b'Z', 128];
        let stream = stream_with(&["RL"], None, encoded);
        assert_eq!(decode_stream(&stream).unwrap(), b"ZZ".to_vec());
    }

    #[test]
    fn decodes_run_length_mixed_runs_without_eod() {
        let encoded = vec![0, b'A', 0xFE, b'B', 1, b'C', b'D'];
        let stream = stream_with(&["RunLengthDecode"], None, encoded);
        assert_eq!(decode_stream(&stream).unwrap(), b"ABBBCD".to_vec());
    }

    #[test]
    fn decodes_filter_chain_run_length_then_flate() {
        let plaintext = b"RunLengthInsideAFilterChain".to_vec();
        let flate_bytes = zlib(&plaintext);

        // Emit the compressed bytes as literal runs of at most 128 bytes.
        let mut rl_bytes = Vec::new();
        for literal in flate_bytes.chunks(128) {
            rl_bytes.push((literal.len() - 1) as u8);
            rl_bytes.extend_from_slice(literal);
        }
        rl_bytes.push(128);

        let stream = stream_with(&["RunLengthDecode", "FlateDecode"], None, rl_bytes);
        assert_eq!(decode_stream(&stream).unwrap(), plaintext);
    }

    #[test]
    fn rejects_run_length_truncated_literal_run() {
        let encoded = vec![3, b'A', b'B'];
        let stream = stream_with(&["RunLengthDecode"], None, encoded);
        let err = decode_stream(&stream).unwrap_err();
        assert!(matches!(err, PdfError::Corrupt(_)), "got: {err:?}");
    }

    #[test]
    fn rejects_run_length_truncated_repeat_run() {
        let encoded = vec![200];
        let stream = stream_with(&["RunLengthDecode"], None, encoded);
        let err = decode_stream(&stream).unwrap_err();
        assert!(matches!(err, PdfError::Corrupt(_)), "got: {err:?}");
    }

    #[test]
    fn rejects_lzw_out_of_range_code() {
        // A clear code followed by a code far beyond the fresh table.
        let mut writer = BitWriter::default();
        writer.push(256, 9);
        writer.push(511, 9);
        let stream = stream_with(&["LZWDecode"], None, writer.finish());
        let err = decode_stream(&stream).unwrap_err();
        assert!(matches!(err, PdfError::Corrupt(_)), "got: {err:?}");
    }
}