1use super::objects::{PdfDictionary, PdfObject};
12use super::{ParseError, ParseOptions, ParseResult};
13
14#[cfg(feature = "compression")]
15use flate2::read::ZlibDecoder;
16use std::io::Read;
17
/// Largest number of bytes a single decompression pass may produce (256 MiB).
///
/// Used as the cap handed to `read_to_end_limited` by the Flate helpers so a
/// small malicious stream cannot inflate without bound.
const MAX_DECOMPRESSED_SIZE: usize = 256 << 20;
27
/// Highest accepted `output / input` ratio (integer division) before
/// `check_compression_ratio` treats a stream as suspicious.
const MAX_COMPRESSION_RATIO: usize = 1_000;
34
/// Reads `reader` to the end while refusing to buffer more than `max_bytes`.
///
/// Data is pulled in 16 KiB chunks; the size check happens before each chunk is
/// appended, so the returned vector never exceeds `max_bytes`.
///
/// # Errors
/// * `ErrorKind::Other` ("Decompressed size exceeds limit ...") as soon as the
///   next chunk would push the total past `max_bytes`.
/// * Any error reported by `reader`, except `ErrorKind::Interrupted`, which is
///   retried because the `Read` contract defines it as non-fatal.
fn read_to_end_limited<R: Read>(reader: &mut R, max_bytes: usize) -> std::io::Result<Vec<u8>> {
    let mut result = Vec::new();
    let mut buffer = [0u8; 16384];

    loop {
        let n = match reader.read(&mut buffer) {
            Ok(0) => return Ok(result),
            Ok(n) => n,
            // A signal interrupted the call before any data was read: retry.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        // saturating_add keeps the comparison meaningful even for absurd sizes.
        if result.len().saturating_add(n) > max_bytes {
            return Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                format!(
                    "Decompressed size exceeds limit of {} bytes ({} MB). \
                     Possible decompression bomb.",
                    max_bytes,
                    max_bytes / (1024 * 1024)
                ),
            ));
        }
        result.extend_from_slice(&buffer[..n]);
    }
}
66
67fn check_compression_ratio(input_size: usize, output_size: usize) -> Result<(), std::io::Error> {
72 if input_size > 0 && output_size / input_size > MAX_COMPRESSION_RATIO {
73 return Err(std::io::Error::new(
74 std::io::ErrorKind::Other,
75 format!(
76 "Suspicious compression ratio {}:1 (input={}B, output={}B). \
77 Max allowed ratio is {}:1.",
78 output_size / input_size,
79 input_size,
80 output_size,
81 MAX_COMPRESSION_RATIO
82 ),
83 ));
84 }
85 Ok(())
86}
87
88use super::filter_impls::ccitt::decode_ccitt;
90use super::filter_impls::dct::decode_dct;
91use super::filter_impls::jbig2::decode_jbig2;
92pub use super::filter_impls::ccitt::decode_ccitt as decode_ccitt_public;
94pub use super::filter_impls::dct::{parse_jpeg_info, JpegColorSpace, JpegInfo};
95pub use super::filter_impls::jbig2::decode_jbig2 as decode_jbig2_public;
96
/// Stream filter names defined by the PDF specification (ISO 32000-1, §7.4).
///
/// Variant names match the PDF filter names exactly so they can be mapped with
/// [`Filter::from_name`]. All variants are field-less, so the type is also
/// `Eq` and `Hash` and can be used as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Filter {
    /// Data encoded as ASCII hexadecimal digits.
    ASCIIHexDecode,

    /// Data encoded in ASCII base-85.
    ASCII85Decode,

    /// LZW-compressed data.
    LZWDecode,

    /// zlib/deflate-compressed data.
    FlateDecode,

    /// Byte-oriented run-length encoded data.
    RunLengthDecode,

    /// CCITT facsimile (Group 3 / Group 4) encoded image data.
    CCITTFaxDecode,

    /// JBIG2 encoded bi-level image data.
    JBIG2Decode,

    /// JPEG (DCT) encoded image data.
    DCTDecode,

    /// JPEG 2000 encoded image data.
    JPXDecode,

    /// Data encrypted by a security handler.
    Crypt,
}
130
131impl Filter {
132 pub fn from_name(name: &str) -> Option<Self> {
134 match name {
135 "ASCIIHexDecode" => Some(Filter::ASCIIHexDecode),
136 "ASCII85Decode" => Some(Filter::ASCII85Decode),
137 "LZWDecode" => Some(Filter::LZWDecode),
138 "FlateDecode" => Some(Filter::FlateDecode),
139 "RunLengthDecode" => Some(Filter::RunLengthDecode),
140 "CCITTFaxDecode" => Some(Filter::CCITTFaxDecode),
141 "JBIG2Decode" => Some(Filter::JBIG2Decode),
142 "DCTDecode" => Some(Filter::DCTDecode),
143 "JPXDecode" => Some(Filter::JPXDecode),
144 "Crypt" => Some(Filter::Crypt),
145 _ => None,
146 }
147 }
148}
149
150pub fn decode_stream(
152 data: &[u8],
153 dict: &PdfDictionary,
154 _options: &ParseOptions,
155) -> ParseResult<Vec<u8>> {
156 let filters = match dict.get("Filter") {
158 Some(PdfObject::Name(name)) => vec![name.as_str()],
159 Some(PdfObject::Array(array)) => {
160 let mut filter_names = Vec::new();
161 for obj in &array.0 {
162 if let PdfObject::Name(name) = obj {
163 filter_names.push(name.as_str());
164 } else {
165 return Err(ParseError::SyntaxError {
166 position: 0,
167 message: "Invalid filter in array".to_string(),
168 });
169 }
170 }
171 filter_names
172 }
173 None => {
174 return Ok(data.to_vec());
176 }
177 _ => {
178 return Err(ParseError::SyntaxError {
179 position: 0,
180 message: "Invalid Filter type".to_string(),
181 });
182 }
183 };
184
185 let decode_params = dict.get("DecodeParms");
187
188 let mut result = data.to_vec();
190 for (i, filter_name) in filters.iter().enumerate() {
191 let filter = Filter::from_name(filter_name).ok_or_else(|| ParseError::SyntaxError {
192 position: 0,
193 message: format!("Unknown filter: {filter_name}"),
194 })?;
195
196 let filter_params = get_filter_params(decode_params, i);
198
199 result = apply_filter_with_params(&result, filter, filter_params)?;
200 }
201
202 Ok(result)
203}
204
205#[allow(dead_code)]
207pub(crate) fn apply_filter(data: &[u8], filter: Filter) -> ParseResult<Vec<u8>> {
208 match filter {
209 Filter::FlateDecode => decode_flate(data),
210 Filter::ASCIIHexDecode => decode_ascii_hex(data),
211 Filter::ASCII85Decode => decode_ascii85(data),
212 Filter::LZWDecode => decode_lzw(data, None),
213 Filter::RunLengthDecode => decode_run_length(data),
214 Filter::CCITTFaxDecode => decode_ccitt(data, None),
215 Filter::JBIG2Decode => decode_jbig2(data, None),
216 Filter::DCTDecode => decode_dct(data),
217 _ => Err(ParseError::SyntaxError {
218 position: 0,
219 message: format!("Filter {filter:?} not yet implemented"),
220 }),
221 }
222}
223
/// Decodes a `FlateDecode` stream, falling back to progressively more lenient
/// recovery strategies when the data is not a clean zlib stream.
///
/// Strategies, in order: zlib; raw deflate; both again after skipping 1-5
/// leading bytes (inputs longer than 10 bytes); both again after dropping 1-10
/// trailing bytes (inputs longer than 20 bytes); gzip; partial zlib recovery;
/// zlib followed by a guessed PNG predictor (inputs longer than 20 bytes).
///
/// If any strategy is rejected by the decompression-bomb guards (output size
/// limit or compression ratio), the remaining strategies are skipped. Before
/// this check every later strategy would re-inflate the same hostile stream up
/// to the size limit, multiplying the cost of a bomb many times over.
///
/// # Returns
/// Always `Ok`: the decoded bytes, or an empty vector when nothing could be
/// recovered (the pre-existing best-effort contract is kept).
#[cfg(feature = "compression")]
fn decode_flate(data: &[u8]) -> ParseResult<Vec<u8>> {
    match run_flate_strategies(data) {
        Ok(Some(decoded)) => Ok(decoded),
        Ok(None) => {
            tracing::debug!("Warning: All FlateDecode strategies failed, returning empty data");
            Ok(Vec::new())
        }
        Err(guard) => {
            tracing::debug!(
                "Warning: FlateDecode stopped by decompression guard ({}), returning empty data",
                guard
            );
            Ok(Vec::new())
        }
    }
}

/// Returns `true` when `error` was produced by the size or ratio guards rather
/// than by the decoder itself.
///
/// The guards only expose plain `ErrorKind::Other` errors, so they are
/// recognised by their message text. If those messages change this check stops
/// matching and `decode_flate` simply falls back to trying every strategy.
#[cfg(feature = "compression")]
fn is_flate_guard_error(error: &std::io::Error) -> bool {
    if error.kind() != std::io::ErrorKind::Other {
        return false;
    }
    let message = error.to_string();
    message.contains("exceeds") || message.contains("Suspicious compression ratio")
}

/// Classifies one strategy outcome: `Ok(Some)` on success, `Ok(None)` on an
/// ordinary decode failure (try the next strategy), `Err` when a guard tripped.
#[cfg(feature = "compression")]
fn flate_attempt(outcome: std::io::Result<Vec<u8>>) -> std::io::Result<Option<Vec<u8>>> {
    match outcome {
        Ok(bytes) => Ok(Some(bytes)),
        Err(error) if is_flate_guard_error(&error) => Err(error),
        Err(_) => Ok(None),
    }
}

/// Runs the recovery strategies of `decode_flate` in order and returns the
/// first success; a guard rejection aborts the sequence with `Err`.
#[cfg(feature = "compression")]
fn run_flate_strategies(data: &[u8]) -> std::io::Result<Option<Vec<u8>>> {
    if let Some(decoded) = flate_attempt(try_standard_zlib_decode(data))? {
        return Ok(Some(decoded));
    }

    if let Some(decoded) = flate_attempt(try_raw_deflate_decode(data))? {
        return Ok(Some(decoded));
    }

    // Tolerate a few junk bytes in front of the compressed data.
    if data.len() > 10 {
        for skip_bytes in 1..=5 {
            let tail = &data[skip_bytes..];
            if let Some(decoded) = flate_attempt(try_standard_zlib_decode(tail))? {
                return Ok(Some(decoded));
            }
            if let Some(decoded) = flate_attempt(try_raw_deflate_decode(tail))? {
                return Ok(Some(decoded));
            }
        }
    }

    // Tolerate a few junk bytes after the compressed data.
    if data.len() > 20 {
        for truncate_bytes in 1..=10 {
            let truncated = &data[..data.len() - truncate_bytes];
            if let Some(decoded) = flate_attempt(try_standard_zlib_decode(truncated))? {
                return Ok(Some(decoded));
            }
            if let Some(decoded) = flate_attempt(try_raw_deflate_decode(truncated))? {
                return Ok(Some(decoded));
            }
        }
    }

    if let Some(decoded) = flate_attempt(try_gzip_decode(data))? {
        return Ok(Some(decoded));
    }

    if let Some(partial) = flate_attempt(try_partial_flate_decode(data))? {
        tracing::debug!(
            "Warning: Using partial FlateDecode recovery, {} bytes recovered",
            partial.len()
        );
        return Ok(Some(partial));
    }

    // NOTE(review): this pass starts with the same zlib decode of `data` that
    // already failed as the first strategy, so it looks unable to succeed;
    // kept so the predictor helpers remain in use.
    if data.len() > 20 {
        for predictor in [10, 11, 12, 13, 14, 15] {
            if let Some(decoded) = flate_attempt(try_flate_decode_with_predictor(data, predictor))? {
                tracing::debug!(
                    "Warning: FlateDecode succeeded with predictor {}",
                    predictor
                );
                return Ok(Some(decoded));
            }
        }
    }

    Ok(None)
}
293
/// Inflates `data` as a zlib-wrapped stream, subject to the size and ratio guards.
#[cfg(feature = "compression")]
fn try_standard_zlib_decode(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
    let inflated = read_to_end_limited(&mut ZlibDecoder::new(data), MAX_DECOMPRESSED_SIZE)?;
    check_compression_ratio(data.len(), inflated.len())?;
    Ok(inflated)
}
301
/// Inflates `data` as a headerless deflate stream, subject to the size and ratio guards.
#[cfg(feature = "compression")]
fn try_raw_deflate_decode(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
    use flate2::read::DeflateDecoder;

    let inflated = read_to_end_limited(&mut DeflateDecoder::new(data), MAX_DECOMPRESSED_SIZE)?;
    check_compression_ratio(data.len(), inflated.len())?;
    Ok(inflated)
}
310
/// Inflates `data` as a gzip stream, subject to the size and ratio guards.
#[cfg(feature = "compression")]
fn try_gzip_decode(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
    use flate2::read::GzDecoder;

    let inflated = read_to_end_limited(&mut GzDecoder::new(data), MAX_DECOMPRESSED_SIZE)?;
    check_compression_ratio(data.len(), inflated.len())?;
    Ok(inflated)
}
319
/// Inflates as much of a zlib stream as possible, keeping what was decoded
/// when the input ends prematurely.
///
/// # Errors
/// * `ErrorKind::Other` when the recovered data would exceed `MAX_DECOMPRESSED_SIZE`.
/// * The decoder's `UnexpectedEof` error when it occurs before any byte was produced.
/// * Any other decoder error, unchanged (already-decoded data is discarded).
/// * `ErrorKind::InvalidData` ("No data decoded") when the stream ends cleanly but is empty.
/// * The ratio-guard error from `check_compression_ratio`.
#[cfg(feature = "compression")]
fn try_partial_flate_decode(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
    use flate2::read::ZlibDecoder;
    use std::io::ErrorKind;

    let mut inflater = ZlibDecoder::new(data);
    let mut recovered = Vec::new();
    let mut chunk = [0; 8192];

    loop {
        let count = match inflater.read(&mut chunk) {
            Ok(0) => break,
            Ok(count) => count,
            Err(eof) if eof.kind() == ErrorKind::UnexpectedEof => {
                // A truncated stream is only useful if something was decoded.
                if recovered.is_empty() {
                    return Err(eof);
                }
                break;
            }
            Err(other) => return Err(other),
        };

        if recovered.len() + count > MAX_DECOMPRESSED_SIZE {
            return Err(std::io::Error::new(
                ErrorKind::Other,
                format!(
                    "Partial decompression exceeds {} MB limit",
                    MAX_DECOMPRESSED_SIZE / (1024 * 1024)
                ),
            ));
        }
        recovered.extend_from_slice(&chunk[..count]);
    }

    if recovered.is_empty() {
        return Err(std::io::Error::new(
            ErrorKind::InvalidData,
            "No data decoded",
        ));
    }

    check_compression_ratio(data.len(), recovered.len())?;
    Ok(recovered)
}
367
/// Inflates `data` as zlib and, for predictor values 10 through 15, post-processes
/// the result with `apply_png_predictor`; other values return the inflated bytes as-is.
#[cfg(feature = "compression")]
fn try_flate_decode_with_predictor(data: &[u8], predictor: u8) -> Result<Vec<u8>, std::io::Error> {
    use flate2::read::ZlibDecoder;

    let inflated = read_to_end_limited(&mut ZlibDecoder::new(data), MAX_DECOMPRESSED_SIZE)?;
    check_compression_ratio(data.len(), inflated.len())?;

    match predictor {
        10..=15 => apply_png_predictor(&inflated, predictor),
        _ => Ok(inflated),
    }
}
384
/// Tries to undo PNG row prediction without knowing the row width.
///
/// A fixed list of candidate widths is tried in order; the first width for
/// which `apply_png_predictor_with_width` succeeds and yields an output whose
/// length is strictly between half and double the input length wins. When no
/// candidate qualifies (or `data` is empty) the input is returned unchanged.
#[cfg(feature = "compression")]
fn apply_png_predictor(data: &[u8], predictor: u8) -> Result<Vec<u8>, std::io::Error> {
    if data.is_empty() {
        return Ok(data.to_vec());
    }

    const CANDIDATE_WIDTHS: [usize; 12] = [1, 2, 3, 4, 8, 16, 24, 32, 48, 64, 96, 128];

    let plausible = |decoded: &Vec<u8>| {
        decoded.len() > data.len() / 2 && decoded.len() < data.len() * 2
    };

    // Lazy chain: candidates are evaluated one at a time, in list order.
    let unfiltered = CANDIDATE_WIDTHS
        .iter()
        .filter_map(|&width| apply_png_predictor_with_width(data, predictor, width).ok())
        .find(plausible)
        .unwrap_or_else(|| data.to_vec());

    Ok(unfiltered)
}
407
408#[cfg(feature = "compression")]
/// Strips PNG filter tags from `data`, treating it as rows of one tag byte
/// followed by `width` data bytes.
///
/// Only the Up filter (tag 2) is reconstructed: each byte has the byte directly
/// above it (same column, previous output row) added with wrapping; the first
/// row has no row above and is copied. Rows with any other tag are copied
/// through unchanged.
///
/// NOTE(review): Sub rows (tag 1) are passed through unreconstructed, exactly
/// as before; `test_apply_png_predictor_with_sub_filter` pins that behaviour.
///
/// `_predictor` is accepted for signature compatibility and is not used.
///
/// # Errors
/// `ErrorKind::InvalidInput` ("Invalid width") when `width` is zero or
/// `data.len()` is not a multiple of `width + 1`.
fn apply_png_predictor_with_width(
    data: &[u8],
    _predictor: u8,
    width: usize,
) -> Result<Vec<u8>, std::io::Error> {
    use std::io::{Error, ErrorKind};

    let row_len = match width.checked_add(1) {
        Some(len) if width != 0 && data.len() % len == 0 => len,
        _ => return Err(Error::new(ErrorKind::InvalidInput, "Invalid width")),
    };

    let mut result = Vec::with_capacity(data.len() / row_len * width);

    for row_data in data.chunks_exact(row_len) {
        let filter_type = row_data[0];
        let row = &row_data[1..];

        match filter_type {
            2 if result.len() >= width => {
                // Fix the start of the previous output row once; indexing
                // relative to the growing `result` length would drift by one
                // column per pushed byte.
                let above_start = result.len() - width;
                for (column, &byte) in row.iter().enumerate() {
                    let up = result[above_start + column];
                    result.push(byte.wrapping_add(up));
                }
            }
            // First-row Up (nothing above), None, Sub and all other tags.
            _ => result.extend_from_slice(row),
        }
    }

    Ok(result)
}
468
469#[cfg(not(feature = "compression"))]
470fn decode_flate(_data: &[u8]) -> ParseResult<Vec<u8>> {
471 Err(ParseError::StreamDecodeError(
472 "FlateDecode requires 'compression' feature".to_string(),
473 ))
474}
475
476fn decode_ascii_hex(data: &[u8]) -> ParseResult<Vec<u8>> {
478 let mut result = Vec::new();
479 let mut chars = data.iter().filter(|&&b| !b.is_ascii_whitespace());
480
481 loop {
482 let high = match chars.next() {
483 Some(&b'>') => break, Some(&ch) => ch,
485 None => break,
486 };
487
488 let low = match chars.next() {
489 Some(&b'>') => {
490 b'0'
492 }
493 Some(&ch) => ch,
494 None => b'0', };
496
497 let high_val = hex_digit_value(high).ok_or_else(|| {
498 ParseError::StreamDecodeError(format!("Invalid hex digit: {}", high as char))
499 })?;
500 let low_val = hex_digit_value(low).ok_or_else(|| {
501 ParseError::StreamDecodeError(format!("Invalid hex digit: {}", low as char))
502 })?;
503
504 result.push((high_val << 4) | low_val);
505
506 if low == b'>' {
507 break;
508 }
509 }
510
511 Ok(result)
512}
513
/// Returns the numeric value (0-15) of an ASCII hexadecimal digit, accepting
/// both upper- and lower-case letters, or `None` for any other byte.
fn hex_digit_value(ch: u8) -> Option<u8> {
    // Bytes map one-to-one onto the first 256 code points, none of which
    // beyond ASCII is a base-16 digit.
    char::from(ch).to_digit(16).map(|value| value as u8)
}
523
524fn decode_ascii85(data: &[u8]) -> ParseResult<Vec<u8>> {
526 let mut result = Vec::new();
527 let mut chars = data.iter().filter(|&&b| !b.is_ascii_whitespace());
528 let mut group = Vec::with_capacity(5);
529
530 let mut ch = match chars.next() {
532 Some(&b'<') => {
533 if chars.next() == Some(&b'~') {
534 chars.next()
536 } else {
537 Some(&b'<')
539 }
540 }
541 other => other,
542 };
543
544 while let Some(&c) = ch {
545 match c {
546 b'~' => {
547 if chars.next() == Some(&b'>') {
549 break;
550 } else {
551 return Err(ParseError::StreamDecodeError(
552 "Invalid ASCII85 end marker".to_string(),
553 ));
554 }
555 }
556 b'z' if group.is_empty() => {
557 result.extend_from_slice(&[0, 0, 0, 0]);
559 }
560 b'!'..=b'u' => {
561 group.push(c);
562 if group.len() == 5 {
563 let value = group
565 .iter()
566 .enumerate()
567 .map(|(i, &ch)| (ch - b'!') as u32 * 85u32.pow(4 - i as u32))
568 .sum::<u32>();
569
570 result.push((value >> 24) as u8);
571 result.push((value >> 16) as u8);
572 result.push((value >> 8) as u8);
573 result.push(value as u8);
574
575 group.clear();
576 }
577 }
578 _ => {
579 return Err(ParseError::StreamDecodeError(format!(
580 "Invalid ASCII85 character: {}",
581 c as char
582 )));
583 }
584 }
585 ch = chars.next();
586 }
587
588 if !group.is_empty() {
590 let original_len = group.len();
592
593 while group.len() < 5 {
595 group.push(b'u');
596 }
597
598 let value = group
599 .iter()
600 .enumerate()
601 .map(|(i, &ch)| (ch - b'!') as u32 * 85u32.pow(4 - i as u32))
602 .sum::<u32>();
603
604 let output_bytes = original_len - 1;
606 for i in 0..output_bytes {
607 result.push((value >> (24 - 8 * i)) as u8);
608 }
609 }
610
611 Ok(result)
612}
613
614#[cfg(test)]
615mod tests {
616 use super::*;
617 use crate::parser::objects::{PdfArray, PdfDictionary, PdfName, PdfObject};
618
#[test]
fn test_ascii_hex_decode() {
    // (encoded, expected): plain digits, whitespace-separated, odd digit count.
    let cases: [(&[u8], &[u8]); 3] = [
        (b"48656C6C6F>", b"Hello"),
        (b"48 65 6C 6C 6F>", b"Hello"),
        (b"48656C6C6>", b"Hell`"),
    ];

    for &(encoded, expected) in cases.iter() {
        assert_eq!(decode_ascii_hex(encoded).unwrap(), expected);
    }
}
633
#[test]
fn test_ascii85_decode() {
    // Three full groups followed by the end marker.
    let text = decode_ascii85(b"87cURD]j7BEbo80~>").unwrap();
    assert_eq!(text, b"Hello world!");

    // `z` is shorthand for four zero bytes.
    let zeros = decode_ascii85(b"z~>").unwrap();
    assert_eq!(zeros, &[0, 0, 0, 0]);
}
644
#[test]
fn test_filter_from_name() {
    // Every PDF filter name and the variant it must map to.
    let known = [
        ("ASCIIHexDecode", Filter::ASCIIHexDecode),
        ("ASCII85Decode", Filter::ASCII85Decode),
        ("LZWDecode", Filter::LZWDecode),
        ("FlateDecode", Filter::FlateDecode),
        ("RunLengthDecode", Filter::RunLengthDecode),
        ("CCITTFaxDecode", Filter::CCITTFaxDecode),
        ("JBIG2Decode", Filter::JBIG2Decode),
        ("DCTDecode", Filter::DCTDecode),
        ("JPXDecode", Filter::JPXDecode),
        ("Crypt", Filter::Crypt),
    ];

    for (name, expected) in known.iter() {
        assert_eq!(Filter::from_name(name), Some(expected.clone()));
    }
    assert_eq!(Filter::from_name("UnknownFilter"), None);
}
671
#[test]
fn test_filter_equality() {
    // A variant equals itself and differs from every other variant.
    let hex = Filter::ASCIIHexDecode;
    assert_eq!(hex, Filter::ASCIIHexDecode);
    assert_ne!(hex, Filter::ASCII85Decode);
    assert_ne!(Filter::FlateDecode, Filter::LZWDecode);
}
678
#[test]
fn test_filter_clone() {
    // A clone compares equal to its source.
    let original = Filter::FlateDecode;
    let copy = original.clone();
    assert_eq!(original, copy);
}
685
#[test]
fn test_decode_stream_no_filter() {
    // Without a Filter entry the bytes come back untouched.
    let payload = b"Hello, world!";
    let empty_dict = PdfDictionary::new();

    let decoded = decode_stream(payload, &empty_dict, &ParseOptions::default()).unwrap();
    assert_eq!(decoded, payload);
}
694
#[test]
fn test_decode_stream_single_filter() {
    // Filter given as a single name object.
    let filter_name = PdfObject::Name(PdfName("ASCIIHexDecode".to_string()));
    let mut dict = PdfDictionary::new();
    dict.insert("Filter".to_string(), filter_name);

    let decoded = decode_stream(b"48656C6C6F>", &dict, &ParseOptions::default()).unwrap();
    assert_eq!(decoded, b"Hello");
}
707
#[test]
fn test_decode_stream_invalid_filter() {
    // A filter name that `Filter::from_name` does not know must be rejected.
    let unknown = PdfObject::Name(PdfName("UnknownFilter".to_string()));
    let mut dict = PdfDictionary::new();
    dict.insert("Filter".to_string(), unknown);

    let outcome = decode_stream(b"test data", &dict, &ParseOptions::default());
    assert!(outcome.is_err());
}
720
#[test]
fn test_decode_stream_filter_array() {
    // Filter given as a one-element array of names.
    let chain = PdfArray(vec![PdfObject::Name(PdfName("ASCIIHexDecode".to_string()))]);
    let mut dict = PdfDictionary::new();
    dict.insert("Filter".to_string(), PdfObject::Array(chain));

    let decoded = decode_stream(b"48656C6C6F>", &dict, &ParseOptions::default()).unwrap();
    assert_eq!(decoded, b"Hello");
}
731
#[test]
fn test_decode_stream_invalid_filter_type() {
    // Filter must be a name or an array; an integer is a syntax error.
    let mut dict = PdfDictionary::new();
    dict.insert("Filter".to_string(), PdfObject::Integer(42));

    let outcome = decode_stream(b"test data", &dict, &ParseOptions::default());
    assert!(outcome.is_err());
}
741
#[test]
fn test_ascii_hex_decode_empty() {
    // Only the end-of-data marker: nothing to decode.
    let decoded = decode_ascii_hex(b">").unwrap();
    assert!(decoded.is_empty());
}
748
#[test]
fn test_ascii_hex_decode_invalid() {
    // `G` is not a hexadecimal digit.
    let outcome = decode_ascii_hex(b"GG>");
    assert!(outcome.is_err());
}
755
#[test]
fn test_ascii_hex_decode_no_terminator() {
    // The end-of-data marker is optional.
    let decoded = decode_ascii_hex(b"48656C6C6F").unwrap();
    assert_eq!(decoded, b"Hello");
}
762
#[test]
fn test_ascii85_decode_empty() {
    // Only the end marker: nothing to decode.
    let decoded = decode_ascii85(b"~>").unwrap();
    assert!(decoded.is_empty());
}
769
#[test]
fn test_ascii85_decode_invalid() {
    // `v` lies outside the `!`..=`u` alphabet.
    let outcome = decode_ascii85(b"invalid~>");
    assert!(outcome.is_err());
}
776
#[cfg(feature = "compression")]
#[test]
fn test_flate_decode() {
    use flate2::write::ZlibEncoder;
    use flate2::Compression;
    use std::io::Write;

    // Round trip: zlib-compress a short message, then decode it.
    let plain = b"Hello, compressed world!";
    let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default());
    zlib.write_all(plain).unwrap();
    let packed = zlib.finish().unwrap();

    assert_eq!(decode_flate(&packed).unwrap(), plain);
}
792
#[cfg(not(feature = "compression"))]
#[test]
fn test_flate_decode_not_supported() {
    // Without the feature the decoder is a stub that always fails.
    let outcome = decode_flate(b"compressed data");
    assert!(outcome.is_err());
}
800
#[test]
fn test_apply_filter() {
    // Dispatch through `apply_filter` reaches the hex decoder.
    let decoded = apply_filter(b"48656C6C6F>", Filter::ASCIIHexDecode).unwrap();
    assert_eq!(decoded, b"Hello");
}
807
#[test]
fn test_apply_filter_unsupported() {
    // Filters without a decoder must report an error.
    let payload = b"test data";

    for unsupported in [Filter::JPXDecode, Filter::Crypt].iter().cloned() {
        assert!(apply_filter(payload, unsupported).is_err());
    }
}
818
#[test]
fn test_apply_filter_dct_decode() {
    // Arbitrary text is rejected by the DCT decoder.
    let rejected = apply_filter(b"not jpeg data", Filter::DCTDecode);
    assert!(rejected.is_err());

    // Start-of-image marker directly followed by end-of-image marker.
    let minimal_jpeg = vec![0xFF, 0xD8, 0xFF, 0xD9];
    let accepted = apply_filter(&minimal_jpeg, Filter::DCTDecode);
    assert!(accepted.is_ok());
    assert_eq!(accepted.unwrap(), minimal_jpeg);
}
835
#[test]
fn test_apply_filter_with_params_no_predictor() {
    // An empty parameter dictionary must not alter decoding.
    let no_params = PdfDictionary::new();

    let decoded =
        apply_filter_with_params(b"48656C6C6F>", Filter::ASCIIHexDecode, Some(&no_params)).unwrap();
    assert_eq!(decoded, b"Hello");
}
846
#[test]
fn test_apply_predictor_none() {
    // Predictor 1 leaves the data unchanged.
    let input = vec![1, 2, 3, 4];
    let params = PdfDictionary::new();

    let output = apply_predictor(&input, 1, &params).unwrap();
    assert_eq!(output, input);
}
855
#[test]
fn test_apply_predictor_unknown() {
    // An unrecognised predictor value leaves the data unchanged.
    let input = vec![1, 2, 3, 4];
    let params = PdfDictionary::new();

    let output = apply_predictor(&input, 99, &params).unwrap();
    assert_eq!(output, input);
}
865
#[test]
fn test_png_predictor_sub_filter() {
    // Each byte is added to the reconstructed byte on its left.
    let filtered = vec![1, 5, 10];
    let reconstructed = apply_png_sub_filter(&filtered, 1);
    assert_eq!(reconstructed, vec![1, 6, 16]);
}
873
#[test]
fn test_png_predictor_up_filter() {
    // Each byte is added to the byte above it.
    let filtered = vec![1, 2, 3];
    let above = vec![5, 10, 15];
    let reconstructed = apply_png_up_filter(&filtered, Some(&above));
    assert_eq!(reconstructed, vec![6, 12, 18]);
}
882
#[test]
fn test_png_predictor_up_filter_no_prev() {
    // Without a previous row the data passes through.
    let filtered = vec![1, 2, 3];
    let reconstructed = apply_png_up_filter(&filtered, None);
    assert_eq!(reconstructed, vec![1, 2, 3]);
}
890
#[test]
fn test_png_predictor_average_filter() {
    // Expected values as pinned by the existing assertion.
    let filtered = vec![2, 4];
    let above = vec![6, 8];
    let reconstructed = apply_png_average_filter(&filtered, Some(&above), 1);
    assert_eq!(reconstructed, vec![5, 10]);
}
901
#[test]
fn test_png_predictor_paeth_filter() {
    // Only the output length is checked here.
    let filtered = vec![1, 2];
    let above = vec![3, 4];
    let reconstructed = apply_png_paeth_filter(&filtered, Some(&above), 1);
    assert_eq!(reconstructed.len(), 2);
}
911
#[test]
fn test_paeth_predictor_algorithm() {
    // (a, b, c, expected) triples for the Paeth selection.
    let cases = [(1, 2, 0, 2), (5, 2, 3, 5), (5, 8, 3, 8)];

    for &(a, b, c, expected) in cases.iter() {
        assert_eq!(paeth_predictor(a, b, c), expected);
    }
}
927
// Gated like the function under test, which only exists with `compression`.
#[cfg(feature = "compression")]
#[test]
fn test_apply_png_predictor_invalid_data() {
    // Six bytes cannot be split into rows of 1 tag + 3 data bytes.
    let data = vec![0, 1, 2, 3, 4, 5];
    let result = apply_png_predictor_with_width(&data, 10, 3);
    assert!(result.is_err());
}
938
// Gated like the function under test, which only exists with `compression`.
#[cfg(feature = "compression")]
#[test]
fn test_apply_png_predictor_valid_simple() {
    // Two rows, each: filter tag 0 (None) followed by two data bytes.
    let data = vec![0, 1, 2, 0, 3, 4];

    let result = apply_png_predictor_with_width(&data, 10, 2).unwrap();
    assert_eq!(result, vec![1, 2, 3, 4]);
}
955
// Gated like the function under test, which only exists with `compression`.
#[cfg(feature = "compression")]
#[test]
fn test_apply_png_predictor_with_sub_filter() {
    // One row: filter tag 1 (Sub) followed by three data bytes.
    let data = vec![1, 1, 2, 3];

    let result = apply_png_predictor_with_width(&data, 10, 3).unwrap();
    assert_eq!(result, vec![1, 2, 3]);
}
972
// Gated like the function under test, which only exists with `compression`.
#[cfg(feature = "compression")]
#[test]
fn test_apply_png_predictor_invalid_filter_type() {
    // Row tag 5 is not a PNG filter type. Either outcome is accepted, but each
    // one is now checked: a rejection must carry a descriptive message, and a
    // lenient decode must hand the row through unchanged.
    let data = vec![5, 1, 2];

    match apply_png_predictor_with_width(&data, 10, 2) {
        Err(error) => {
            let error_msg = error.to_string();
            assert!(
                error_msg.contains("filter")
                    || error_msg.contains("predictor")
                    || error_msg.contains("Invalid")
            );
        }
        Ok(decoded) => assert_eq!(decoded, vec![1, 2]),
    }
}
995
#[test]
fn test_get_filter_params_dict() {
    // DecodeParms given directly as a dictionary.
    let mut inner = PdfDictionary::new();
    inner.insert("Predictor".to_string(), PdfObject::Integer(12));
    let decode_parms = PdfObject::Dictionary(inner);

    let params = get_filter_params(Some(&decode_parms), 0);
    assert!(params.is_some());
    assert_eq!(
        params.unwrap().get("Predictor"),
        Some(&PdfObject::Integer(12))
    );
}
1009
#[test]
fn test_get_filter_params_array() {
    // DecodeParms given as an array with one dictionary per filter.
    let mut first = PdfDictionary::new();
    first.insert("Predictor".to_string(), PdfObject::Integer(15));
    let decode_parms = PdfObject::Array(PdfArray(vec![PdfObject::Dictionary(first)]));

    let params = get_filter_params(Some(&decode_parms), 0);
    assert!(params.is_some());
    assert_eq!(
        params.unwrap().get("Predictor"),
        Some(&PdfObject::Integer(15))
    );
}
1025
#[test]
fn test_get_filter_params_none() {
    // No DecodeParms entry means no parameters.
    assert!(get_filter_params(None, 0).is_none());
}
1031
// The whole test is gated, including its `flate2` imports, so the test build
// does not reference `flate2` when the `compression` feature is disabled.
#[cfg(feature = "compression")]
#[test]
fn test_compressed_xref_integration() {
    use flate2::write::ZlibEncoder;
    use flate2::Compression;
    use std::io::Write;

    // Two rows, each: filter tag 0 (None) followed by two data bytes.
    let original_data = vec![0, 1, 2, 0, 3, 4];

    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&original_data).unwrap();
    let compressed = encoder.finish().unwrap();

    let mut decode_params = PdfDictionary::new();
    decode_params.insert("Predictor".to_string(), PdfObject::Integer(12));
    decode_params.insert("Columns".to_string(), PdfObject::Integer(2));
    decode_params.insert("BitsPerComponent".to_string(), PdfObject::Integer(8));
    decode_params.insert("Colors".to_string(), PdfObject::Integer(1));

    let result =
        apply_filter_with_params(&compressed, Filter::FlateDecode, Some(&decode_params))
            .unwrap();
    assert_eq!(result, vec![1, 2, 3, 4]);
}
1066
// Packs LZW codes MSB-first into bytes for the decoder tests. Codes start at
// 9 bits; the width grows by one bit after the codes 511, 1023 and 2047 are
// written at widths 9, 10 and 11. A final partial byte is zero-padded.
fn encode_lzw_test_data(codes: &[u16]) -> Vec<u8> {
    let mut packed = Vec::new();
    let mut accumulator: u32 = 0;
    let mut pending_bits: u32 = 0;
    let mut width: u32 = 9;

    for &code in codes {
        accumulator = (accumulator << width) | u32::from(code);
        pending_bits += width;

        // Emit every complete byte, most significant bits first.
        while pending_bits >= 8 {
            pending_bits -= 8;
            packed.push((accumulator >> pending_bits) as u8);
        }

        width = match (code, width) {
            (511, 9) => 10,
            (1023, 10) => 11,
            (2047, 11) => 12,
            _ => width,
        };
    }

    if pending_bits > 0 {
        packed.push((accumulator << (8 - pending_bits)) as u8);
    }

    packed
}
1106
#[test]
fn test_lzw_decode_simple() {
    // Three literal codes followed by code 257.
    let packed = encode_lzw_test_data(&[65, 66, 67, 257]);
    let decoded = decode_lzw(&packed, None).unwrap();
    assert_eq!(decoded, b"ABC");
}
1116
#[test]
fn test_lzw_decode_with_repetition() {
    // Code 258 refers to the first table entry created while decoding.
    let packed = encode_lzw_test_data(&[65, 65, 258, 257]);
    let decoded = decode_lzw(&packed, None).unwrap();
    assert_eq!(decoded, b"AAAA");
}
1126
#[test]
fn test_lzw_decode_clear_code() {
    // Code 256 appears mid-stream between two pairs of literals.
    let packed = encode_lzw_test_data(&[65, 66, 256, 67, 68, 257]);
    let decoded = decode_lzw(&packed, None).unwrap();
    assert_eq!(decoded, b"ABCD");
}
1136
#[test]
fn test_lzw_decode_growing_codes() {
    // Explicit EarlyChange = 1 with a hand-packed byte stream; only success
    // of the decode is asserted.
    let mut params = PdfDictionary::new();
    params.insert("EarlyChange".to_string(), PdfObject::Integer(1));

    let data = vec![0x08, 0x21, 0x08, 0x61, 0x08, 0x20, 0x80];
    let result = decode_lzw(&data, Some(&params));
    assert!(result.is_ok());
}
1150
#[test]
fn test_lzw_decode_early_change_false() {
    // EarlyChange = 0 must still decode a short stream correctly.
    let mut params = PdfDictionary::new();
    params.insert("EarlyChange".to_string(), PdfObject::Integer(0));

    let codes = vec![65, 66, 67, 257];
    let data = encode_lzw_test_data(&codes);
    let result = decode_lzw(&data, Some(&params)).unwrap();
    assert_eq!(result, b"ABC");
}
1162
#[test]
fn test_lzw_decode_invalid_code() {
    // Hand-packed bytes that the decoder must reject.
    let corrupt = vec![0x08, 0x21, 0xFF, 0xFF, 0x00];
    assert!(decode_lzw(&corrupt, None).is_err());
}
1170
#[test]
fn test_lzw_decode_empty() {
    // A stream holding only code 257 decodes to nothing.
    let packed = encode_lzw_test_data(&[257]);
    let decoded = decode_lzw(&packed, None).unwrap();
    assert!(decoded.is_empty());
}
1179
#[test]
fn test_lzw_bit_reader() {
    let bytes = vec![0b10101010, 0b11001100, 0b11110000];
    let mut reader = LzwBitReader::new(&bytes);

    // (bits requested, expected value) in read order; the final read asks for
    // more bits than remain.
    let reads = [
        (4, Some(0b1010)),
        (8, Some(0b10101100)),
        (6, Some(0b110011)),
        (6, Some(0b110000)),
        (8, None),
    ];

    for &(bits, expected) in reads.iter() {
        assert_eq!(reader.read_bits(bits), expected);
    }
}
1200
#[test]
fn test_lzw_bit_reader_edge_cases() {
    let bytes = vec![0xFF];
    let mut reader = LzwBitReader::new(&bytes);

    // Zero and seventeen bits are refused, then the single byte is read,
    // after which the reader is exhausted.
    let reads = [(0, None), (17, None), (8, Some(0xFF)), (1, None)];

    for &(bits, expected) in reads.iter() {
        assert_eq!(reader.read_bits(bits), expected);
    }
}
1218
#[test]
fn test_apply_filter_lzw() {
    // Dispatch through `apply_filter` reaches the LZW decoder.
    let packed = encode_lzw_test_data(&[65, 66, 67, 257]);
    let decoded = apply_filter(&packed, Filter::LZWDecode).unwrap();
    assert_eq!(decoded, b"ABC");
}
1227
#[test]
fn test_apply_filter_with_params_lzw() {
    // Parameters passed to `apply_filter_with_params` must reach the LZW decoder.
    let mut params = PdfDictionary::new();
    params.insert("EarlyChange".to_string(), PdfObject::Integer(0));

    let codes = vec![65, 66, 67, 257];
    let data = encode_lzw_test_data(&codes);
    let result = apply_filter_with_params(&data, Filter::LZWDecode, Some(&params)).unwrap();
    assert_eq!(result, b"ABC");
}
1239
#[test]
fn test_run_length_decode_literal() {
    // Length byte 2 copies the next three bytes.
    let encoded = vec![2, b'A', b'B', b'C'];
    assert_eq!(decode_run_length(&encoded).unwrap(), b"ABC");
}
1249
#[test]
fn test_run_length_decode_repeat() {
    // Length byte 253 repeats the next byte four times.
    let encoded = vec![253u8, b'X'];
    assert_eq!(decode_run_length(&encoded).unwrap(), b"XXXX");
}
1257
#[test]
fn test_run_length_decode_mixed() {
    // Literal "AB", then 'C' three times, then literal "DE".
    let mut encoded = vec![1, b'A', b'B'];
    encoded.extend_from_slice(&[254u8, b'C']);
    encoded.extend_from_slice(&[1, b'D', b'E']);

    assert_eq!(decode_run_length(&encoded).unwrap(), b"ABCCCDE");
}
1269
#[test]
fn test_run_length_decode_eod() {
    // Length byte 128 ends the data; the bytes after it are ignored.
    let encoded = vec![0, b'A', 128u8, 1, b'B', b'C'];
    assert_eq!(decode_run_length(&encoded).unwrap(), b"A");
}
1277
#[test]
fn test_run_length_decode_empty() {
    // Empty input decodes to empty output.
    let encoded: Vec<u8> = Vec::new();
    assert!(decode_run_length(&encoded).unwrap().is_empty());
}
1285
#[test]
fn test_run_length_decode_single_literal() {
    // Length byte 0 copies exactly one byte.
    let encoded = vec![0, b'Z'];
    assert_eq!(decode_run_length(&encoded).unwrap(), b"Z");
}
1293
#[test]
fn test_run_length_decode_single_repeat() {
    // Length byte 255 repeats the next byte twice.
    let encoded = vec![255u8, b'Y'];
    assert_eq!(decode_run_length(&encoded).unwrap(), b"YY");
}
1301
#[test]
fn test_run_length_decode_max_repeat() {
    // Length byte 129 yields the longest possible run: 128 copies.
    let encoded = vec![129u8, b'M'];
    let decoded = decode_run_length(&encoded).unwrap();

    assert_eq!(decoded.len(), 128);
    assert!(decoded.iter().all(|&byte| byte == b'M'));
}
1310
#[test]
fn test_run_length_decode_max_literal() {
    // Length byte 127 copies the longest possible literal: 128 bytes.
    let literal: Vec<u8> = (0u8..128).collect();
    let mut encoded = vec![127];
    encoded.extend_from_slice(&literal);

    let decoded = decode_run_length(&encoded).unwrap();
    assert_eq!(decoded.len(), 128);
    assert_eq!(decoded, literal);
}
1320
#[test]
fn test_run_length_decode_error_literal_overflow() {
    // Length byte 5 promises six literal bytes but only two follow.
    let encoded = vec![5, b'A', b'B'];
    assert!(decode_run_length(&encoded).is_err());
}
1328
#[test]
fn test_run_length_decode_error_missing_repeat_byte() {
    // A repeat length with no byte to repeat.
    let encoded = vec![254u8];
    assert!(decode_run_length(&encoded).is_err());
}
1336
#[test]
fn test_apply_filter_run_length() {
    // Dispatch through `apply_filter` reaches the run-length decoder.
    let encoded = vec![2, b'X', b'Y', b'Z'];
    let decoded = apply_filter(&encoded, Filter::RunLengthDecode).unwrap();
    assert_eq!(decoded, b"XYZ");
}
1344
1345 #[test]
1346 fn test_apply_filter_with_params_run_length() {
1347 let data = vec![254u8, b'A', 1, b'B', b'C']; let result = apply_filter_with_params(&data, Filter::RunLengthDecode, None).unwrap();
1350 assert_eq!(result, b"AAABC");
1351 }
1352
1353 #[test]
1356 fn test_read_to_end_limited_within_limit() {
1357 let data = vec![42u8; 1000];
1358 let mut cursor = std::io::Cursor::new(&data);
1359 let result = read_to_end_limited(&mut cursor, 2000).unwrap();
1360 assert_eq!(result.len(), 1000);
1361 }
1362
1363 #[test]
1364 fn test_read_to_end_limited_at_exact_limit() {
1365 let data = vec![42u8; 1000];
1366 let mut cursor = std::io::Cursor::new(&data);
1367 let result = read_to_end_limited(&mut cursor, 1000).unwrap();
1368 assert_eq!(result.len(), 1000);
1369 }
1370
1371 #[test]
1372 fn test_read_to_end_limited_exceeds_limit() {
1373 let data = vec![42u8; 2000];
1374 let mut cursor = std::io::Cursor::new(&data);
1375 let result = read_to_end_limited(&mut cursor, 1000);
1376 assert!(result.is_err());
1377 let err = result.unwrap_err();
1378 assert!(
1379 err.to_string().contains("exceeds limit"),
1380 "Expected decompression limit error, got: {}",
1381 err
1382 );
1383 }
1384
1385 #[test]
1386 fn test_check_compression_ratio_normal() {
1387 assert!(check_compression_ratio(100, 1000).is_ok());
1389 }
1390
1391 #[test]
1392 fn test_check_compression_ratio_high() {
1393 assert!(check_compression_ratio(1, 1001).is_err());
1395 }
1396
1397 #[test]
1398 fn test_check_compression_ratio_zero_input() {
1399 assert!(check_compression_ratio(0, 1000).is_ok());
1401 }
1402
1403 #[cfg(feature = "compression")]
1404 #[test]
1405 fn test_flate_normal_data_succeeds() {
1406 use flate2::write::ZlibEncoder;
1407 use flate2::Compression;
1408 use std::io::Write;
1409
1410 let mut original = Vec::with_capacity(100_000);
1413 for i in 0..100_000u32 {
1414 original.push((i % 256) as u8);
1415 }
1416 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1417 encoder.write_all(&original).unwrap();
1418 let compressed = encoder.finish().unwrap();
1419
1420 let result = try_standard_zlib_decode(&compressed);
1421 assert!(result.is_ok());
1422 assert_eq!(result.unwrap().len(), 100_000);
1423 }
1424
1425 #[cfg(feature = "compression")]
1426 #[test]
1427 fn test_flate_high_ratio_rejected() {
1428 use flate2::write::ZlibEncoder;
1429 use flate2::Compression;
1430 use std::io::Write;
1431
1432 let original = vec![0u8; 2 * 1024 * 1024];
1435 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
1436 encoder.write_all(&original).unwrap();
1437 let compressed = encoder.finish().unwrap();
1438
1439 let result = try_standard_zlib_decode(&compressed);
1440 assert!(result.is_err(), "High compression ratio should be rejected");
1441 let err = result.unwrap_err();
1442 assert!(
1443 err.to_string().contains("compression ratio")
1444 || err.to_string().contains("exceeds limit"),
1445 "Expected compression ratio error, got: {}",
1446 err
1447 );
1448 }
1449
1450 #[cfg(feature = "compression")]
1451 #[test]
1452 fn test_flate_compression_ratio_check() {
1453 let result = check_compression_ratio(10, 10_010);
1456 assert!(result.is_err());
1457 let err = result.unwrap_err();
1458 assert!(err.to_string().contains("Suspicious compression ratio"));
1459 }
1460
1461 #[test]
1462 fn test_read_to_end_limited_empty_input() {
1463 let data: Vec<u8> = Vec::new();
1464 let mut cursor = std::io::Cursor::new(&data);
1465 let result = read_to_end_limited(&mut cursor, 1000).unwrap();
1466 assert!(result.is_empty());
1467 }
1468}
1469
1470pub(crate) fn apply_filter_with_params(
1472 data: &[u8],
1473 filter: Filter,
1474 params: Option<&PdfDictionary>,
1475) -> ParseResult<Vec<u8>> {
1476 let result = match filter {
1477 Filter::FlateDecode => {
1478 if let Some(decode_params) = params {
1482 if decode_params
1483 .get("Predictor")
1484 .and_then(|p| p.as_integer())
1485 .is_some()
1486 {
1487 match try_standard_zlib_decode(data) {
1489 Ok(decoded) => decoded,
1490 Err(_) => {
1491 data.to_vec()
1494 }
1495 }
1496 } else {
1497 decode_flate(data)?
1498 }
1499 } else {
1500 decode_flate(data)?
1501 }
1502 }
1503 Filter::ASCIIHexDecode => decode_ascii_hex(data)?,
1504 Filter::ASCII85Decode => decode_ascii85(data)?,
1505 Filter::LZWDecode => decode_lzw(data, params)?,
1506 Filter::RunLengthDecode => decode_run_length(data)?,
1507 Filter::CCITTFaxDecode => decode_ccitt(data, params)?,
1508 Filter::JBIG2Decode => decode_jbig2(data, params)?,
1509 Filter::DCTDecode => decode_dct(data)?,
1510 _ => {
1511 return Err(ParseError::SyntaxError {
1512 position: 0,
1513 message: format!("Filter {filter:?} not yet implemented"),
1514 });
1515 }
1516 };
1517
1518 if let Some(params_dict) = params {
1520 if let Some(predictor_obj) = params_dict.get("Predictor") {
1521 if let Some(predictor) = predictor_obj.as_integer() {
1522 match apply_predictor(&result, predictor as u32, params_dict) {
1523 Ok(predictor_result) => return Ok(predictor_result),
1524 Err(_) => {
1525 return Ok(result);
1528 }
1529 }
1530 }
1531 }
1532 }
1533
1534 Ok(result)
1535}
1536
1537fn get_filter_params(decode_params: Option<&PdfObject>, _index: usize) -> Option<&PdfDictionary> {
1539 match decode_params {
1540 Some(PdfObject::Dictionary(dict)) => Some(dict),
1541 Some(PdfObject::Array(array)) => {
1542 array.0.first().and_then(|obj| obj.as_dict())
1545 }
1546 _ => None,
1547 }
1548}
1549
1550fn apply_predictor(data: &[u8], predictor: u32, params: &PdfDictionary) -> ParseResult<Vec<u8>> {
1552 match predictor {
1553 1 => {
1554 Ok(data.to_vec())
1556 }
1557 10..=15 => {
1558 apply_png_predictor_advanced(data, predictor, params)
1560 }
1561 _ => {
1562 #[cfg(debug_assertions)]
1564 tracing::debug!("Warning: Unknown predictor {predictor}, returning data as-is");
1565 Ok(data.to_vec())
1566 }
1567 }
1568}
1569
1570fn apply_png_predictor_advanced(
1572 data: &[u8],
1573 _predictor: u32,
1574 params: &PdfDictionary,
1575) -> ParseResult<Vec<u8>> {
1576 let columns = params
1578 .get("Columns")
1579 .and_then(|obj| obj.as_integer())
1580 .unwrap_or(1) as usize;
1581
1582 let bpc = params
1584 .get("BitsPerComponent")
1585 .and_then(|obj| obj.as_integer())
1586 .unwrap_or(8) as usize;
1587
1588 let colors = params
1590 .get("Colors")
1591 .and_then(|obj| obj.as_integer())
1592 .unwrap_or(1) as usize;
1593
1594 let bytes_per_pixel = (bpc * colors).div_ceil(8);
1596
1597 let row_size = columns + 1;
1599
1600 if data.len() % row_size != 0 {
1601 return Err(ParseError::StreamDecodeError(
1602 "PNG predictor: data length not multiple of row size".to_string(),
1603 ));
1604 }
1605
1606 let num_rows = data.len() / row_size;
1607 let mut result = Vec::with_capacity(columns * num_rows);
1608
1609 for row in 0..num_rows {
1610 let row_start = row * row_size;
1611 let predictor_byte = data[row_start];
1612 let row_data = &data[row_start + 1..row_start + row_size];
1613
1614 let filtered_row = match predictor_byte {
1616 0 => {
1617 row_data.to_vec()
1619 }
1620 1 => {
1621 apply_png_sub_filter(row_data, bytes_per_pixel)
1623 }
1624 2 => {
1625 let prev_row = if row > 0 {
1627 Some(&result[(row - 1) * columns..row * columns])
1628 } else {
1629 None
1630 };
1631 apply_png_up_filter(row_data, prev_row)
1632 }
1633 3 => {
1634 let prev_row = if row > 0 {
1636 Some(&result[(row - 1) * columns..row * columns])
1637 } else {
1638 None
1639 };
1640 apply_png_average_filter(row_data, prev_row, bytes_per_pixel)
1641 }
1642 4 => {
1643 let prev_row = if row > 0 {
1645 Some(&result[(row - 1) * columns..row * columns])
1646 } else {
1647 None
1648 };
1649 apply_png_paeth_filter(row_data, prev_row, bytes_per_pixel)
1650 }
1651 _ => {
1652 return Err(ParseError::StreamDecodeError(format!(
1653 "PNG predictor: unknown filter type {predictor_byte}"
1654 )));
1655 }
1656 };
1657
1658 result.extend_from_slice(&filtered_row);
1659 }
1660
1661 Ok(result)
1662}
1663
/// Undoes the PNG "Sub" filter for one row.
///
/// Every output byte is the filtered byte plus (modulo 256) the reconstructed
/// byte `bytes_per_pixel` positions to its left. The first `bytes_per_pixel`
/// bytes have no left neighbour and are copied unchanged.
///
/// A `bytes_per_pixel` of 0 is treated as 1. PNG defines the pixel stride as
/// rounded up to at least one byte, and a stride of 0 would otherwise index
/// one past the end of the output and panic.
fn apply_png_sub_filter(data: &[u8], bytes_per_pixel: usize) -> Vec<u8> {
    let bpp = bytes_per_pixel.max(1);
    let mut result = data.to_vec();

    // Left to right, so `result[i - bpp]` is already reconstructed.
    for i in bpp..result.len() {
        result[i] = result[i].wrapping_add(result[i - bpp]);
    }

    result
}
1678
/// Undoes the PNG "Up" filter for one row.
///
/// Every output byte is the filtered byte plus (modulo 256) the byte at the
/// same position in `prev_row`. Positions with no byte above (no previous
/// row, or a shorter one) use 0.
fn apply_png_up_filter(data: &[u8], prev_row: Option<&[u8]>) -> Vec<u8> {
    let above: &[u8] = prev_row.unwrap_or_default();

    data.iter()
        .enumerate()
        .map(|(idx, &raw)| raw.wrapping_add(above.get(idx).copied().unwrap_or(0)))
        .collect()
}
1690
/// Undoes the PNG "Average" filter for one row.
///
/// Every output byte is the filtered byte plus (modulo 256) the floor of the
/// mean of two reconstructed neighbours: the byte `bytes_per_pixel` positions
/// to the left and the byte directly above in `prev_row`. A neighbour that
/// does not exist counts as 0.
///
/// A `bytes_per_pixel` of 0 is treated as 1. PNG defines the pixel stride as
/// rounded up to at least one byte, and a stride of 0 would otherwise index
/// one past the end of the output and panic.
fn apply_png_average_filter(
    data: &[u8],
    prev_row: Option<&[u8]>,
    bytes_per_pixel: usize,
) -> Vec<u8> {
    let bpp = bytes_per_pixel.max(1);
    let above: &[u8] = prev_row.unwrap_or_default();
    let mut result: Vec<u8> = Vec::with_capacity(data.len());

    for (i, &byte) in data.iter().enumerate() {
        let left = if i >= bpp { result[i - bpp] } else { 0 };
        let up = above.get(i).copied().unwrap_or(0);
        // Widen to u16 so the sum cannot overflow; the halved sum fits in a u8.
        let average = ((u16::from(left) + u16::from(up)) / 2) as u8;
        result.push(byte.wrapping_add(average));
    }

    result
}
1712
1713fn apply_png_paeth_filter(data: &[u8], prev_row: Option<&[u8]>, bytes_per_pixel: usize) -> Vec<u8> {
1715 let mut result = Vec::with_capacity(data.len());
1716
1717 for (i, &byte) in data.iter().enumerate() {
1718 let left_byte = if i < bytes_per_pixel {
1719 0
1720 } else {
1721 result[i - bytes_per_pixel]
1722 };
1723 let up_byte = prev_row.and_then(|row| row.get(i)).unwrap_or(&0);
1724 let up_left_byte = if i < bytes_per_pixel {
1725 0
1726 } else {
1727 *prev_row
1728 .and_then(|row| row.get(i - bytes_per_pixel))
1729 .unwrap_or(&0)
1730 };
1731
1732 let paeth = paeth_predictor(left_byte, *up_byte, up_left_byte);
1733 result.push(byte.wrapping_add(paeth));
1734 }
1735
1736 result
1737}
1738
/// Picks whichever of `left`, `up` or `up_left` is closest to the linear
/// estimate `left + up - up_left`.
///
/// Ties are broken in the order left, then up, then upper-left.
fn paeth_predictor(left: u8, up: u8, up_left: u8) -> u8 {
    let (a, b, c) = (i16::from(left), i16::from(up), i16::from(up_left));
    let estimate = a + b - c;

    let dist_left = (estimate - a).abs();
    let dist_up = (estimate - b).abs();
    let dist_up_left = (estimate - c).abs();

    if dist_left <= dist_up && dist_left <= dist_up_left {
        return left;
    }
    if dist_up <= dist_up_left {
        return up;
    }
    up_left
}
1754
1755fn decode_lzw(data: &[u8], params: Option<&PdfDictionary>) -> ParseResult<Vec<u8>> {
1761 let early_change = params
1763 .and_then(|p| p.get("EarlyChange"))
1764 .and_then(|v| v.as_integer())
1765 .map(|v| v != 0)
1766 .unwrap_or(true); const MIN_BITS: u32 = 9;
1770 const MAX_BITS: u32 = 12;
1771 const CLEAR_CODE: u16 = 256;
1772 const EOD_CODE: u16 = 257;
1773 #[allow(dead_code)]
1774 const FIRST_CODE: u16 = 258;
1775
1776 let mut dictionary: Vec<Vec<u8>> = Vec::with_capacity(4096);
1778 for i in 0..=255 {
1779 dictionary.push(vec![i]);
1780 }
1781 dictionary.push(vec![]); dictionary.push(vec![]); let mut result = Vec::new();
1786 let mut bit_reader = LzwBitReader::new(data);
1787 let mut code_size = MIN_BITS;
1788 let mut prev_code: Option<u16> = None;
1789
1790 while let Some(c) = bit_reader.read_bits(code_size) {
1791 let code = c as u16;
1792
1793 if code == EOD_CODE {
1794 break;
1795 }
1796
1797 if code == CLEAR_CODE {
1798 dictionary.truncate(258);
1800 code_size = MIN_BITS;
1801 prev_code = None;
1802 continue;
1803 }
1804
1805 if let Some(prev) = prev_code {
1807 let string = if (code as usize) < dictionary.len() {
1808 dictionary[code as usize].clone()
1810 } else if code as usize == dictionary.len() {
1811 let mut s = dictionary[prev as usize].clone();
1813 s.push(dictionary[prev as usize][0]);
1814 s
1815 } else {
1816 return Err(ParseError::StreamDecodeError(format!(
1817 "LZW decode error: invalid code {code}"
1818 )));
1819 };
1820
1821 result.extend_from_slice(&string);
1823
1824 if result.len() > MAX_DECOMPRESSED_SIZE {
1826 return Err(ParseError::StreamDecodeError(format!(
1827 "LZW decompressed size exceeds {} MB limit",
1828 MAX_DECOMPRESSED_SIZE / (1024 * 1024)
1829 )));
1830 }
1831
1832 if dictionary.len() < 4096 {
1834 let mut new_entry = dictionary[prev as usize].clone();
1835 new_entry.push(string[0]);
1836 dictionary.push(new_entry);
1837
1838 let dict_size = dictionary.len();
1840 let threshold = if early_change {
1841 1 << code_size
1842 } else {
1843 (1 << code_size) + 1
1844 };
1845
1846 if dict_size >= threshold as usize && code_size < MAX_BITS {
1847 code_size += 1;
1848 }
1849 }
1850 } else {
1851 if (code as usize) < dictionary.len() {
1853 result.extend_from_slice(&dictionary[code as usize]);
1854 } else {
1855 return Err(ParseError::StreamDecodeError(format!(
1856 "LZW decode error: invalid first code {code}"
1857 )));
1858 }
1859 }
1860
1861 prev_code = Some(code);
1862 }
1863
1864 Ok(result)
1865}
1866
/// Cursor over a byte slice that hands out bit fields, most significant bit
/// first.
struct LzwBitReader<'a> {
    /// The bytes being read.
    data: &'a [u8],
    /// Index of the byte that holds the next unread bit.
    byte_pos: usize,
    /// Number of bits already consumed from `data[byte_pos]` (0..=7).
    bit_pos: u8,
}

impl<'a> LzwBitReader<'a> {
    /// Creates a reader positioned at the first bit of `data`.
    fn new(data: &'a [u8]) -> Self {
        Self {
            data,
            byte_pos: 0,
            bit_pos: 0,
        }
    }

    /// Reads the next `n` bits as an unsigned integer.
    ///
    /// Returns `None` when `n` is outside `1..=16` or when the data ends before
    /// `n` bits could be read. In the latter case the bits consumed so far stay
    /// consumed.
    fn read_bits(&mut self, n: u32) -> Option<u32> {
        if !(1..=16).contains(&n) {
            return None;
        }

        let mut value = 0u32;
        let mut remaining = n;

        while remaining > 0 {
            let current = *self.data.get(self.byte_pos)?;

            let unread = 8 - u32::from(self.bit_pos);
            let take = remaining.min(unread);

            // Right-align the wanted bits, then mask off the consumed ones.
            let chunk = (u32::from(current) >> (unread - take)) & ((1u32 << take) - 1);

            value = (value << take) | chunk;
            remaining -= take;
            self.bit_pos += take as u8;

            if self.bit_pos >= 8 {
                self.bit_pos = 0;
                self.byte_pos += 1;
            }
        }

        Some(value)
    }
}
1918
1919fn decode_run_length(data: &[u8]) -> ParseResult<Vec<u8>> {
1924 let mut result = Vec::new();
1925 let mut i = 0;
1926
1927 while i < data.len() {
1928 let length = data[i] as i8;
1929 i += 1;
1930
1931 if length == -128 {
1932 break;
1934 } else if length >= 0 {
1935 let count = (length as usize) + 1;
1937 if i + count > data.len() {
1938 return Err(ParseError::StreamDecodeError(
1939 "RunLength decode error: insufficient data for literal copy".to_string(),
1940 ));
1941 }
1942 result.extend_from_slice(&data[i..i + count]);
1943 i += count;
1944 } else {
1945 if i >= data.len() {
1947 return Err(ParseError::StreamDecodeError(
1948 "RunLength decode error: missing byte to repeat".to_string(),
1949 ));
1950 }
1951 let repeat_byte = data[i];
1952 let count = ((-length) as usize) + 1;
1953 for _ in 0..count {
1954 result.push(repeat_byte);
1955 }
1956 i += 1;
1957 }
1958
1959 if result.len() > MAX_DECOMPRESSED_SIZE {
1961 return Err(ParseError::StreamDecodeError(format!(
1962 "RunLength decompressed size exceeds {} MB limit",
1963 MAX_DECOMPRESSED_SIZE / (1024 * 1024)
1964 )));
1965 }
1966 }
1967
1968 Ok(result)
1969}