1use crate::encodings;
2use crate::encodings::Encoding;
3use crate::encodings::cmap::ToUnicodeCMap;
4use crate::error::DecompressError;
5use crate::{Document, Error, Result};
6use indexmap::IndexMap;
7use log::warn;
8use std::cmp::max;
9use std::fmt;
10use std::str;
11
/// Upper bound, in bytes, on the decompressed size of a single stream (256 MiB).
///
/// The zlib and LZW decoders in this file return `Error::StreamTooLarge` carrying this value
/// when their output grows beyond it.
const MAX_DECOMPRESSED_BYTES: usize = 256 * 1024 * 1024;
16
/// Identifier carried by [`Object::Reference`]; its `Debug` output is `"<.0> <.1> R"`.
///
/// NOTE(review): presumably `(object number, generation number)` — confirm against the parser.
pub type ObjectId = (u32, u16);
19
/// Dictionary object: a map from byte-string keys to [`Object`] values, backed by an `IndexMap`.
///
/// `Debug` is implemented by hand (see the `fmt::Debug` impl) rather than derived.
#[derive(Clone, Default, PartialEq)]
pub struct Dictionary(IndexMap<Vec<u8>, Object>);
23
/// Stream object: a dictionary plus a byte payload.
#[derive(Debug, Clone, PartialEq)]
pub struct Stream {
    /// Stream dictionary; the methods in this file read `Filter`/`DecodeParms` from it and keep
    /// `Length` in sync when the content is replaced.
    pub dict: Dictionary,
    /// Payload bytes, stored encoded whenever `dict` carries a `Filter` entry.
    pub content: Vec<u8>,
    /// Set to `true` by both constructors and changed via `with_compression`.
    /// NOTE(review): no method in this file reads it — presumably consulted by callers before
    /// calling `compress`; confirm.
    pub allows_compression: bool,
    /// `Some(position)` when built with `with_position`, `None` when built with `new`.
    /// NOTE(review): presumably the offset of the not-yet-loaded content in the source file —
    /// confirm against the reader.
    pub start_position: Option<usize>,
}
39
/// A single value of the object model: one variant per basic object type.
///
/// `Debug` is implemented by hand further down and prints a compact, syntax-like form.
#[derive(Clone, PartialEq)]
pub enum Object {
    /// The null object.
    Null,
    /// A boolean value.
    Boolean(bool),
    /// An integer value.
    Integer(i64),
    /// A real (floating-point) value, stored in single precision.
    Real(f32),
    /// A name, stored as raw bytes (printed with a leading `/` by `Debug`).
    Name(Vec<u8>),
    /// A string as raw bytes together with the notation it is written in.
    String(Vec<u8>, StringFormat),
    /// An ordered sequence of objects.
    Array(Vec<Object>),
    /// A dictionary of key/value pairs.
    Dictionary(Dictionary),
    /// A stream: dictionary plus byte payload.
    Stream(Stream),
    /// An indirect reference to another object by id.
    Reference(ObjectId),
}
54
/// Notation used for a string object.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum StringFormat {
    /// Literal notation; `Object`'s `Debug` impl prints it as `(...)`. This is the default.
    #[default]
    Literal,
    /// Hexadecimal notation; `Object`'s `Debug` impl prints it as `<...>` with two hex digits
    /// per byte.
    Hexadecimal,
}
62
63impl From<bool> for Object {
64 fn from(value: bool) -> Self {
65 Object::Boolean(value)
66 }
67}
68
69impl From<i64> for Object {
70 fn from(number: i64) -> Self {
71 Object::Integer(number)
72 }
73}
74
75macro_rules! from_smaller_ints {
76 ($( $Int: ty )+) => {
77 $(
78 impl From<$Int> for Object {
79 fn from(number: $Int) -> Self {
80 Object::Integer(i64::from(number))
81 }
82 }
83 )+
84 }
85}
86
87from_smaller_ints! {
88 i8 i16 i32
89 u8 u16 u32
90}
91
92impl From<f64> for Object {
93 fn from(number: f64) -> Self {
94 Object::Real(number as f32)
95 }
96}
97
98impl From<f32> for Object {
99 fn from(number: f32) -> Self {
100 Object::Real(number)
101 }
102}
103
104impl From<String> for Object {
105 fn from(name: String) -> Self {
106 Object::Name(name.into_bytes())
107 }
108}
109
110impl<'a> From<&'a str> for Object {
111 fn from(name: &'a str) -> Self {
112 Object::Name(name.as_bytes().to_vec())
113 }
114}
115
116impl From<Vec<Object>> for Object {
117 fn from(array: Vec<Object>) -> Self {
118 Object::Array(array)
119 }
120}
121
122impl From<Dictionary> for Object {
123 fn from(dict: Dictionary) -> Self {
124 Object::Dictionary(dict)
125 }
126}
127
128impl From<Stream> for Object {
129 fn from(stream: Stream) -> Self {
130 Object::Stream(stream)
131 }
132}
133
134impl From<ObjectId> for Object {
135 fn from(id: ObjectId) -> Self {
136 Object::Reference(id)
137 }
138}
139
140impl Object {
141 pub fn string_literal<S: Into<Vec<u8>>>(s: S) -> Self {
142 Object::String(s.into(), StringFormat::Literal)
143 }
144
145 pub fn is_null(&self) -> bool {
146 matches!(*self, Object::Null)
147 }
148
149 pub fn as_bool(&self) -> Result<bool> {
150 match self {
151 Object::Boolean(value) => Ok(*value),
152 _ => Err(Error::ObjectType {
153 expected: "Boolean",
154 found: self.enum_variant(),
155 }),
156 }
157 }
158
159 pub fn as_i64(&self) -> Result<i64> {
160 match self {
161 Object::Integer(value) => Ok(*value),
162 _ => Err(Error::ObjectType {
163 expected: "Integer",
164 found: self.enum_variant(),
165 }),
166 }
167 }
168
169 pub fn as_f32(&self) -> Result<f32> {
170 match self {
171 Object::Real(value) => Ok(*value),
172 _ => Err(Error::ObjectType {
173 expected: "Real",
174 found: self.enum_variant(),
175 }),
176 }
177 }
178
179 pub fn as_float(&self) -> Result<f32> {
182 match self {
183 Object::Integer(value) => Ok(*value as f32),
184 Object::Real(value) => Ok(*value),
185 _ => Err(Error::ObjectType {
186 expected: "Integer or Real",
187 found: self.enum_variant(),
188 }),
189 }
190 }
191
192 pub fn as_name(&self) -> Result<&[u8]> {
193 match self {
194 Object::Name(name) => Ok(name),
195 _ => Err(Error::ObjectType {
196 expected: "Name",
197 found: self.enum_variant(),
198 }),
199 }
200 }
201
202 pub fn as_str(&self) -> Result<&[u8]> {
203 match self {
204 Object::String(string, _) => Ok(string),
205 _ => Err(Error::ObjectType {
206 expected: "String",
207 found: self.enum_variant(),
208 }),
209 }
210 }
211
212 pub fn as_str_mut(&mut self) -> Result<&mut Vec<u8>> {
213 match self {
214 Object::String(string, _) => Ok(string),
215 _ => Err(Error::ObjectType {
216 expected: "String",
217 found: self.enum_variant(),
218 }),
219 }
220 }
221
222 pub fn as_reference(&self) -> Result<ObjectId> {
223 match self {
224 Object::Reference(id) => Ok(*id),
225 _ => Err(Error::ObjectType {
226 expected: "Reference",
227 found: self.enum_variant(),
228 }),
229 }
230 }
231
232 pub fn as_array(&self) -> Result<&Vec<Object>> {
233 match self {
234 Object::Array(arr) => Ok(arr),
235 _ => Err(Error::ObjectType {
236 expected: "Array",
237 found: self.enum_variant(),
238 }),
239 }
240 }
241
242 pub fn as_array_mut(&mut self) -> Result<&mut Vec<Object>> {
243 match self {
244 Object::Array(arr) => Ok(arr),
245 _ => Err(Error::ObjectType {
246 expected: "Array",
247 found: self.enum_variant(),
248 }),
249 }
250 }
251
252 pub fn as_dict(&self) -> Result<&Dictionary> {
253 match self {
254 Object::Dictionary(dict) => Ok(dict),
255 _ => Err(Error::ObjectType {
256 expected: "Dictionary",
257 found: self.enum_variant(),
258 }),
259 }
260 }
261
262 pub fn as_dict_mut(&mut self) -> Result<&mut Dictionary> {
263 match self {
264 Object::Dictionary(dict) => Ok(dict),
265 _ => Err(Error::ObjectType {
266 expected: "Dictionary",
267 found: self.enum_variant(),
268 }),
269 }
270 }
271
272 pub fn as_stream(&self) -> Result<&Stream> {
273 match self {
274 Object::Stream(stream) => Ok(stream),
275 _ => Err(Error::ObjectType {
276 expected: "Stream",
277 found: self.enum_variant(),
278 }),
279 }
280 }
281
282 pub fn as_stream_mut(&mut self) -> Result<&mut Stream> {
283 match self {
284 Object::Stream(stream) => Ok(stream),
285 _ => Err(Error::ObjectType {
286 expected: "Stream",
287 found: self.enum_variant(),
288 }),
289 }
290 }
291
292 pub fn type_name(&self) -> Result<&[u8]> {
294 match self {
295 Object::Dictionary(dict) => dict.get_type(),
296 Object::Stream(stream) => stream.dict.get_type(),
297 obj => Err(Error::ObjectType {
298 expected: "Dictionary or Stream",
299 found: obj.enum_variant(),
300 }),
301 }
302 }
303
304 pub fn enum_variant(&self) -> &'static str {
305 match self {
306 Object::Null => "Null",
307 Object::Boolean(_) => "Boolean",
308 Object::Integer(_) => "Integer",
309 Object::Real(_) => "Real",
310 Object::Name(_) => "Name",
311 Object::String(_, _) => "String",
312 Object::Array(_) => "Array",
313 Object::Dictionary(_) => "Dictionary",
314 Object::Stream(_) => "Stream",
315 Object::Reference(_) => "Reference",
316 }
317 }
318}
319
320impl fmt::Debug for Object {
321 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
322 match self {
323 Object::Null => write!(f, "Null"),
324 Object::Boolean(value) => write!(f, "{value}"),
325 Object::Integer(value) => write!(f, "{value}"),
326 Object::Real(value) => write!(f, "{value}"),
327 Object::Name(name) => write!(f, "/{}", String::from_utf8_lossy(name)),
328 Object::String(text, StringFormat::Literal) => {
329 write!(f, "({})", String::from_utf8_lossy(text))
330 }
331 Object::String(text, StringFormat::Hexadecimal) => {
332 write!(f, "<")?;
333 for b in text {
334 write!(f, "{b:02x}")?
335 }
336 write!(f, ">")
337 }
338 Object::Array(array) => {
339 let items = array
340 .iter()
341 .map(|item| format!("{item:?}"))
342 .collect::<Vec<String>>();
343 write!(f, "[{}]", items.join(" "))
344 }
345 Object::Dictionary(dict) => write!(f, "{dict:?}"),
346 Object::Stream(stream) => write!(f, "{:?}stream...endstream", stream.dict),
347 Object::Reference(id) => write!(f, "{} {} R", id.0, id.1),
348 }
349 }
350}
351
352impl Dictionary {
353 pub fn new() -> Dictionary {
354 Dictionary(IndexMap::new())
355 }
356
357 pub fn has(&self, key: &[u8]) -> bool {
358 self.0.contains_key(key)
359 }
360
361 pub fn get(&self, key: &[u8]) -> Result<&Object> {
362 self.0
363 .get(key)
364 .ok_or(Error::DictKey(String::from_utf8_lossy(key).to_string()))
365 }
366
367 pub fn get_deref<'a>(&'a self, key: &[u8], doc: &'a Document) -> Result<&'a Object> {
370 doc.dereference(self.get(key)?).map(|(_, object)| object)
371 }
372
373 pub fn get_mut(&mut self, key: &[u8]) -> Result<&mut Object> {
374 self.0
375 .get_mut(key)
376 .ok_or(Error::DictKey(String::from_utf8_lossy(key).to_string()))
377 }
378
379 pub fn set<K, V>(&mut self, key: K, value: V)
380 where
381 K: Into<Vec<u8>>,
382 V: Into<Object>,
383 {
384 self.0.insert(key.into(), value.into());
385 }
386
387 pub fn len(&self) -> usize {
388 self.0.len()
389 }
390
391 pub fn is_empty(&self) -> bool {
392 self.0.len() == 0
393 }
394
395 pub fn remove(&mut self, key: &[u8]) -> Option<Object> {
396 self.0.swap_remove(key)
397 }
398
399 pub fn has_type(&self, type_name: &[u8]) -> bool {
400 self.get(b"Type").and_then(|s| s.as_name()).ok() == Some(type_name)
401 }
402
403 pub fn get_type(&self) -> Result<&[u8]> {
404 self.get(b"Type")
405 .and_then(Object::as_name)
406 .or_else(|_| self.get(b"Linearized").and(Ok(b"Linearized")))
407 }
408
409 pub fn iter(&'_ self) -> indexmap::map::Iter<'_, Vec<u8>, Object> {
410 self.0.iter()
411 }
412
413 pub fn iter_mut(&'_ mut self) -> indexmap::map::IterMut<'_, Vec<u8>, Object> {
414 self.0.iter_mut()
415 }
416
417 pub fn get_font_encoding(&'_ self, doc: &Document) -> Result<Encoding<'_>> {
418 if !self.has_type(b"Font") {
419 return Err(Error::DictType {
420 expected: "Font",
421 found: String::from_utf8_lossy(self.get_type().unwrap_or(b"None")).to_string(),
422 });
423 }
424
425 match self.get(b"Encoding").and_then(Object::as_name) {
432 Ok(b"StandardEncoding") => Ok(Encoding::OneByteEncoding(&encodings::STANDARD_ENCODING)),
433 Ok(b"MacRomanEncoding") => {
434 Ok(Encoding::OneByteEncoding(&encodings::MAC_ROMAN_ENCODING))
435 }
436 Ok(b"MacExpertEncoding") => {
437 Ok(Encoding::OneByteEncoding(&encodings::MAC_EXPERT_ENCODING))
438 }
439 Ok(b"WinAnsiEncoding") => Ok(Encoding::OneByteEncoding(&encodings::WIN_ANSI_ENCODING)),
440 Ok(b"PDFDocEncoding") => {
441 log::warn!("PDFDocEncoding is not a valid character encoding for a font");
442 Ok(Encoding::OneByteEncoding(&encodings::PDF_DOC_ENCODING))
443 }
444 Ok(b"Identity-H") | Ok(b"Identity-V") => {
445 let stream = self.get_deref(b"ToUnicode", doc)?.as_stream()?;
446 self.get_encoding_from_to_unicode_cmap(stream)
447 }
448 Ok(name) => Ok(Encoding::SimpleEncoding(name)),
449 Err(err) => {
450 warn!(
451 "Could not parse the encoding, error: {err:#?}\nFont: {self:#?}\nTrying to retrieve ToUnicode."
452 );
453 let stream = self
454 .get_deref(b"ToUnicode", doc)
455 .and_then(Object::as_stream);
456 if let Ok(stream) = stream {
457 return self.get_encoding_from_to_unicode_cmap(stream);
458 }
459
460 warn!("Using standard encoding as a fallback!");
461 Ok(Encoding::OneByteEncoding(&encodings::STANDARD_ENCODING))
462 }
463 }
464 }
465
466 fn get_encoding_from_to_unicode_cmap(&'_ self, stream: &Stream) -> Result<Encoding<'_>> {
467 let content = stream.get_plain_content()?;
468 let cmap = ToUnicodeCMap::parse(content)?;
469 Ok(Encoding::UnicodeMapEncoding(cmap))
470 }
471
472 pub fn extend(&mut self, other: &Dictionary) {
473 let keep_both_objects = |new_dict: &mut IndexMap<Vec<u8>, Object>,
474 key: &Vec<u8>,
475 value: &Object,
476 old_value: Object| {
477 let mut final_array;
478
479 match value {
480 Object::Array(array) => {
481 final_array = Vec::with_capacity(array.len() + 1);
482 final_array.push(old_value);
483 final_array.extend(array.to_owned());
484 }
485 _ => {
486 final_array = vec![value.to_owned(), old_value];
487 }
488 }
489
490 new_dict.insert(key.to_owned(), Object::Array(final_array));
491 };
492
493 let mut new_dict = std::mem::take(&mut self.0);
494 new_dict.reserve_exact(other.0.len());
495
496 for (key, value) in other.0.iter() {
497 if let Some(old_value) = new_dict.get(key) {
498 let old_value = old_value.to_owned();
499 match (&old_value, value) {
500 (Object::Dictionary(old_dict), Object::Dictionary(dict)) => {
501 let mut replaced_dict = old_dict.to_owned();
502 replaced_dict.extend(dict);
503 new_dict.insert(key.to_owned(), Object::Dictionary(replaced_dict));
504 }
505 (Object::Array(old_array), Object::Array(array)) => {
506 let mut replaced_array = old_array.to_owned();
507 replaced_array.extend(array.to_owned());
508 new_dict.insert(key.to_owned(), Object::Array(replaced_array));
509 }
510 (Object::Integer(old_id), Object::Integer(id)) => {
511 let array = vec![Object::Integer(*old_id), Object::Integer(*id)];
512 new_dict.insert(key.to_owned(), Object::Array(array));
513 }
514 (Object::Real(old_id), Object::Real(id)) => {
515 let array = vec![Object::Real(*old_id), Object::Real(*id)];
516 new_dict.insert(key.to_owned(), Object::Array(array));
517 }
518 (Object::String(old_ids, old_format), Object::String(ids, format)) => {
519 let array = vec![
520 Object::String(old_ids.to_owned(), old_format.to_owned()),
521 Object::String(ids.to_owned(), format.to_owned()),
522 ];
523 new_dict.insert(key.to_owned(), Object::Array(array));
524 }
525 (Object::Reference(old_object_id), Object::Reference(object_id)) => {
526 let array = vec![
527 Object::Reference(*old_object_id),
528 Object::Reference(*object_id),
529 ];
530 new_dict.insert(key.to_owned(), Object::Array(array));
531 }
532 (Object::Null, _)
533 | (Object::Boolean(_), _)
534 | (Object::Name(_), _)
535 | (Object::Stream(_), _) => {
536 new_dict.insert(key.to_owned(), old_value);
537 }
538 (_, _) => keep_both_objects(&mut new_dict, key, value, old_value),
539 }
540 } else {
541 new_dict.insert(key.to_owned(), value.to_owned());
542 }
543 }
544
545 self.0 = new_dict;
546 }
547
548 pub fn as_hashmap(&self) -> &IndexMap<Vec<u8>, Object> {
550 &self.0
551 }
552
553 pub fn as_hashmap_mut(&mut self) -> &mut IndexMap<Vec<u8>, Object> {
555 &mut self.0
556 }
557}
558
/// Builds a `Dictionary` from `key => value` pairs.
///
/// Each pair is passed to `Dictionary::set` in the order written, so keys may be anything
/// `set` accepts as a key and values anything it accepts as a value. A trailing comma is
/// allowed, and `dictionary!()` yields an empty dictionary.
///
/// The trailing comma is matched directly rather than by re-invoking the macro under its
/// unqualified name, so a path invocation (`some_crate::dictionary! { .. , }`) expands without
/// the macro having to be imported at the call site.
#[macro_export]
macro_rules! dictionary {
    () => {
        $crate::Dictionary::new()
    };
    ($( $key: expr => $value: expr ),+ $(,)?) => {{
        let mut dict = $crate::Dictionary::new();
        $(
            dict.set($key, $value);
        )+
        dict
    }};
}
575
576impl fmt::Debug for Dictionary {
577 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
578 let entries = self
579 .into_iter()
580 .map(|(key, value)| format!("/{} {:?}", String::from_utf8_lossy(key), value))
581 .collect::<Vec<String>>();
582 write!(f, "<<{}>>", entries.concat())
583 }
584}
585
586impl IntoIterator for Dictionary {
587 type Item = (Vec<u8>, Object);
588 type IntoIter = indexmap::map::IntoIter<Vec<u8>, Object>;
589
590 fn into_iter(self) -> Self::IntoIter {
591 self.0.into_iter()
592 }
593}
594
595impl<'a> IntoIterator for &'a Dictionary {
596 type Item = (&'a Vec<u8>, &'a Object);
597 type IntoIter = indexmap::map::Iter<'a, Vec<u8>, Object>;
598
599 fn into_iter(self) -> Self::IntoIter {
600 self.0.iter()
601 }
602}
603
604impl<'a> IntoIterator for &'a mut Dictionary {
605 type Item = (&'a Vec<u8>, &'a mut Object);
606 type IntoIter = indexmap::map::IterMut<'a, Vec<u8>, Object>;
607
608 fn into_iter(self) -> Self::IntoIter {
609 self.0.iter_mut()
610 }
611}
612
613use std::iter::FromIterator;
614impl<K: Into<Vec<u8>>> FromIterator<(K, Object)> for Dictionary {
615 fn from_iter<I: IntoIterator<Item = (K, Object)>>(iter: I) -> Self {
616 let mut dict = Dictionary::new();
617 for (k, v) in iter {
618 dict.set(k, v);
619 }
620 dict
621 }
622}
623
624impl Stream {
625 pub fn new(mut dict: Dictionary, content: Vec<u8>) -> Stream {
626 dict.set("Length", content.len() as i64);
627 Stream {
628 dict,
629 content,
630 allows_compression: true,
631 start_position: None,
632 }
633 }
634
635 pub fn with_position(dict: Dictionary, position: usize) -> Stream {
636 Stream {
637 dict,
638 content: vec![],
639 allows_compression: true,
640 start_position: Some(position),
641 }
642 }
643
644 #[inline]
647 pub fn with_compression(mut self, allows_compression: bool) -> Stream {
648 self.allows_compression = allows_compression;
649 self
650 }
651
652 pub fn filters(&self) -> Result<Vec<&[u8]>> {
653 let filter = self.dict.get(b"Filter")?;
654
655 if let Ok(name) = filter.as_name() {
656 Ok(vec![name])
657 } else if let Ok(names) = filter.as_array() {
658 names.iter().map(Object::as_name).collect()
659 } else {
660 Err(Error::ObjectType {
661 expected: "Name or Array",
662 found: filter.enum_variant(),
663 })
664 }
665 }
666
667 pub fn set_content(&mut self, content: Vec<u8>) {
668 self.content = content;
669 self.dict.set("Length", self.content.len() as i64);
670 }
671
672 pub fn set_plain_content(&mut self, content: Vec<u8>) {
673 self.dict.remove(b"DecodeParms");
674 self.dict.remove(b"Filter");
675 self.dict.set("Length", content.len() as i64);
676 self.content = content;
677 }
678
679 pub fn get_plain_content(&self) -> Result<Vec<u8>> {
680 match self.filters() {
681 Ok(vec) if !vec.is_empty() => self.decompressed_content(),
682 _ => Ok(self.content.clone()),
683 }
684 }
685
686 pub fn compress(&mut self) -> Result<()> {
687 self.compress_with_level(9)
688 }
689
690 pub fn compress_with_level(&mut self, level: u32) -> Result<()> {
694 use flate2::Compression;
695 use flate2::write::ZlibEncoder;
696 use std::io::prelude::*;
697
698 if self.dict.get(b"Filter").is_err() {
699 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(level));
700 encoder.write_all(self.content.as_slice())?;
701 let compressed = encoder.finish()?;
702 if compressed.len() + 19 < self.content.len() {
703 self.dict.set("Filter", "FlateDecode");
704 self.set_content(compressed);
705 }
706 }
707 Ok(())
708 }
709
710 pub fn decompressed_content(&self) -> Result<Vec<u8>> {
711 let params = self.dict.get(b"DecodeParms").and_then(Object::as_dict).ok();
712 let filters = self.filters()?;
713
714 let mut input = self.content.as_slice();
715 let mut output = vec![];
716
717 for filter in filters {
719 output = match filter {
720 b"FlateDecode" => Self::decompress_zlib(input, params)?,
721 b"LZWDecode" => Self::decompress_lzw(input, params)?,
722 b"ASCII85Decode" => Self::decode_ascii85(input)?,
723 b"ASCIIHexDecode" | b"AHx" => Self::decode_ascii_hex(input)?,
724 b"RunLengthDecode" | b"RL" => Self::decode_run_length(input)?,
725 #[cfg(feature = "embed_image")]
726 b"CCITTFaxDecode" | b"CCF" => Self::decode_ccitt_fax(input, params)?,
727 #[cfg(feature = "embed_image")]
728 b"JBIG2Decode" => Self::decode_jbig2(input)?,
729 #[cfg(feature = "embed_image")]
730 b"JPXDecode" => Self::decode_jpx(input)?,
731 b"DCTDecode" | b"DCT" => input.to_vec(), _ => return Err(Error::Unimplemented("decompression algorithms")),
733 };
734 input = &output;
735 }
736 Ok(output)
737 }
738
739 fn decompress_lzw(input: &[u8], params: Option<&Dictionary>) -> Result<Vec<u8>> {
740 use weezl::{BitOrder, decode::Decoder};
741 const MIN_BITS: u8 = 9;
742
743 let early_change = params
744 .and_then(|p| p.get(b"EarlyChange").ok())
745 .and_then(|p| Object::as_i64(p).ok())
746 .map(|v| v != 0)
747 .unwrap_or(true);
748
749 let mut decoder = if early_change {
750 Decoder::with_tiff_size_switch(BitOrder::Msb, MIN_BITS - 1)
751 } else {
752 Decoder::new(BitOrder::Msb, MIN_BITS - 1)
753 };
754
755 let output = Self::decompress_lzw_loop(input, &mut decoder)?;
756 Self::decompress_predictor(output, params)
757 }
758
759 fn decompress_lzw_loop(input: &[u8], decoder: &mut weezl::decode::Decoder) -> Result<Vec<u8>> {
760 let mut output = vec![];
761
762 let result = decoder.into_stream(&mut output).decode_all(input);
763 if let Err(err) = result.status {
764 warn!("{err}");
765 }
766 if output.len() > MAX_DECOMPRESSED_BYTES {
767 return Err(Error::StreamTooLarge {
768 limit: MAX_DECOMPRESSED_BYTES,
769 });
770 }
771
772 Ok(output)
773 }
774
775 fn decompress_zlib(input: &[u8], params: Option<&Dictionary>) -> Result<Vec<u8>> {
776 use flate2::read::ZlibDecoder;
777 use std::io::prelude::*;
778
779 let mut output = Vec::with_capacity(input.len().min(4096) * 2);
780 let decoder = ZlibDecoder::new(input);
781
782 if !input.is_empty() {
783 decoder
785 .take(MAX_DECOMPRESSED_BYTES as u64 + 1)
786 .read_to_end(&mut output)
787 .unwrap_or_else(|err| {
788 warn!("{err}");
789 0
790 });
791 if output.len() > MAX_DECOMPRESSED_BYTES {
792 return Err(Error::StreamTooLarge {
793 limit: MAX_DECOMPRESSED_BYTES,
794 });
795 }
796 }
797 Self::decompress_predictor(output, params)
798 }
799
800 fn decode_ascii85(input: &[u8]) -> Result<Vec<u8>> {
801 let mut output = vec![];
802 let mut buffer: u32 = 0;
803 let mut count = 0;
804 let input_no_eod = if input.len() >= 2 && &input[input.len() - 2..] == b"~>" {
806 &input[..input.len() - 2]
807 } else {
808 log::warn!("ASCII85 stream is missing its EOD marker");
809 input
810 };
811 for &ch in input_no_eod {
812 if ch == b'z' {
813 if count != 0 {
814 return Err(DecompressError::Ascii85(
815 "z character is not allowed in the middle of a group",
816 )
817 .into());
818 }
819 output.extend_from_slice(&[0, 0, 0, 0]);
820 continue;
821 }
822
823 if ch.is_ascii_whitespace() {
824 continue;
825 }
826
827 if !(b'!'..=b'u').contains(&ch) {
828 break;
829 }
830 buffer = buffer
831 .checked_mul(85)
832 .ok_or(DecompressError::Ascii85("multiplication overflow"))?;
833 buffer += (ch - b'!') as u32;
834 count += 1;
835
836 if count == 5 {
837 output.extend_from_slice(&buffer.to_be_bytes());
838 buffer = 0;
839 count = 0;
840 }
841 }
842
843 if count > 0 {
844 for _ in count..5 {
845 buffer = buffer
846 .checked_mul(85)
847 .ok_or(DecompressError::Ascii85("multiplication overflow"))?;
848 buffer += 84;
849 }
850
851 let bytes = buffer.to_be_bytes();
852 output.extend_from_slice(&bytes[..count - 1]);
853 }
854
855 Ok(output)
856 }
857
858 fn decode_ascii_hex(input: &[u8]) -> Result<Vec<u8>> {
859 let mut output = Vec::with_capacity(input.len() / 2);
860 let mut hi: Option<u8> = None;
861
862 for &ch in input {
863 if ch == b'>' {
864 break; }
866 if ch.is_ascii_whitespace() {
867 continue;
868 }
869 let nibble = match ch {
870 b'0'..=b'9' => ch - b'0',
871 b'A'..=b'F' => ch - b'A' + 10,
872 b'a'..=b'f' => ch - b'a' + 10,
873 _ => return Err(DecompressError::AsciiHex("invalid hex digit").into()),
874 };
875 match hi {
876 None => hi = Some(nibble),
877 Some(h) => {
878 output.push((h << 4) | nibble);
879 hi = None;
880 }
881 }
882 }
883 if let Some(h) = hi {
885 output.push(h << 4);
886 }
887 Ok(output)
888 }
889
890 fn decode_run_length(input: &[u8]) -> Result<Vec<u8>> {
891 let mut output = Vec::new();
892 let mut i = 0;
893 while i < input.len() {
894 let length = input[i];
895 i += 1;
896 match length {
897 128 => break, 0..=127 => {
899 let count = length as usize + 1;
900 let end = (i + count).min(input.len());
901 output.extend_from_slice(&input[i..end]);
902 i = end;
903 }
904 _ => {
905 if i >= input.len() {
907 break;
908 }
909 let count = 257 - length as usize;
910 let byte = input[i];
911 i += 1;
912 output.extend(std::iter::repeat_n(byte, count));
913 }
914 }
915 }
916 Ok(output)
917 }
918
919 #[cfg(feature = "embed_image")]
920 fn decode_ccitt_fax(input: &[u8], params: Option<&Dictionary>) -> Result<Vec<u8>> {
921 let k = params
922 .and_then(|p| p.get(b"K").ok())
923 .and_then(|o| Object::as_i64(o).ok())
924 .unwrap_or(0);
925 let columns = params
926 .and_then(|p| p.get(b"Columns").ok())
927 .and_then(|o| Object::as_i64(o).ok())
928 .unwrap_or(1728) as u32;
929 let rows = params
930 .and_then(|p| p.get(b"Rows").ok())
931 .and_then(|o| Object::as_i64(o).ok())
932 .unwrap_or(0) as u32;
933 let end_of_block = params
934 .and_then(|p| p.get(b"EndOfBlock").ok())
935 .and_then(|o| Object::as_bool(o).ok())
936 .unwrap_or(true);
937 let end_of_line = params
938 .and_then(|p| p.get(b"EndOfLine").ok())
939 .and_then(|o| Object::as_bool(o).ok())
940 .unwrap_or(false);
941 let byte_align = params
942 .and_then(|p| p.get(b"EncodedByteAlign").ok())
943 .and_then(|o| Object::as_bool(o).ok())
944 .unwrap_or(false);
945 let black_is_1 = params
946 .and_then(|p| p.get(b"BlackIs1").ok())
947 .and_then(|o| Object::as_bool(o).ok())
948 .unwrap_or(false);
949
950 let encoding = if k < 0 {
951 hayro_ccitt::EncodingMode::Group4
952 } else if k == 0 {
953 hayro_ccitt::EncodingMode::Group3_1D
954 } else {
955 hayro_ccitt::EncodingMode::Group3_2D { k: k as u32 }
956 };
957
958 let settings = hayro_ccitt::DecodeSettings {
959 columns,
960 rows,
961 end_of_block,
962 end_of_line,
963 rows_are_byte_aligned: byte_align,
964 encoding,
965 invert_black: black_is_1,
966 };
967
968 struct ByteDecoder {
969 output: Vec<u8>,
970 buffer: u8,
971 bit_count: u8,
972 }
973
974 impl ByteDecoder {
975 fn flush(&mut self) {
976 if self.bit_count > 0 {
977 self.output.push(self.buffer << (8 - self.bit_count));
978 self.buffer = 0;
979 self.bit_count = 0;
980 }
981 }
982 }
983
984 impl hayro_ccitt::Decoder for ByteDecoder {
985 fn push_pixel(&mut self, white: bool) {
986 self.buffer = (self.buffer << 1) | u8::from(white);
987 self.bit_count += 1;
988 if self.bit_count == 8 {
989 self.output.push(self.buffer);
990 self.buffer = 0;
991 self.bit_count = 0;
992 }
993 }
994
995 fn push_pixel_chunk(&mut self, white: bool, chunk_count: u32) {
996 let byte = if white { 0xFF } else { 0x00 };
997 self.output
998 .extend(std::iter::repeat_n(byte, chunk_count as usize));
999 }
1000
1001 fn next_line(&mut self) {
1002 self.flush();
1003 }
1004 }
1005
1006 let mut decoder = ByteDecoder {
1007 output: Vec::new(),
1008 buffer: 0,
1009 bit_count: 0,
1010 };
1011
1012 match hayro_ccitt::decode(input, &mut decoder, &settings) {
1013 Ok(_) => Ok(decoder.output),
1014 Err(_) if !decoder.output.is_empty() => {
1015 Ok(decoder.output)
1017 }
1018 Err(_) => Err(Error::Unimplemented("CCITTFaxDecode failed")),
1019 }
1020 }
1021
    /// Decodes embedded JBIG2 data into packed 1-bit-per-pixel rows using `hayro_jbig2`.
    ///
    /// The output buffer holds `ceil(width / 8)` bytes per row for `height` rows and is
    /// zero-initialised; pixels are written MSB-first with the decoder's "black" flag
    /// inverted, so a black pixel is stored as a 0 bit.
    ///
    /// # Errors
    /// `Error::Unimplemented("JBIG2Decode failed")` when the data cannot be parsed.
    #[cfg(feature = "embed_image")]
    fn decode_jbig2(input: &[u8]) -> Result<Vec<u8>> {
        // No separate globals segment is supplied (second argument is `None`).
        let image = hayro_jbig2::decode_embedded(input, None)
            .map_err(|_| Error::Unimplemented("JBIG2Decode failed"))?;

        let row_bytes = (image.width as usize).div_ceil(8);
        let mut packed = vec![0u8; row_bytes * image.height as usize];

        // Writes decoded pixels into a fixed-size buffer; writes past the end are dropped.
        struct InvertDecoder<'a> {
            data: &'a mut [u8],
            pos: usize,
            buffer: u8,
            bit_count: u8,
        }

        impl hayro_jbig2::Decoder for InvertDecoder<'_> {
            fn push_pixel(&mut self, black: bool) {
                // Invert while packing: black becomes a 0 bit.
                self.buffer = (self.buffer << 1) | u8::from(!black);
                self.bit_count += 1;
                if self.bit_count == 8 {
                    if self.pos < self.data.len() {
                        self.data[self.pos] = self.buffer;
                    }
                    self.pos += 1;
                    self.buffer = 0;
                    self.bit_count = 0;
                }
            }

            // NOTE(review): pending bits are not merged here — assumes chunks are whole bytes
            // delivered at byte-aligned positions; confirm against the hayro_jbig2 contract.
            fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32) {
                let byte = if black { 0x00 } else { 0xFF };
                // Clamp to the buffer so an oversized chunk cannot index out of bounds.
                let end = (self.pos + chunk_count as usize).min(self.data.len());
                for b in &mut self.data[self.pos..end] {
                    *b = byte;
                }
                self.pos = end;
            }

            fn next_line(&mut self) {
                // Flush a partial byte, left-aligned, so each row starts on a byte boundary.
                if self.bit_count > 0 {
                    if self.pos < self.data.len() {
                        self.data[self.pos] = self.buffer << (8 - self.bit_count);
                    }
                    self.pos += 1;
                    self.buffer = 0;
                    self.bit_count = 0;
                }
            }
        }

        let mut decoder = InvertDecoder {
            data: &mut packed,
            pos: 0,
            buffer: 0,
            bit_count: 0,
        };
        image.decode(&mut decoder);

        Ok(packed)
    }
1086
1087 #[cfg(feature = "embed_image")]
1088 fn decode_jpx(input: &[u8]) -> Result<Vec<u8>> {
1089 let settings = hayro_jpeg2000::DecodeSettings {
1090 resolve_palette_indices: false,
1091 strict: false,
1092 target_resolution: None,
1093 };
1094
1095 let image = hayro_jpeg2000::Image::new(input, &settings)
1096 .map_err(|_| Error::Unimplemented("JPXDecode failed"))?;
1097 image
1098 .decode()
1099 .map_err(|_| Error::Unimplemented("JPXDecode failed"))
1100 }
1101
1102 fn decompress_predictor(mut data: Vec<u8>, params: Option<&Dictionary>) -> Result<Vec<u8>> {
1103 use crate::filters::png;
1104
1105 if let Some(params) = params {
1106 let predictor = params
1107 .get(b"Predictor")
1108 .and_then(Object::as_i64)
1109 .unwrap_or(1);
1110 if (10..=15).contains(&predictor) {
1111 let pixels_per_row = max(
1112 1,
1113 params.get(b"Columns").and_then(Object::as_i64).unwrap_or(1),
1114 ) as usize;
1115 let colors = max(
1116 1,
1117 params.get(b"Colors").and_then(Object::as_i64).unwrap_or(1),
1118 ) as usize;
1119 let bits = max(
1120 8,
1121 params
1122 .get(b"BitsPerComponent")
1123 .and_then(Object::as_i64)
1124 .unwrap_or(8),
1125 ) as usize;
1126 let bytes_per_pixel = colors * bits / 8;
1127 data = png::decode_frame(data.as_slice(), bytes_per_pixel, pixels_per_row)?;
1128 }
1129 Ok(data)
1130 } else {
1131 Ok(data)
1132 }
1133 }
1134
1135 pub fn decompress(&mut self) -> Result<()> {
1136 let data = self.decompressed_content()?;
1137 self.dict.remove(b"DecodeParms");
1138 self.dict.remove(b"Filter");
1139 self.set_content(data);
1140 Ok(())
1141 }
1142
1143 pub fn is_compressed(&self) -> bool {
1144 self.dict.get(b"Filter").is_ok()
1145 }
1146}
1147
// Unit tests for the stream decoders (ASCII85, ASCIIHex, RunLength, zlib).
#[cfg(test)]
mod test {
    use crate::{Error, error::DecompressError};

    use super::{MAX_DECOMPRESSED_BYTES, Stream};

    // Round-trips a multi-line ASCII85 sample, including the `~>` end-of-data marker.
    #[test]
    fn test_decode_ascii85() {
        let input = r#"9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,O<
DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKYi(
DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIal(
DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G>u
D.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>"#;
        let expected = "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.";
        let output = Stream::decode_ascii85(input.as_bytes()).unwrap();
        println!("{}", String::from_utf8(output.clone()).unwrap());
        assert_eq!(&output, expected.as_bytes());
    }

    // `uuuuu` encodes a value larger than 32 bits and must be rejected, not wrapped.
    #[test]
    fn test_decode_ascii85_overflow() {
        let input = b"uuuuu~>";
        let output = Stream::decode_ascii85(input);
        assert!(matches!(
            output,
            Err(Error::Decompress(DecompressError::Ascii85(_)))
        ));
    }

    // Uppercase hex digits terminated by `>`.
    #[test]
    fn test_decode_ascii_hex() {
        let input = b"48656C6C6F>";
        let output = Stream::decode_ascii_hex(input).unwrap();
        assert_eq!(output, b"Hello");
    }

    // Lowercase hex digits decode to the same bytes.
    #[test]
    fn test_decode_ascii_hex_lowercase() {
        let input = b"48656c6c6f>";
        let output = Stream::decode_ascii_hex(input).unwrap();
        assert_eq!(output, b"Hello");
    }

    // Whitespace between digit pairs is ignored.
    #[test]
    fn test_decode_ascii_hex_whitespace() {
        let input = b"48 65 6C 6C 6F>";
        let output = Stream::decode_ascii_hex(input).unwrap();
        assert_eq!(output, b"Hello");
    }

    // A trailing unpaired digit is padded with a zero low nibble.
    #[test]
    fn test_decode_ascii_hex_odd_nibble() {
        let input = b"ABC>";
        let output = Stream::decode_ascii_hex(input).unwrap();
        assert_eq!(output, vec![0xAB, 0xC0]);
    }

    // A literal run of five bytes, then a repeat run (253 -> 4 copies), then the EOD byte.
    #[test]
    fn test_decode_run_length() {
        let input = vec![4, 10, 11, 12, 13, 14, 253, 3, 128];
        let output = Stream::decode_run_length(&input).unwrap();
        assert_eq!(output, vec![10, 11, 12, 13, 14, 3, 3, 3, 3]);
    }

    // Data after the EOD byte (128) is ignored.
    #[test]
    fn test_decode_run_length_eod() {
        let input = vec![0, 42, 128, 0, 99];
        let output = Stream::decode_run_length(&input).unwrap();
        assert_eq!(output, vec![42]);
    }

    // Length byte 255 repeats the next byte twice (257 - 255).
    #[test]
    fn test_decode_run_length_repeat() {
        let input = vec![255, 0xAA, 128];
        let output = Stream::decode_run_length(&input).unwrap();
        assert_eq!(output, vec![0xAA, 0xAA]);
    }

    // A small payload well under the limit round-trips through zlib.
    #[test]
    fn decompress_zlib_within_limit_succeeds() {
        use flate2::Compression;
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let plaintext = vec![0u8; 100];
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&plaintext).unwrap();
        let compressed = encoder.finish().unwrap();

        let result = Stream::decompress_zlib(&compressed, None);
        assert!(
            result.is_ok(),
            "small stream should decompress successfully"
        );
        assert_eq!(result.unwrap(), plaintext);
    }

    // Only checks that the error variant can carry the limit constant; it does not exercise a
    // decoder actually hitting the limit.
    #[test]
    fn stream_too_large_error_has_correct_limit() {
        let err = crate::Error::StreamTooLarge {
            limit: MAX_DECOMPRESSED_BYTES,
        };
        assert!(
            matches!(err, crate::Error::StreamTooLarge { limit } if limit == MAX_DECOMPRESSED_BYTES),
            "StreamTooLarge must carry MAX_DECOMPRESSED_BYTES as the limit"
        );
    }
}