1use super::encodings::Encoding;
2use super::{Bookmark, Dictionary, Object, ObjectId};
3use crate::encryption::crypt_filters::*;
4use crate::encryption::{self, EncryptionState, PasswordAlgorithm};
5use crate::xobject::PdfImage;
6use crate::xref::{Xref, XrefType};
7use crate::{Error, ObjectStream, Result, Stream};
8use log::debug;
9use std::cmp::max;
10use std::collections::{BTreeMap, HashMap, HashSet};
11use std::io::Write;
12use std::str;
13use std::sync::Arc;
14
/// In-memory model of a PDF file: header data, trailer, cross-reference table
/// and the indirect objects, plus bookmark and encryption bookkeeping.
#[derive(Debug, Clone)]
pub struct Document {
    /// Version string of the document; `Document::new` starts at `"1.4"`.
    pub version: String,

    /// Raw marker bytes stored for the document.
    // NOTE(review): presumably the binary comment written after the header line — confirm in the writer.
    pub binary_mark: Vec<u8>,

    /// Trailer dictionary; the methods of this type read its `Root` and
    /// `Encrypt` entries and `new_from_prev` writes `Prev`.
    pub trailer: Dictionary,

    /// Cross-reference table of the document.
    pub reference_table: Xref,

    /// Every indirect object currently loaded, keyed by object id.
    pub objects: BTreeMap<ObjectId, Object>,

    /// Object-number high-water mark.
    // NOTE(review): presumably the largest object number handed out so far — confirm against `add_object`.
    pub max_id: u32,

    /// Bookmark-id high-water mark.
    // NOTE(review): presumably the largest bookmark id handed out so far — confirm against the bookmark code.
    pub max_bookmark_id: u32,

    /// Ids of the bookmarks that `adjust_zero_pages` starts from; each id is
    /// looked up in `bookmark_table`.
    pub bookmarks: Vec<u32>,

    /// Bookmark storage keyed by bookmark id.
    pub bookmark_table: HashMap<u32, Bookmark>,

    /// Offset recorded for this document's cross-reference section;
    /// `new_from_prev` copies it into the `Prev` trailer entry.
    pub xref_start: usize,

    /// Encryption parameters; set by `decrypt_raw` on success and cleared by
    /// `encrypt`.
    pub encryption_state: Option<EncryptionState>,

    /// Ids of object-stream containers that still have to be expanded by
    /// `resolve_pending_object_streams`.
    pub pending_obj_streams: Vec<ObjectId>,
}
67
68impl Document {
69 pub fn new() -> Self {
71 Self {
72 version: "1.4".to_string(),
73 binary_mark: vec![0xBB, 0xAD, 0xC0, 0xDE],
74 trailer: Dictionary::new(),
75 reference_table: Xref::new(0, XrefType::CrossReferenceStream),
76 objects: BTreeMap::new(),
77 max_id: 0,
78 max_bookmark_id: 0,
79 bookmarks: Vec::new(),
80 bookmark_table: HashMap::new(),
81 xref_start: 0,
82 encryption_state: None,
83 pending_obj_streams: Vec::new(),
84 }
85 }
86
87 pub fn new_from_prev(prev: &Document) -> Self {
89 let mut new_trailer = prev.trailer.clone();
90 new_trailer.set("Prev", Object::Integer(prev.xref_start as i64));
91 Self {
92 version: "1.4".to_string(),
93 binary_mark: vec![0xBB, 0xAD, 0xC0, 0xDE],
94 trailer: new_trailer,
95 reference_table: Xref::new(0, prev.reference_table.cross_reference_type),
96 objects: BTreeMap::new(),
97 max_id: prev.max_id,
98 max_bookmark_id: prev.max_bookmark_id,
99 bookmarks: Vec::new(),
100 bookmark_table: HashMap::new(),
101 xref_start: 0,
102 encryption_state: None,
103 pending_obj_streams: Vec::new(),
104 }
105 }
106
    /// Upper bound on the number of reference hops followed by a single
    /// lookup, so that reference cycles cannot loop forever.
    const DEREF_LIMIT: usize = 128;
108
109 fn recursive_fix_pages(&mut self, bookmarks: &[u32], first: bool) -> ObjectId {
110 if !bookmarks.is_empty() {
111 for id in bookmarks {
112 let (children, mut page) = match self.bookmark_table.get(id) {
113 Some(n) => (n.children.clone(), n.page),
114 None => return (0, 0),
115 };
116
117 if 0 == page.0 && !children.is_empty() {
118 let objectid = self.recursive_fix_pages(&children[..], false);
119
120 let bookmark = self.bookmark_table.get_mut(id).unwrap();
121 bookmark.page = objectid;
122 page = objectid;
123 }
124
125 if !first && 0 != page.0 {
126 return page;
127 }
128
129 if first && !children.is_empty() {
130 self.recursive_fix_pages(&children[..], first);
131 }
132 }
133 }
134
135 (0, 0)
136 }
137
138 pub fn adjust_zero_pages(&mut self) {
144 self.recursive_fix_pages(&self.bookmarks.clone(), true);
145 }
146
147 pub fn dereference<'a>(
154 &'a self,
155 mut object: &'a Object,
156 ) -> Result<(Option<ObjectId>, &'a Object)> {
157 let mut nb_deref = 0;
158 let mut id = None;
159
160 while let Ok(ref_id) = object.as_reference() {
161 id = Some(ref_id);
162 object = self
163 .objects
164 .get(&ref_id)
165 .ok_or(Error::ObjectNotFound(ref_id))?;
166
167 nb_deref += 1;
168 if nb_deref > Self::DEREF_LIMIT {
169 return Err(Error::ReferenceLimit);
170 }
171 }
172
173 Ok((id, object))
174 }
175
176 pub fn get_object(&self, id: ObjectId) -> Result<&Object> {
178 let object = self.objects.get(&id).ok_or(Error::ObjectNotFound(id))?;
179 self.dereference(object).map(|(_, object)| object)
180 }
181
    /// Returns `true` when an object is stored under exactly `id`.
    /// References are not followed.
    pub fn has_object(&self, id: ObjectId) -> bool {
        self.objects.contains_key(&id)
    }
188
189 pub fn get_object_mut(&mut self, id: ObjectId) -> Result<&mut Object> {
191 let object = self.objects.get(&id).ok_or(Error::ObjectNotFound(id))?;
192 let (ref_id, _obj) = self.dereference(object)?;
193
194 let target_id = ref_id.unwrap_or(id);
195 self.objects
196 .get_mut(&target_id)
197 .ok_or(Error::ObjectNotFound(target_id))
198 }
199
200 pub fn resolve_pending_object_streams(&mut self) -> Result<()> {
209 let ids: Vec<ObjectId> = self.pending_obj_streams.drain(..).collect();
211 for container_id in ids {
212 let mut stream = self
213 .objects
214 .get(&container_id)
215 .ok_or(Error::ObjStmDecompress {
216 container_id: container_id.0,
217 })?
218 .as_stream()?
219 .clone();
220 let obj_stream =
221 ObjectStream::new(&mut stream).map_err(|_| Error::ObjStmDecompress {
222 container_id: container_id.0,
223 })?;
224 for (id, object) in obj_stream.objects {
228 if self
229 .reference_table
230 .compressed_object_belongs_to(id, container_id)
231 {
232 self.objects.entry(id).or_insert(object);
233 }
234 }
235 self.objects.remove(&container_id);
237 }
238 Ok(())
239 }
240
241 pub fn get_object_page(&self, id: ObjectId) -> Result<ObjectId> {
243 for (_, object_id) in self.get_pages() {
244 let page = self.get_object(object_id)?.as_dict()?;
245 let annots = page.get(b"Annots")?.as_array()?;
246 let mut objects_ids = annots.iter().map(Object::as_reference);
247
248 let contains = objects_ids.any(|object_id| Some(id) == object_id.ok());
249 if contains {
250 return Ok(object_id);
251 }
252 }
253
254 Err(Error::PageNumberNotFound(0))
255 }
256
257 pub fn get_dictionary(&self, id: ObjectId) -> Result<&Dictionary> {
259 self.get_object(id).and_then(Object::as_dict)
260 }
261
262 pub fn get_dictionary_mut(&mut self, id: ObjectId) -> Result<&mut Dictionary> {
264 self.get_object_mut(id).and_then(Object::as_dict_mut)
265 }
266
267 pub fn get_dict_in_dict<'a>(
269 &'a self,
270 node: &'a Dictionary,
271 key: &[u8],
272 ) -> Result<&'a Dictionary> {
273 match node.get(key)? {
274 Object::Reference(object_id) => self.get_dictionary(*object_id),
275 Object::Dictionary(dic) => Ok(dic),
276 obj => Err(Error::ObjectType {
277 expected: "Dictionary",
278 found: obj.enum_variant(),
279 }),
280 }
281 }
282
283 pub fn traverse_objects<A: Fn(&mut Object)>(&mut self, action: A) -> Vec<ObjectId> {
285 fn traverse_array<A: Fn(&mut Object)>(
286 array: &mut [Object],
287 action: &A,
288 refs: &mut Vec<ObjectId>,
289 ) {
290 for item in array.iter_mut() {
291 traverse_object(item, action, refs);
292 }
293 }
294 fn traverse_dictionary<A: Fn(&mut Object)>(
295 dict: &mut Dictionary,
296 action: &A,
297 refs: &mut Vec<ObjectId>,
298 ) {
299 for (_, v) in dict.iter_mut() {
300 traverse_object(v, action, refs);
301 }
302 }
303 fn traverse_object<A: Fn(&mut Object)>(
304 object: &mut Object,
305 action: &A,
306 refs: &mut Vec<ObjectId>,
307 ) {
308 action(object);
309 match object {
310 Object::Array(array) => traverse_array(array, action, refs),
311 Object::Dictionary(dict) => traverse_dictionary(dict, action, refs),
312 Object::Stream(stream) => traverse_dictionary(&mut stream.dict, action, refs),
313 Object::Reference(id) if !refs.contains(id) => {
314 refs.push(*id);
315 }
316 _ => {}
317 }
318 }
319 let mut refs = vec![];
320 traverse_dictionary(&mut self.trailer, &action, &mut refs);
321 let mut index = 0;
322 while index < refs.len() {
323 if let Some(object) = self.objects.get_mut(&refs[index]) {
324 traverse_object(object, &action, &mut refs);
325 }
326 index += 1;
327 }
328 refs
329 }
330
331 pub fn get_encrypted(&self) -> Result<&Dictionary> {
333 self.trailer
334 .get(b"Encrypt")
335 .and_then(Object::as_reference)
336 .and_then(|id| self.get_dictionary(id))
337 }
338
    /// Returns `true` when [`Document::get_encrypted`] succeeds, i.e. the
    /// trailer's `Encrypt` entry is a reference that resolves to a dictionary.
    pub fn is_encrypted(&self) -> bool {
        self.get_encrypted().is_ok()
    }
343
    /// Returns `true` when an encryption state is stored on the document;
    /// `decrypt_raw` stores one after a successful decryption.
    pub fn was_encrypted(&self) -> bool {
        self.encryption_state.is_some()
    }
348
349 pub fn authenticate_raw_owner_password<P>(&self, password: P) -> Result<()>
351 where
352 P: AsRef<[u8]>,
353 {
354 if !self.is_encrypted() {
355 return Err(Error::NotEncrypted);
356 }
357
358 let password = password.as_ref();
359 let algorithm = PasswordAlgorithm::try_from(self)?;
360 algorithm.authenticate_owner_password(self, password)?;
361
362 Ok(())
363 }
364
365 pub fn authenticate_raw_user_password<P>(&self, password: P) -> Result<()>
367 where
368 P: AsRef<[u8]>,
369 {
370 if !self.is_encrypted() {
371 return Err(Error::NotEncrypted);
372 }
373
374 let password = password.as_ref();
375 let algorithm = PasswordAlgorithm::try_from(self)?;
376 algorithm.authenticate_user_password(self, password)?;
377
378 Ok(())
379 }
380
381 pub fn authenticate_raw_password<P>(&self, password: P) -> Result<()>
383 where
384 P: AsRef<[u8]>,
385 {
386 if !self.is_encrypted() {
387 return Err(Error::NotEncrypted);
388 }
389
390 let password = password.as_ref();
391 let algorithm = PasswordAlgorithm::try_from(self)?;
392 algorithm
393 .authenticate_owner_password(self, password)
394 .or(algorithm.authenticate_user_password(self, password))?;
395
396 Ok(())
397 }
398
399 pub fn authenticate_owner_password(&self, password: &str) -> Result<()> {
401 if !self.is_encrypted() {
402 return Err(Error::NotEncrypted);
403 }
404
405 let algorithm = PasswordAlgorithm::try_from(self)?;
406 let password = algorithm.sanitize_password(password)?;
407 algorithm.authenticate_owner_password(self, &password)?;
408
409 Ok(())
410 }
411
412 pub fn authenticate_user_password(&self, password: &str) -> Result<()> {
414 if !self.is_encrypted() {
415 return Err(Error::NotEncrypted);
416 }
417
418 let algorithm = PasswordAlgorithm::try_from(self)?;
419 let password = algorithm.sanitize_password(password)?;
420 algorithm.authenticate_user_password(self, &password)?;
421
422 Ok(())
423 }
424
425 pub fn authenticate_password(&self, password: &str) -> Result<()> {
427 if !self.is_encrypted() {
428 return Err(Error::NotEncrypted);
429 }
430
431 let algorithm = PasswordAlgorithm::try_from(self)?;
432 let password = algorithm.sanitize_password(password)?;
433 algorithm
434 .authenticate_owner_password(self, &password)
435 .or(algorithm.authenticate_user_password(self, &password))?;
436
437 Ok(())
438 }
439
440 pub fn get_crypt_filters(&self) -> BTreeMap<Vec<u8>, Arc<dyn CryptFilter>> {
442 let mut crypt_filters = BTreeMap::new();
443
444 if let Ok(filters) = self
445 .get_encrypted()
446 .and_then(|dict| dict.get(b"CF"))
447 .and_then(|object| object.as_dict())
448 {
449 for (name, filter) in filters {
450 let Ok(filter) = filter.as_dict() else {
451 continue;
452 };
453
454 if filter.get(b"Type").is_ok() && !filter.has_type(b"CryptFilter") {
455 continue;
456 }
457
458 let cfm = filter.get(b"CFM").and_then(|object| object.as_name()).ok();
460
461 let crypt_filter: Arc<dyn CryptFilter> = match cfm {
462 Some(b"V2") => Arc::new(Rc4CryptFilter),
465 Some(b"AESV2") => Arc::new(Aes128CryptFilter),
471 Some(b"AESV3") => Arc::new(Aes256CryptFilter),
477 Some(b"Identity") | None => Arc::new(IdentityCryptFilter),
480 _ => continue,
482 };
483
484 crypt_filters.insert(name.to_vec(), crypt_filter);
485 }
486 }
487
488 crypt_filters
489 }
490
491 pub fn encrypt(&mut self, state: &EncryptionState) -> Result<()> {
493 if self.is_encrypted() {
494 return Err(Error::AlreadyEncrypted);
495 }
496
497 let encrypted = state.encode()?;
498
499 for (&id, obj) in self.objects.iter_mut() {
500 encryption::encrypt_object(state, id, obj)?;
501 }
502
503 let object_id = self.add_object(encrypted);
504 self.trailer.set(b"Encrypt", Object::Reference(object_id));
505 self.encryption_state = None;
506
507 Ok(())
508 }
509
510 pub fn decrypt(&mut self, password: &str) -> Result<()> {
512 if !self.is_encrypted() {
513 return Err(Error::NotEncrypted);
514 }
515
516 let algorithm = PasswordAlgorithm::try_from(&*self)?;
517 let password = algorithm.sanitize_password(password)?;
518 self.decrypt_raw(&password)
519 }
520
521 pub fn decrypt_raw<P>(&mut self, password: P) -> Result<()>
524 where
525 P: AsRef<[u8]>,
526 {
527 if !self.is_encrypted() {
528 return Err(Error::NotEncrypted);
529 }
530
531 self.authenticate_raw_password(&password)?;
532
533 let encryption_obj_id = self
535 .trailer
536 .get(b"Encrypt")
537 .and_then(Object::as_reference)?;
538
539 let state = EncryptionState::decode(&*self, password)?;
540
541 for (&id, obj) in self.objects.iter_mut() {
542 if id == encryption_obj_id {
544 continue;
545 }
546
547 encryption::decrypt_object(&state, id, obj)?;
548 }
549
550 let mut object_streams = vec![];
552
553 for (_, object) in self.objects.iter_mut() {
554 let Ok(ref mut stream) = object.as_stream_mut() else {
555 continue;
556 };
557
558 if !stream.dict.has_type(b"ObjStm") {
559 continue;
560 }
561
562 let Some(obj_stream) = ObjectStream::new(stream).ok() else {
563 continue;
564 };
565
566 object_streams.extend(obj_stream.objects);
569 }
570
571 for (id, entry) in object_streams {
573 self.objects.entry(id).or_insert(entry);
574 }
575
576 let object_id = self.trailer.remove(b"Encrypt").unwrap().as_reference()?;
577 self.objects.remove(&object_id);
578
579 self.encryption_state = Some(state);
580
581 Ok(())
582 }
583
584 pub fn catalog(&self) -> Result<&Dictionary> {
586 self.trailer
587 .get(b"Root")
588 .and_then(Object::as_reference)
589 .and_then(|id| self.get_dictionary(id))
590 }
591
592 pub fn catalog_mut(&mut self) -> Result<&mut Dictionary> {
595 self.trailer
596 .get(b"Root")
597 .and_then(Object::as_reference)
598 .and_then(move |id| self.get_dictionary_mut(id))
599 }
600
601 pub fn get_pages(&self) -> BTreeMap<u32, ObjectId> {
603 self.page_iter()
604 .enumerate()
605 .map(|(i, p)| ((i + 1) as u32, p))
606 .collect()
607 }
608
    /// Returns an iterator over the object ids of the document's pages, in
    /// the order in which the page tree lists them.
    pub fn page_iter(&self) -> impl Iterator<Item = ObjectId> + '_ {
        PageTreeIter::new(self)
    }
612
613 pub fn get_page_contents(&self, page_id: ObjectId) -> Vec<ObjectId> {
615 let mut streams = vec![];
616 if let Ok(page) = self.get_dictionary(page_id) {
617 let mut nb_deref = 0;
618 if let Ok(mut contents) = page.get(b"Contents") {
621 loop {
622 match contents {
623 Object::Reference(id) => match self.objects.get(id) {
624 None | Some(Object::Stream(_)) => {
625 streams.push(*id);
626 }
627 Some(o) => {
628 nb_deref += 1;
629 if nb_deref < Self::DEREF_LIMIT {
630 contents = o;
631 continue;
632 }
633 }
634 },
635 Object::Array(arr) => {
636 for content in arr {
637 if let Ok(id) = content.as_reference() {
638 streams.push(id)
639 }
640 }
641 }
642 _ => {}
643 }
644 break;
645 }
646 }
647 }
648 streams
649 }
650
651 pub fn add_page_contents(&mut self, page_id: ObjectId, content: Vec<u8>) -> Result<()> {
653 let page = self.get_dictionary(page_id)?;
654 let mut current_content_list: Vec<Object> = match page.get(b"Contents") {
655 Ok(Object::Reference(id)) => {
656 vec![Object::Reference(*id)]
657 }
658 Ok(Object::Array(arr)) => arr.clone(),
659 _ => vec![],
660 };
661 let content_object_id =
662 self.add_object(Object::Stream(Stream::new(Dictionary::new(), content)));
663 current_content_list.push(Object::Reference(content_object_id));
664
665 let page_mut = self.get_object_mut(page_id).and_then(Object::as_dict_mut)?;
666 page_mut.set("Contents", current_content_list);
667 Ok(())
668 }
669
670 pub fn get_page_content(&self, page_id: ObjectId) -> Result<Vec<u8>> {
672 let mut content = Vec::new();
673 let content_streams = self.get_page_contents(page_id);
674 for object_id in content_streams {
675 if let Ok(content_stream) = self.get_object(object_id).and_then(Object::as_stream) {
676 match content_stream.decompressed_content() {
677 Ok(data) => content.write_all(&data)?,
678 Err(_) => content.write_all(&content_stream.content)?,
679 };
680 }
681 }
682 Ok(content)
683 }
684
685 pub fn get_page_resources(
687 &self,
688 page_id: ObjectId,
689 ) -> Result<(Option<&Dictionary>, Vec<ObjectId>)> {
690 fn collect_resources(
691 page_node: &Dictionary,
692 resource_ids: &mut Vec<ObjectId>,
693 doc: &Document,
694 already_seen: &mut HashSet<ObjectId>,
695 ) -> Result<()> {
696 if let Ok(resource_id) = page_node.get(b"Resources").and_then(Object::as_reference) {
697 resource_ids.push(resource_id);
698 }
699 if let Ok(parent_id) = page_node.get(b"Parent").and_then(Object::as_reference) {
700 if already_seen.contains(&parent_id) {
701 return Err(Error::ReferenceCycle(parent_id));
702 }
703 already_seen.insert(parent_id);
704 let parent_dict = doc.get_dictionary(parent_id)?;
705 collect_resources(parent_dict, resource_ids, doc, already_seen)?;
706 }
707 Ok(())
708 }
709
710 let mut resource_dict = None;
711 let mut resource_ids = Vec::new();
712 if let Ok(page) = self.get_dictionary(page_id) {
713 resource_dict = page.get(b"Resources").and_then(Object::as_dict).ok();
714 collect_resources(page, &mut resource_ids, self, &mut HashSet::new())?;
715 }
716 Ok((resource_dict, resource_ids))
717 }
718
719 pub fn get_page_fonts(&self, page_id: ObjectId) -> Result<BTreeMap<Vec<u8>, &Dictionary>> {
721 fn collect_fonts_from_resources<'a>(
722 resources: &'a Dictionary,
723 fonts: &mut BTreeMap<Vec<u8>, &'a Dictionary>,
724 doc: &'a Document,
725 ) {
726 if let Ok(font) = resources.get(b"Font") {
727 let font_dict = match font {
728 Object::Reference(id) => doc.get_object(*id).and_then(Object::as_dict).ok(),
729 Object::Dictionary(dict) => Some(dict),
730 _ => None,
731 };
732 if let Some(font_dict) = font_dict {
733 for (name, value) in font_dict.iter() {
734 let font = match value {
735 Object::Reference(id) => doc.get_dictionary(*id).ok(),
736 Object::Dictionary(dict) => Some(dict),
737 _ => None,
738 };
739 if !fonts.contains_key(name) {
740 font.map(|font| fonts.insert(name.clone(), font));
741 }
742 }
743 }
744 }
745 }
746
747 let mut fonts = BTreeMap::new();
748 let (resource_dict, resource_ids) = self.get_page_resources(page_id)?;
749 if let Some(resources) = resource_dict {
750 collect_fonts_from_resources(resources, &mut fonts, self);
751 }
752 for resource_id in resource_ids {
753 if let Ok(resources) = self.get_dictionary(resource_id) {
754 collect_fonts_from_resources(resources, &mut fonts, self);
755 }
756 }
757 Ok(fonts)
758 }
759
760 pub fn get_page_annotations(&self, page_id: ObjectId) -> Result<Vec<&Dictionary>> {
764 let mut annotations = vec![];
765 if let Ok(page) = self.get_dictionary(page_id) {
766 match page.get(b"Annots") {
767 Ok(Object::Reference(id)) => self
768 .get_object(*id)
769 .and_then(Object::as_array)?
770 .iter()
771 .flat_map(Object::as_reference)
772 .flat_map(|id| self.get_dictionary(id))
773 .for_each(|a| annotations.push(a)),
774 Ok(Object::Array(a)) => a
775 .iter()
776 .flat_map(Object::as_reference)
777 .flat_map(|id| self.get_dictionary(id))
778 .for_each(|a| annotations.push(a)),
779 _ => {}
780 }
781 }
782 Ok(annotations)
783 }
784
785 pub fn get_page_images(&'_ self, page_id: ObjectId) -> Result<Vec<PdfImage<'_>>> {
786 let mut images = vec![];
787 if let Ok(page) = self.get_dictionary(page_id) {
788 let resources = self.get_dict_in_dict(page, b"Resources")?;
789 let xobject = match self.get_dict_in_dict(resources, b"XObject") {
790 Ok(xobject) => xobject,
791 Err(err) => match err {
792 Error::DictKey(_) => return Ok(Vec::default()),
794 _ => Err(err)?,
795 },
796 };
797
798 for (_, xvalue) in xobject.iter() {
799 let id = xvalue.as_reference()?;
800 let xvalue = self.get_object(id)?;
801 let xvalue = xvalue.as_stream()?;
802 let dict = &xvalue.dict;
803 if dict.get(b"Subtype")?.as_name()? != b"Image" {
804 continue;
805 }
806 let width = dict.get(b"Width")?.as_i64()?;
807 let height = dict.get(b"Height")?.as_i64()?;
808 let color_space = match dict.get(b"ColorSpace") {
809 Ok(cs) => match cs {
810 Object::Array(array) => {
811 Some(String::from_utf8_lossy(array[0].as_name()?).to_string())
812 }
813 Object::Name(name) => Some(String::from_utf8_lossy(name).to_string()),
814 _ => None,
815 },
816 Err(_) => None,
817 };
818 let bits_per_component = match dict.get(b"BitsPerComponent") {
819 Ok(bpc) => Some(bpc.as_i64()?),
820 Err(_) => None,
821 };
822 let mut filters = vec![];
823 if let Ok(filter) = dict.get(b"Filter") {
824 match filter {
825 Object::Array(array) => {
826 for obj in array.iter() {
827 let name = obj.as_name()?;
828 filters.push(String::from_utf8_lossy(name).to_string());
829 }
830 }
831 Object::Name(name) => {
832 filters.push(String::from_utf8_lossy(name).to_string());
833 }
834 _ => {}
835 }
836 };
837
838 images.push(PdfImage {
839 id,
840 width,
841 height,
842 color_space,
843 bits_per_component,
844 filters: Some(filters),
845 content: &xvalue.content,
846 origin_dict: &xvalue.dict,
847 });
848 }
849 }
850 Ok(images)
851 }
852
    /// Decodes `bytes` into a string using `encoding`, logging the encoding
    /// at debug level first.
    pub fn decode_text(encoding: &Encoding, bytes: &[u8]) -> Result<String> {
        debug!("Decoding text with {encoding:#?}");
        encoding.bytes_to_string(bytes)
    }
857
    /// Encodes `text` into bytes using `encoding`.
    pub fn encode_text(encoding: &Encoding, text: &str) -> Vec<u8> {
        encoding.string_to_bytes(text)
    }
861}
862
/// The default document is the empty document produced by `Document::new`.
impl Default for Document {
    fn default() -> Self {
        Self::new()
    }
}
868
/// State of a walk over a document's page tree that yields the object ids of
/// its `Page` nodes.
struct PageTreeIter<'a> {
    /// Document whose objects are consulted during the walk.
    doc: &'a Document,
    /// Unvisited sibling entries of ancestor nodes; popped and resumed once
    /// the current `kids` slice is exhausted.
    stack: Vec<&'a [Object]>,
    /// Remaining `Kids` entries of the node currently being walked, if any.
    kids: Option<&'a [Object]>,
    /// Number of kid entries that may still be examined. It starts at the
    /// document's object count and stops the walk when it reaches zero.
    iter_limit: usize,
}
875
876impl<'a> PageTreeIter<'a> {
877 const PAGE_TREE_DEPTH_LIMIT: usize = 256;
878
879 fn new(doc: &'a Document) -> Self {
880 if let Ok(page_tree_id) = doc
881 .catalog()
882 .and_then(|cat| cat.get(b"Pages"))
883 .and_then(Object::as_reference)
884 {
885 Self {
886 doc,
887 kids: Self::kids(doc, page_tree_id),
888 stack: Vec::with_capacity(32),
889 iter_limit: doc.objects.len(),
890 }
891 } else {
892 Self {
893 doc,
894 kids: None,
895 stack: Vec::new(),
896 iter_limit: doc.objects.len(),
897 }
898 }
899 }
900
901 fn kids(doc: &Document, page_tree_id: ObjectId) -> Option<&[Object]> {
902 doc.get_dictionary(page_tree_id)
903 .and_then(|page_tree| page_tree.get_deref(b"Kids", doc))
904 .and_then(Object::as_array)
905 .map(|k| k.as_slice())
906 .ok()
907 }
908}
909
910impl Iterator for PageTreeIter<'_> {
911 type Item = ObjectId;
912
913 fn next(&mut self) -> Option<Self::Item> {
914 loop {
915 while let Some((kid, new_kids)) = self.kids.and_then(|k| k.split_first()) {
916 if self.iter_limit == 0 {
917 return None;
918 }
919 self.iter_limit -= 1;
920
921 self.kids = Some(new_kids);
922
923 if let Ok(kid_id) = kid.as_reference() {
924 if let Ok(type_name) = self
925 .doc
926 .get_dictionary(kid_id)
927 .and_then(Dictionary::get_type)
928 {
929 match type_name {
930 b"Page" => {
931 return Some(kid_id);
932 }
933 b"Pages" if self.stack.len() < Self::PAGE_TREE_DEPTH_LIMIT => {
934 let kids = self.kids.unwrap();
935 if !kids.is_empty() {
936 self.stack.push(kids);
937 }
938 self.kids = Self::kids(self.doc, kid_id);
939 }
940 _ => {}
941 }
942 }
943 }
944 }
945
946 if let kids @ Some(_) = self.stack.pop() {
948 self.kids = kids;
949 } else {
950 return None;
951 }
952 }
953 }
954
955 fn size_hint(&self) -> (usize, Option<usize>) {
956 let kids = self.kids.unwrap_or(&[]);
957
958 let nb_pages: usize = kids
959 .iter()
960 .chain(self.stack.iter().flat_map(|k| k.iter()))
961 .map(|kid| {
962 if let Ok(dict) = kid
963 .as_reference()
964 .and_then(|id| self.doc.get_dictionary(id))
965 {
966 if let Ok(b"Pages") = dict.get_type() {
967 let count = dict
968 .get_deref(b"Count", self.doc)
969 .and_then(Object::as_i64)
970 .unwrap_or(0);
971 max(0, count) as usize
973 } else {
974 1
975 }
976 } else {
977 1
978 }
979 })
980 .sum();
981
982 (nb_pages, Some(nb_pages))
983 }
984}
985
// Once `next` has returned `None` it keeps returning `None`: neither the
// exhausted kid/stack state nor a zero `iter_limit` is ever reset.
impl std::iter::FusedIterator for PageTreeIter<'_> {}