1use std::collections::{HashMap, HashSet, VecDeque};
2use std::hash::Hash;
3use std::path::Path;
4use std::sync::RwLock;
5
6use crate::crypto;
7use crate::crypto::SecurityState;
8use crate::error::{JustPdfError, Result};
9use crate::object::{self, IndirectRef, PdfDict, PdfObject};
10use crate::stream;
11use crate::tokenizer::Tokenizer;
12use crate::xref::{self, Xref, XrefEntry};
13
/// Backing storage for the raw bytes of a PDF file.
///
/// The bytes either live in an owned buffer or, when the `mmap` feature is
/// enabled, in a memory-mapped file.
enum PdfData {
    /// Bytes held in memory.
    Owned(Vec<u8>),
    /// Bytes provided by a memory-mapped file.
    #[cfg(feature = "mmap")]
    Mmap(memmap2::Mmap),
}

/// Prints only the variant name and the byte length, never the contents.
impl std::fmt::Debug for PdfData {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (variant, byte_len) = match self {
            Self::Owned(buf) => ("Owned", buf.len()),
            #[cfg(feature = "mmap")]
            Self::Mmap(map) => ("Mmap", map.len()),
        };
        f.debug_tuple(variant).field(&byte_len).finish()
    }
}

impl PdfData {
    /// Returns the stored bytes as a slice, whichever variant holds them.
    fn as_bytes(&self) -> &[u8] {
        match self {
            Self::Owned(buf) => buf.as_slice(),
            #[cfg(feature = "mmap")]
            Self::Mmap(map) => &map[..],
        }
    }
}
44
/// A bounded least-recently-used cache.
///
/// Values live in `map`; `order` records recency, with the most recently used
/// key at the front and the eviction candidate at the back.
struct LruCache<K: Eq + Hash + Clone, V> {
    map: HashMap<K, V>,
    order: VecDeque<K>,
    capacity: usize,
}

/// Reports only the entry count and the capacity, not the cached entries.
impl<K, V> std::fmt::Debug for LruCache<K, V>
where
    K: Eq + Hash + Clone + std::fmt::Debug,
    V: std::fmt::Debug,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("LruCache");
        out.field("len", &self.map.len());
        out.field("capacity", &self.capacity);
        out.finish()
    }
}

impl<K: Eq + Hash + Clone, V> LruCache<K, V> {
    /// Creates an empty cache that holds at most `capacity` entries.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is zero.
    fn new(capacity: usize) -> Self {
        assert!(capacity > 0, "LruCache capacity must be > 0");
        Self {
            capacity,
            order: VecDeque::with_capacity(capacity),
            map: HashMap::with_capacity(capacity),
        }
    }

    /// Looks up `key`, marking it as the most recently used entry on a hit.
    fn get(&mut self, key: &K) -> Option<&V> {
        if !self.map.contains_key(key) {
            return None;
        }
        self.touch(key);
        self.map.get(key)
    }

    /// Stores `value` under `key` and marks the key as most recently used.
    ///
    /// An existing key has its value replaced. A new key evicts the least
    /// recently used entry first when the cache is already full.
    fn insert(&mut self, key: K, value: V) {
        if let Some(slot) = self.map.get_mut(&key) {
            *slot = value;
            self.touch(&key);
            return;
        }

        if self.map.len() >= self.capacity {
            self.evict_oldest();
        }
        self.order.push_front(key.clone());
        self.map.insert(key, value);
    }

    /// Reports whether `key` is cached, without affecting recency.
    // Only exercised by the unit tests, hence dead code in non-test builds.
    #[allow(dead_code)]
    fn contains_key(&self, key: &K) -> bool {
        self.map.contains_key(key)
    }

    /// Drops every entry; the capacity is unchanged.
    fn clear(&mut self) {
        self.order.clear();
        self.map.clear();
    }

    /// Number of entries currently cached.
    fn len(&self) -> usize {
        self.map.len()
    }

    /// Changes the capacity, evicting least recently used entries until the
    /// cache fits.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is zero.
    fn set_capacity(&mut self, capacity: usize) {
        assert!(capacity > 0, "LruCache capacity must be > 0");
        self.capacity = capacity;
        while self.map.len() > self.capacity {
            self.evict_oldest();
        }
    }

    /// Moves `key` to the front of the recency queue.
    ///
    /// The key is located with a linear scan of the queue.
    fn touch(&mut self, key: &K) {
        let found = self.order.iter().position(|candidate| candidate == key);
        if let Some(index) = found {
            self.order.remove(index);
        }
        self.order.push_front(key.clone());
    }

    /// Removes the entry at the back of the recency queue, if there is one.
    fn evict_oldest(&mut self) {
        if let Some(oldest) = self.order.pop_back() {
            self.map.remove(&oldest);
        }
    }
}
141
/// Capacity given to the resolved-object cache of every newly constructed
/// [`PdfDocument`].
const DEFAULT_CACHE_CAPACITY: usize = 2048;
144
/// An opened PDF document: the raw bytes, the cross-reference table, and the
/// caches used while resolving indirect objects.
pub struct PdfDocument {
    /// PDF version as `(major, minor)`, taken from the `%PDF-x.y` header.
    pub version: (u8, u8),
    /// Cross-reference data (entries plus trailer dictionary) for the file.
    pub xref: Xref,
    /// Raw bytes of the file, owned or memory-mapped.
    data: PdfData,
    /// LRU cache of resolved objects keyed by indirect reference. Lookups
    /// update recency, which is why even reads go through the write lock.
    objects: RwLock<LruCache<IndirectRef, PdfObject>>,
    /// Encryption state; `None` when no `/Encrypt` entry was detected.
    security: Option<SecurityState>,
    /// Decoded contents of object streams, keyed by the stream's object number.
    decoded_obj_streams: RwLock<HashMap<u32, Vec<u8>>>,
}
164
165impl std::fmt::Debug for PdfDocument {
166 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
167 let obj_cache_len = self
168 .objects
169 .read()
170 .map(|c| c.len())
171 .unwrap_or(0);
172 f.debug_struct("PdfDocument")
173 .field("version", &self.version)
174 .field("xref", &self.xref)
175 .field("data", &self.data)
176 .field("objects_cached", &obj_cache_len)
177 .field("security", &self.security)
178 .finish()
179 }
180}
181
182impl PdfDocument {
183 pub fn open(path: &Path) -> Result<Self> {
185 let data = std::fs::read(path)?;
186 Self::from_bytes(data)
187 }
188
189 pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
191 Self::from_pdf_data(PdfData::Owned(data))
192 }
193
194 fn from_pdf_data(data: PdfData) -> Result<Self> {
196 let bytes = data.as_bytes();
197 if bytes.len() < 8 {
198 return Err(JustPdfError::NotPdf);
199 }
200
201 let version = parse_version(bytes)?;
203
204 let xref = xref::load_xref(bytes)?;
206
207 let mut doc = Self {
208 version,
209 xref,
210 data,
211 objects: RwLock::new(LruCache::new(DEFAULT_CACHE_CAPACITY)),
212 security: None,
213 decoded_obj_streams: RwLock::new(HashMap::new()),
214 };
215
216 doc.detect_encryption()?;
218
219 Ok(doc)
220 }
221
222 #[cfg(feature = "mmap")]
227 pub fn open_mmap(path: &Path) -> Result<Self> {
228 let file = std::fs::File::open(path)?;
229 let mmap = unsafe { memmap2::Mmap::map(&file)? };
232 Self::from_pdf_data(PdfData::Mmap(mmap))
233 }
234
235 pub(crate) fn from_raw_parts(data: Vec<u8>, xref: Xref, version: (u8, u8)) -> Self {
238 Self {
239 version,
240 xref,
241 data: PdfData::Owned(data),
242 objects: RwLock::new(LruCache::new(DEFAULT_CACHE_CAPACITY)),
243 security: None,
244 decoded_obj_streams: RwLock::new(HashMap::new()),
245 }
246 }
247
248 fn detect_encryption(&mut self) -> Result<()> {
250 let encrypt_ref = match self.xref.trailer.get_ref(b"Encrypt") {
252 Some(r) => r.clone(),
253 None => {
254 if self.xref.trailer.get_dict(b"Encrypt").is_some() {
256 return self.detect_encryption_inline();
257 }
258 return Ok(());
259 }
260 };
261
262 let encrypt_obj = self.load_object_raw(&encrypt_ref, &mut HashSet::new())?;
264 let encrypt_dict = match &encrypt_obj {
265 PdfObject::Dict(d) => d,
266 _ => {
267 return Err(JustPdfError::EncryptionError {
268 detail: "encryption object is not a dictionary".into(),
269 });
270 }
271 };
272
273 let ed = crypto::EncryptionDict::from_dict(encrypt_dict)?;
274
275 if ed.filter != b"Standard" {
277 return Err(JustPdfError::UnsupportedEncryption {
278 detail: format!(
279 "unsupported security handler: {}",
280 String::from_utf8_lossy(&ed.filter)
281 ),
282 });
283 }
284
285 let file_id = self.extract_file_id();
287
288 let mut state =
289 SecurityState::new(ed, file_id, Some(encrypt_ref.obj_num));
290
291 if let Ok(key) = crypto::auth::authenticate(&state, b"") {
293 state.file_key = Some(key);
294 }
295
296 self.security = Some(state);
297 Ok(())
298 }
299
300 fn detect_encryption_inline(&mut self) -> Result<()> {
302 let encrypt_dict = self.xref.trailer.get_dict(b"Encrypt").unwrap().clone();
303 let ed = crypto::EncryptionDict::from_dict(&encrypt_dict)?;
304
305 if ed.filter != b"Standard" {
306 return Err(JustPdfError::UnsupportedEncryption {
307 detail: format!(
308 "unsupported security handler: {}",
309 String::from_utf8_lossy(&ed.filter)
310 ),
311 });
312 }
313
314 let file_id = self.extract_file_id();
315 let mut state = SecurityState::new(ed, file_id, None);
316
317 if let Ok(key) = crypto::auth::authenticate(&state, b"") {
318 state.file_key = Some(key);
319 }
320
321 self.security = Some(state);
322 Ok(())
323 }
324
325 fn extract_file_id(&self) -> Vec<u8> {
327 if let Some(PdfObject::Array(arr)) = self.xref.trailer.get(b"ID") {
328 if let Some(PdfObject::String(id)) = arr.first() {
329 return id.clone();
330 }
331 }
332 Vec::new()
333 }
334
335 pub fn is_encrypted(&self) -> bool {
337 self.security.is_some()
338 }
339
340 pub fn is_authenticated(&self) -> bool {
342 match &self.security {
343 Some(s) => s.is_authenticated(),
344 None => true, }
346 }
347
348 pub fn authenticate(&mut self, password: &[u8]) -> Result<()> {
351 let state = match &mut self.security {
352 Some(s) => s,
353 None => return Ok(()), };
355
356 if state.is_authenticated() {
357 return Ok(()); }
359
360 let key = crypto::auth::authenticate(state, password)?;
361 state.file_key = Some(key);
362
363 self.objects.write().unwrap().clear();
365 self.decoded_obj_streams.write().unwrap().clear();
366
367 Ok(())
368 }
369
370 pub fn permissions(&self) -> Option<crypto::Permissions> {
372 self.security.as_ref().map(|s| s.permissions())
373 }
374
    /// Borrows the document's security state, or `None` when the document is
    /// not encrypted.
    pub fn security_state(&self) -> Option<&SecurityState> {
        self.security.as_ref()
    }
379
    /// Returns the length reported by the cross-reference data.
    pub fn object_count(&self) -> usize {
        self.xref.len()
    }
384
385 pub fn catalog_ref(&self) -> Option<&IndirectRef> {
387 self.xref.trailer.get_ref(b"Root")
388 }
389
    /// Borrows the trailer dictionary held by the cross-reference data.
    pub fn trailer(&self) -> &PdfDict {
        &self.xref.trailer
    }
394
395 pub fn resolve(&self, iref: &IndirectRef) -> Result<PdfObject> {
403 {
405 let mut cache = self.objects.write().unwrap();
406 if let Some(obj) = cache.get(iref) {
407 return Ok(obj.clone());
408 }
409 }
410
411 if let Some(ref sec) = self.security {
413 if !sec.is_authenticated() {
414 return Err(JustPdfError::EncryptedDocument);
415 }
416 }
417
418 let obj = self.load_object(iref, &mut HashSet::new())?;
420 let result = obj.clone();
421 self.objects.write().unwrap().insert(iref.clone(), obj);
422 Ok(result)
423 }
424
425 fn load_object(
428 &self,
429 iref: &IndirectRef,
430 visited: &mut HashSet<IndirectRef>,
431 ) -> Result<PdfObject> {
432 let obj = self.load_object_raw(iref, visited)?;
433
434 if let Some(ref sec) = self.security {
436 if sec.is_authenticated() {
437 return crypto::decrypt_object(obj, sec, iref.obj_num, iref.gen_num);
438 }
439 }
440
441 Ok(obj)
442 }
443
444 fn load_object_raw(
446 &self,
447 iref: &IndirectRef,
448 visited: &mut HashSet<IndirectRef>,
449 ) -> Result<PdfObject> {
450 if !visited.insert(iref.clone()) {
451 return Err(JustPdfError::CircularReference {
452 obj_num: iref.obj_num,
453 gen_num: iref.gen_num,
454 });
455 }
456
457 let entry = self
458 .xref
459 .get(iref.obj_num)
460 .ok_or(JustPdfError::ObjectNotFound {
461 obj_num: iref.obj_num,
462 gen_num: iref.gen_num,
463 })?
464 .clone();
465
466 match entry {
467 XrefEntry::InUse { offset, .. } => {
468 let mut tokenizer = Tokenizer::new_at(self.data.as_bytes(), offset as usize);
469 let (_parsed_ref, obj) = object::parse_indirect_object(&mut tokenizer)?;
470 Ok(obj)
471 }
472 XrefEntry::Compressed {
473 obj_stream_num,
474 index_within,
475 } => self.load_compressed_object(obj_stream_num, index_within, visited),
476 XrefEntry::Free { .. } => Ok(PdfObject::Null),
477 }
478 }
479
480 fn load_compressed_object(
483 &self,
484 obj_stream_num: u32,
485 index_within: u16,
486 visited: &mut HashSet<IndirectRef>,
487 ) -> Result<PdfObject> {
488 {
490 let cache = self.decoded_obj_streams.read().unwrap();
491 if !cache.contains_key(&obj_stream_num) {
492 drop(cache); let stream_ref = IndirectRef {
495 obj_num: obj_stream_num,
496 gen_num: 0,
497 };
498
499 let stream_obj = {
501 let raw = self.load_object_raw(&stream_ref, visited)?;
502 if let Some(ref sec) = self.security {
504 if sec.is_authenticated() {
505 crypto::decrypt_object(raw, sec, obj_stream_num, 0)?
506 } else {
507 raw
508 }
509 } else {
510 raw
511 }
512 };
513
514 let (dict, raw_data) = match &stream_obj {
515 PdfObject::Stream { dict, data } => (dict, data),
516 _ => {
517 return Err(JustPdfError::InvalidObject {
518 offset: 0,
519 detail: format!("object stream {obj_stream_num} is not a stream"),
520 });
521 }
522 };
523
524 let decoded = stream::decode_stream(raw_data, dict)?;
525 self.decoded_obj_streams
526 .write()
527 .unwrap()
528 .insert(obj_stream_num, decoded);
529 }
530 }
531
532 let cache = self.decoded_obj_streams.read().unwrap();
533 let decoded = cache.get(&obj_stream_num).unwrap();
534
535 let mut tokenizer = Tokenizer::new(decoded);
542
543 let mut obj_offsets = Vec::new();
547 loop {
548 let saved_pos = tokenizer.pos();
549 let obj_num = match tokenizer.next_token()? {
550 Some(crate::tokenizer::token::Token::Integer(v)) => v as u32,
551 _ => {
552 tokenizer.seek(saved_pos);
553 break;
554 }
555 };
556 let offset = match tokenizer.next_token()? {
557 Some(crate::tokenizer::token::Token::Integer(v)) => v as usize,
558 _ => break,
559 };
560 obj_offsets.push((obj_num, offset));
561 }
562
563 let first = tokenizer.pos();
566
567 let idx = index_within as usize;
568 if idx >= obj_offsets.len() {
569 return Err(JustPdfError::ObjectNotFound {
570 obj_num: 0,
571 gen_num: 0,
572 });
573 }
574
575 let (_obj_num, obj_offset) = obj_offsets[idx];
576 let abs_offset = first + obj_offset;
577
578 let mut tokenizer = Tokenizer::new_at(decoded, abs_offset);
579 object::parse_object(&mut tokenizer)
580 }
581
582 pub fn object_refs(&self) -> impl Iterator<Item = IndirectRef> + '_ {
584 self.xref
585 .entries
586 .iter()
587 .filter_map(|(&obj_num, entry)| match entry {
588 XrefEntry::InUse { gen_num, .. } => Some(IndirectRef {
589 obj_num,
590 gen_num: *gen_num,
591 }),
592 XrefEntry::Compressed { .. } => Some(IndirectRef {
593 obj_num,
594 gen_num: 0,
595 }),
596 XrefEntry::Free { .. } => None,
597 })
598 }
599
    /// Decodes `raw_data` according to the stream dictionary `dict`.
    ///
    /// This forwards to `stream::decode_stream` and does not read any
    /// document state.
    pub fn decode_stream(&self, dict: &PdfDict, raw_data: &[u8]) -> Result<Vec<u8>> {
        stream::decode_stream(raw_data, dict)
    }
604
    /// Borrows the raw bytes of the underlying file.
    pub fn raw_data(&self) -> &[u8] {
        self.data.as_bytes()
    }
609
610 pub fn set_cache_capacity(&mut self, capacity: usize) {
612 self.objects.write().unwrap().set_capacity(capacity);
613 }
614
    /// Returns how many resolved objects are currently cached.
    ///
    /// # Panics
    ///
    /// Panics if the cache lock is poisoned.
    pub fn cached_object_count(&self) -> usize {
        self.objects.read().unwrap().len()
    }
619}
620
621fn parse_version(data: &[u8]) -> Result<(u8, u8)> {
623 let search_len = data.len().min(1024);
625 let needle = b"%PDF-";
626
627 for i in 0..search_len.saturating_sub(needle.len() + 3) {
628 if &data[i..i + needle.len()] == needle {
629 let major = data.get(i + 5).copied().unwrap_or(0);
630 let dot = data.get(i + 6).copied().unwrap_or(0);
631 let minor = data.get(i + 7).copied().unwrap_or(0);
632
633 if major.is_ascii_digit() && dot == b'.' && minor.is_ascii_digit() {
634 return Ok((major - b'0', minor - b'0'));
635 }
636 }
637 }
638
639 Err(JustPdfError::NotPdf)
640}
641
642#[cfg(test)]
643mod tests {
644 use super::*;
645
646 #[test]
647 fn test_parse_version() {
648 assert_eq!(parse_version(b"%PDF-1.7\n").unwrap(), (1, 7));
649 assert_eq!(parse_version(b"%PDF-2.0\n").unwrap(), (2, 0));
650 assert_eq!(parse_version(b"%PDF-1.4 stuff").unwrap(), (1, 4));
651 }
652
653 #[test]
654 fn test_parse_version_not_pdf() {
655 assert!(parse_version(b"Hello World").is_err());
656 assert!(parse_version(b"").is_err());
657 }
658
659 #[test]
660 fn test_parse_version_offset() {
661 assert_eq!(parse_version(b"\xEF\xBB\xBF%PDF-1.7\n").unwrap(), (1, 7));
663 }
664
665 fn build_minimal_pdf() -> Vec<u8> {
667 let mut pdf = Vec::new();
668 pdf.extend_from_slice(b"%PDF-1.4\n");
670
671 let obj1_offset = pdf.len();
673 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
674
675 let obj2_offset = pdf.len();
677 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n");
678
679 let obj3_offset = pdf.len();
681 pdf.extend_from_slice(
682 b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n",
683 );
684
685 let xref_offset = pdf.len();
687 pdf.extend_from_slice(b"xref\n");
688 pdf.extend_from_slice(b"0 4\n");
689 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
690 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
691 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
692 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
693
694 pdf.extend_from_slice(b"trailer\n<< /Size 4 /Root 1 0 R >>\n");
696 pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF\n").as_bytes());
697
698 pdf
699 }
700
701 #[test]
702 fn test_open_minimal_pdf() {
703 let data = build_minimal_pdf();
704 let doc = PdfDocument::from_bytes(data).unwrap();
705
706 assert_eq!(doc.version, (1, 4));
707 assert!(doc.object_count() > 0);
708 assert!(!doc.is_encrypted());
709
710 let catalog_ref = doc.catalog_ref().unwrap().clone();
712 let catalog = doc.resolve(&catalog_ref).unwrap();
713 match &catalog {
714 PdfObject::Dict(d) => {
715 assert_eq!(d.get_name(b"Type"), Some(b"Catalog".as_slice()));
716 }
717 _ => panic!("expected dict for catalog"),
718 }
719 }
720
721 #[test]
722 fn test_not_pdf() {
723 let result = PdfDocument::from_bytes(b"Hello World, not a PDF".to_vec());
724 assert!(result.is_err());
725 }
726
727 #[test]
728 fn test_empty_file() {
729 let result = PdfDocument::from_bytes(vec![]);
730 assert!(result.is_err());
731 }
732
733 #[test]
734 fn test_truncated_pdf() {
735 let result = PdfDocument::from_bytes(b"%PDF-1.4\n".to_vec());
736 assert!(result.is_err());
737 }
738
739 #[test]
740 fn test_object_not_found() {
741 let data = build_minimal_pdf();
742 let doc = PdfDocument::from_bytes(data).unwrap();
743 let result = doc.resolve(&IndirectRef {
744 obj_num: 999,
745 gen_num: 0,
746 });
747 assert!(result.is_err());
748 }
749
750 #[test]
751 fn test_unencrypted_pdf_is_authenticated() {
752 let data = build_minimal_pdf();
753 let doc = PdfDocument::from_bytes(data).unwrap();
754 assert!(!doc.is_encrypted());
755 assert!(doc.is_authenticated());
756 }
757
758 #[test]
763 fn test_lru_cache_insert_and_get() {
764 let mut cache = LruCache::new(3);
765 cache.insert("a", 1);
766 cache.insert("b", 2);
767 cache.insert("c", 3);
768 assert_eq!(cache.len(), 3);
769 assert_eq!(cache.get(&"a"), Some(&1));
770 assert_eq!(cache.get(&"b"), Some(&2));
771 assert_eq!(cache.get(&"c"), Some(&3));
772 }
773
774 #[test]
775 fn test_lru_cache_eviction() {
776 let mut cache = LruCache::new(3);
777 cache.insert("a", 1);
778 cache.insert("b", 2);
779 cache.insert("c", 3);
780 cache.insert("d", 4);
782 assert_eq!(cache.len(), 3);
783 assert_eq!(cache.get(&"a"), None); assert_eq!(cache.get(&"b"), Some(&2));
785 assert_eq!(cache.get(&"c"), Some(&3));
786 assert_eq!(cache.get(&"d"), Some(&4));
787 }
788
789 #[test]
790 fn test_lru_cache_access_promotes() {
791 let mut cache = LruCache::new(3);
792 cache.insert("a", 1);
793 cache.insert("b", 2);
794 cache.insert("c", 3);
795 assert_eq!(cache.get(&"a"), Some(&1));
797 cache.insert("d", 4);
798 assert_eq!(cache.get(&"b"), None); assert_eq!(cache.get(&"a"), Some(&1));
800 }
801
802 #[test]
803 fn test_lru_cache_update_existing() {
804 let mut cache = LruCache::new(3);
805 cache.insert("a", 1);
806 cache.insert("a", 10);
807 assert_eq!(cache.len(), 1);
808 assert_eq!(cache.get(&"a"), Some(&10));
809 }
810
811 #[test]
812 fn test_lru_cache_clear() {
813 let mut cache = LruCache::new(3);
814 cache.insert("a", 1);
815 cache.insert("b", 2);
816 cache.clear();
817 assert_eq!(cache.len(), 0);
818 assert_eq!(cache.get(&"a"), None);
819 }
820
821 #[test]
822 fn test_lru_cache_set_capacity_shrinks() {
823 let mut cache = LruCache::new(5);
824 for i in 0..5 {
825 cache.insert(i, i * 10);
826 }
827 assert_eq!(cache.len(), 5);
828 cache.set_capacity(2);
830 assert_eq!(cache.len(), 2);
831 assert_eq!(cache.get(&0), None);
832 assert_eq!(cache.get(&1), None);
833 assert_eq!(cache.get(&2), None);
834 assert!(cache.get(&3).is_some() || cache.get(&4).is_some());
836 }
837
838 #[test]
843 fn test_set_cache_capacity() {
844 let data = build_minimal_pdf();
845 let mut doc = PdfDocument::from_bytes(data).unwrap();
846
847 for obj_num in 1..=3u32 {
849 let iref = IndirectRef { obj_num, gen_num: 0 };
850 doc.resolve(&iref).unwrap();
851 }
852 assert_eq!(doc.cached_object_count(), 3);
853
854 doc.set_cache_capacity(1);
856 assert_eq!(doc.cached_object_count(), 1);
857 }
858
859 #[test]
860 fn test_lru_cache_hit_miss_on_document() {
861 let data = build_minimal_pdf();
862 let mut doc = PdfDocument::from_bytes(data).unwrap();
863 doc.set_cache_capacity(2);
864
865 let ref1 = IndirectRef { obj_num: 1, gen_num: 0 };
866 let ref2 = IndirectRef { obj_num: 2, gen_num: 0 };
867 let ref3 = IndirectRef { obj_num: 3, gen_num: 0 };
868
869 doc.resolve(&ref1).unwrap();
871 doc.resolve(&ref2).unwrap();
872 assert_eq!(doc.cached_object_count(), 2);
873
874 doc.resolve(&ref3).unwrap();
876 assert_eq!(doc.cached_object_count(), 2);
877 assert!(!doc.objects.read().unwrap().contains_key(&ref1));
878 assert!(doc.objects.read().unwrap().contains_key(&ref2));
879 assert!(doc.objects.read().unwrap().contains_key(&ref3));
880
881 doc.resolve(&ref1).unwrap();
883 assert!(doc.objects.read().unwrap().contains_key(&ref1));
884 }
885
886 #[test]
887 fn test_object_stream_caching() {
888 let data = build_minimal_pdf();
889 let doc = PdfDocument::from_bytes(data).unwrap();
890 assert_eq!(doc.decoded_obj_streams.read().unwrap().len(), 0);
893
894 doc.decoded_obj_streams.write().unwrap().insert(42, vec![1, 2, 3]);
896 assert!(doc.decoded_obj_streams.read().unwrap().contains_key(&42));
897 assert_eq!(
898 doc.decoded_obj_streams.read().unwrap().get(&42).unwrap(),
899 &[1, 2, 3]
900 );
901
902 doc.decoded_obj_streams.write().unwrap().insert(99, vec![4, 5, 6]);
904 doc.objects.write().unwrap().clear();
906 doc.decoded_obj_streams.write().unwrap().clear();
907 assert_eq!(doc.decoded_obj_streams.read().unwrap().len(), 0);
908 }
909
910 #[cfg(feature = "mmap")]
911 #[test]
912 fn test_mmap_truncated_file() {
913 use std::io::Write;
914 let dir = std::env::temp_dir();
915 let path = dir.join("justpdf_mmap_truncated.pdf");
916 {
917 let mut f = std::fs::File::create(&path).unwrap();
918 f.write_all(b"%PDF-1.4\n").unwrap();
920 }
921 let result = PdfDocument::open_mmap(&path);
922 assert!(result.is_err());
924 let _ = std::fs::remove_file(&path);
925 }
926
927 #[cfg(feature = "mmap")]
928 #[test]
929 fn test_mmap_empty_file() {
930 let dir = std::env::temp_dir();
931 let path = dir.join("justpdf_mmap_empty.pdf");
932 {
933 std::fs::File::create(&path).unwrap();
934 }
935 let result = PdfDocument::open_mmap(&path);
936 assert!(result.is_err());
938 let _ = std::fs::remove_file(&path);
939 }
940
941 #[cfg(feature = "mmap")]
942 #[test]
943 fn test_open_mmap() {
944 use std::io::Write;
945 let data = build_minimal_pdf();
947 let dir = std::env::temp_dir();
948 let path = dir.join("justpdf_mmap_test.pdf");
949 {
950 let mut f = std::fs::File::create(&path).unwrap();
951 f.write_all(&data).unwrap();
952 }
953 let doc = PdfDocument::open_mmap(&path).unwrap();
954 assert_eq!(doc.version, (1, 4));
955 assert!(!doc.is_encrypted());
956
957 let catalog_ref = doc.catalog_ref().unwrap().clone();
958 let catalog = doc.resolve(&catalog_ref).unwrap();
959 match &catalog {
960 PdfObject::Dict(d) => {
961 assert_eq!(d.get_name(b"Type"), Some(b"Catalog".as_slice()));
962 }
963 _ => panic!("expected dict for catalog"),
964 }
965
966 let _ = std::fs::remove_file(&path);
968 }
969}