1use std::collections::{HashMap, HashSet, VecDeque};
2use std::hash::Hash;
3use std::path::Path;
4use std::sync::RwLock;
5
6use crate::crypto;
7use crate::crypto::SecurityState;
8use crate::error::{JustPdfError, Result};
9use crate::object::{self, IndirectRef, PdfDict, PdfObject};
10use crate::stream;
11use crate::tokenizer::Tokenizer;
12use crate::xref::{self, Xref, XrefEntry};
13
/// Backing storage for the raw bytes of a PDF file.
enum PdfData {
    /// Bytes held in an owned heap buffer.
    Owned(Vec<u8>),
    /// Bytes exposed through a memory map (only built with the `mmap` feature).
    #[cfg(feature = "mmap")]
    Mmap(memmap2::Mmap),
}

impl std::fmt::Debug for PdfData {
    /// Prints the variant name and the byte count rather than the contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (variant, byte_len) = match self {
            Self::Owned(buf) => ("Owned", buf.len()),
            #[cfg(feature = "mmap")]
            Self::Mmap(map) => ("Mmap", map.len()),
        };
        f.debug_tuple(variant).field(&byte_len).finish()
    }
}

impl PdfData {
    /// Borrows the underlying bytes, whichever variant holds them.
    fn as_bytes(&self) -> &[u8] {
        match self {
            Self::Owned(buf) => buf.as_slice(),
            #[cfg(feature = "mmap")]
            Self::Mmap(map) => &map[..],
        }
    }
}
44
/// Fixed-capacity cache that evicts the least recently used entry.
///
/// `map` owns the values; `order` tracks recency, with the most recently
/// used key at the front and the next eviction candidate at the back.
struct LruCache<K: Eq + Hash + Clone, V> {
    map: HashMap<K, V>,
    order: VecDeque<K>,
    capacity: usize,
}

impl<K: Eq + Hash + Clone + std::fmt::Debug, V: std::fmt::Debug> std::fmt::Debug
    for LruCache<K, V>
{
    /// Prints only the entry count and the capacity, never the entries.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("LruCache");
        out.field("len", &self.map.len());
        out.field("capacity", &self.capacity);
        out.finish()
    }
}

impl<K: Eq + Hash + Clone, V> LruCache<K, V> {
    /// Creates an empty cache holding at most `capacity` entries.
    ///
    /// # Panics
    /// Panics if `capacity` is zero.
    fn new(capacity: usize) -> Self {
        assert!(capacity > 0, "LruCache capacity must be > 0");
        let map = HashMap::with_capacity(capacity);
        let order = VecDeque::with_capacity(capacity);
        Self {
            map,
            order,
            capacity,
        }
    }

    /// Looks up `key`, marking it as most recently used on a hit.
    fn get(&mut self, key: &K) -> Option<&V> {
        if !self.map.contains_key(key) {
            return None;
        }
        self.touch(key);
        self.map.get(key)
    }

    /// Stores `value` under `key` as the most recently used entry.
    ///
    /// An existing key keeps its slot and just has its value replaced; a new
    /// key evicts the least recently used entry first when the cache is full.
    fn insert(&mut self, key: K, value: V) {
        let already_present = self.map.contains_key(&key);
        if already_present {
            self.touch(&key);
            self.map.insert(key, value);
            return;
        }

        let is_full = self.map.len() >= self.capacity;
        if is_full {
            if let Some(oldest) = self.order.pop_back() {
                self.map.remove(&oldest);
            }
        }
        self.order.push_front(key.clone());
        self.map.insert(key, value);
    }

    /// Reports whether `key` is cached, without affecting recency.
    fn contains_key(&self, key: &K) -> bool {
        self.map.contains_key(key)
    }

    /// Drops every entry; the capacity is unchanged.
    fn clear(&mut self) {
        self.order.clear();
        self.map.clear();
    }

    /// Number of entries currently cached.
    fn len(&self) -> usize {
        self.map.len()
    }

    /// Changes the capacity, evicting least recently used entries until the
    /// cache fits.
    ///
    /// # Panics
    /// Panics if `capacity` is zero.
    fn set_capacity(&mut self, capacity: usize) {
        assert!(capacity > 0, "LruCache capacity must be > 0");
        self.capacity = capacity;
        while self.map.len() > capacity {
            match self.order.pop_back() {
                Some(oldest) => {
                    self.map.remove(&oldest);
                }
                None => {}
            }
        }
    }

    /// Moves `key` to the front of the recency list.
    ///
    /// This is a linear scan over `order`.
    fn touch(&mut self, key: &K) {
        let found_at = self.order.iter().position(|candidate| candidate == key);
        if let Some(idx) = found_at {
            self.order.remove(idx);
        }
        self.order.push_front(key.clone());
    }
}
140
141const DEFAULT_CACHE_CAPACITY: usize = 2048;
143
144pub struct PdfDocument {
150 pub version: (u8, u8),
152 pub xref: Xref,
154 data: PdfData,
156 objects: RwLock<LruCache<IndirectRef, PdfObject>>,
158 security: Option<SecurityState>,
160 decoded_obj_streams: RwLock<HashMap<u32, Vec<u8>>>,
162}
163
164impl std::fmt::Debug for PdfDocument {
165 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166 let obj_cache_len = self
167 .objects
168 .read()
169 .map(|c| c.len())
170 .unwrap_or(0);
171 f.debug_struct("PdfDocument")
172 .field("version", &self.version)
173 .field("xref", &self.xref)
174 .field("data", &self.data)
175 .field("objects_cached", &obj_cache_len)
176 .field("security", &self.security)
177 .finish()
178 }
179}
180
181impl PdfDocument {
182 pub fn open(path: &Path) -> Result<Self> {
184 let data = std::fs::read(path)?;
185 Self::from_bytes(data)
186 }
187
188 pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
190 Self::from_pdf_data(PdfData::Owned(data))
191 }
192
193 fn from_pdf_data(data: PdfData) -> Result<Self> {
195 let bytes = data.as_bytes();
196 if bytes.len() < 8 {
197 return Err(JustPdfError::NotPdf);
198 }
199
200 let version = parse_version(bytes)?;
202
203 let xref = xref::load_xref(bytes)?;
205
206 let mut doc = Self {
207 version,
208 xref,
209 data,
210 objects: RwLock::new(LruCache::new(DEFAULT_CACHE_CAPACITY)),
211 security: None,
212 decoded_obj_streams: RwLock::new(HashMap::new()),
213 };
214
215 doc.detect_encryption()?;
217
218 Ok(doc)
219 }
220
221 #[cfg(feature = "mmap")]
226 pub fn open_mmap(path: &Path) -> Result<Self> {
227 let file = std::fs::File::open(path)?;
228 let mmap = unsafe { memmap2::Mmap::map(&file)? };
231 Self::from_pdf_data(PdfData::Mmap(mmap))
232 }
233
234 pub(crate) fn from_raw_parts(data: Vec<u8>, xref: Xref, version: (u8, u8)) -> Self {
237 Self {
238 version,
239 xref,
240 data: PdfData::Owned(data),
241 objects: RwLock::new(LruCache::new(DEFAULT_CACHE_CAPACITY)),
242 security: None,
243 decoded_obj_streams: RwLock::new(HashMap::new()),
244 }
245 }
246
247 fn detect_encryption(&mut self) -> Result<()> {
249 let encrypt_ref = match self.xref.trailer.get_ref(b"Encrypt") {
251 Some(r) => r.clone(),
252 None => {
253 if self.xref.trailer.get_dict(b"Encrypt").is_some() {
255 return self.detect_encryption_inline();
256 }
257 return Ok(());
258 }
259 };
260
261 let encrypt_obj = self.load_object_raw(&encrypt_ref, &mut HashSet::new())?;
263 let encrypt_dict = match &encrypt_obj {
264 PdfObject::Dict(d) => d,
265 _ => {
266 return Err(JustPdfError::EncryptionError {
267 detail: "encryption object is not a dictionary".into(),
268 });
269 }
270 };
271
272 let ed = crypto::EncryptionDict::from_dict(encrypt_dict)?;
273
274 if ed.filter != b"Standard" {
276 return Err(JustPdfError::UnsupportedEncryption {
277 detail: format!(
278 "unsupported security handler: {}",
279 String::from_utf8_lossy(&ed.filter)
280 ),
281 });
282 }
283
284 let file_id = self.extract_file_id();
286
287 let mut state =
288 SecurityState::new(ed, file_id, Some(encrypt_ref.obj_num));
289
290 if let Ok(key) = crypto::auth::authenticate(&state, b"") {
292 state.file_key = Some(key);
293 }
294
295 self.security = Some(state);
296 Ok(())
297 }
298
299 fn detect_encryption_inline(&mut self) -> Result<()> {
301 let encrypt_dict = self.xref.trailer.get_dict(b"Encrypt").unwrap().clone();
302 let ed = crypto::EncryptionDict::from_dict(&encrypt_dict)?;
303
304 if ed.filter != b"Standard" {
305 return Err(JustPdfError::UnsupportedEncryption {
306 detail: format!(
307 "unsupported security handler: {}",
308 String::from_utf8_lossy(&ed.filter)
309 ),
310 });
311 }
312
313 let file_id = self.extract_file_id();
314 let mut state = SecurityState::new(ed, file_id, None);
315
316 if let Ok(key) = crypto::auth::authenticate(&state, b"") {
317 state.file_key = Some(key);
318 }
319
320 self.security = Some(state);
321 Ok(())
322 }
323
324 fn extract_file_id(&self) -> Vec<u8> {
326 if let Some(PdfObject::Array(arr)) = self.xref.trailer.get(b"ID") {
327 if let Some(PdfObject::String(id)) = arr.first() {
328 return id.clone();
329 }
330 }
331 Vec::new()
332 }
333
334 pub fn is_encrypted(&self) -> bool {
336 self.security.is_some()
337 }
338
339 pub fn is_authenticated(&self) -> bool {
341 match &self.security {
342 Some(s) => s.is_authenticated(),
343 None => true, }
345 }
346
347 pub fn authenticate(&mut self, password: &[u8]) -> Result<()> {
350 let state = match &mut self.security {
351 Some(s) => s,
352 None => return Ok(()), };
354
355 if state.is_authenticated() {
356 return Ok(()); }
358
359 let key = crypto::auth::authenticate(state, password)?;
360 state.file_key = Some(key);
361
362 self.objects.write().unwrap().clear();
364 self.decoded_obj_streams.write().unwrap().clear();
365
366 Ok(())
367 }
368
369 pub fn permissions(&self) -> Option<crypto::Permissions> {
371 self.security.as_ref().map(|s| s.permissions())
372 }
373
374 pub fn security_state(&self) -> Option<&SecurityState> {
376 self.security.as_ref()
377 }
378
379 pub fn object_count(&self) -> usize {
381 self.xref.len()
382 }
383
384 pub fn catalog_ref(&self) -> Option<&IndirectRef> {
386 self.xref.trailer.get_ref(b"Root")
387 }
388
389 pub fn trailer(&self) -> &PdfDict {
391 &self.xref.trailer
392 }
393
394 pub fn resolve(&self, iref: &IndirectRef) -> Result<PdfObject> {
402 {
404 let mut cache = self.objects.write().unwrap();
405 if let Some(obj) = cache.get(iref) {
406 return Ok(obj.clone());
407 }
408 }
409
410 if let Some(ref sec) = self.security {
412 if !sec.is_authenticated() {
413 return Err(JustPdfError::EncryptedDocument);
414 }
415 }
416
417 let obj = self.load_object(iref, &mut HashSet::new())?;
419 let result = obj.clone();
420 self.objects.write().unwrap().insert(iref.clone(), obj);
421 Ok(result)
422 }
423
424 fn load_object(
427 &self,
428 iref: &IndirectRef,
429 visited: &mut HashSet<IndirectRef>,
430 ) -> Result<PdfObject> {
431 let obj = self.load_object_raw(iref, visited)?;
432
433 if let Some(ref sec) = self.security {
435 if sec.is_authenticated() {
436 return crypto::decrypt_object(obj, sec, iref.obj_num, iref.gen_num);
437 }
438 }
439
440 Ok(obj)
441 }
442
443 fn load_object_raw(
445 &self,
446 iref: &IndirectRef,
447 visited: &mut HashSet<IndirectRef>,
448 ) -> Result<PdfObject> {
449 if !visited.insert(iref.clone()) {
450 return Err(JustPdfError::CircularReference {
451 obj_num: iref.obj_num,
452 gen_num: iref.gen_num,
453 });
454 }
455
456 let entry = self
457 .xref
458 .get(iref.obj_num)
459 .ok_or(JustPdfError::ObjectNotFound {
460 obj_num: iref.obj_num,
461 gen_num: iref.gen_num,
462 })?
463 .clone();
464
465 match entry {
466 XrefEntry::InUse { offset, .. } => {
467 let mut tokenizer = Tokenizer::new_at(self.data.as_bytes(), offset as usize);
468 let (_parsed_ref, obj) = object::parse_indirect_object(&mut tokenizer)?;
469 Ok(obj)
470 }
471 XrefEntry::Compressed {
472 obj_stream_num,
473 index_within,
474 } => self.load_compressed_object(obj_stream_num, index_within, visited),
475 XrefEntry::Free { .. } => Ok(PdfObject::Null),
476 }
477 }
478
479 fn load_compressed_object(
482 &self,
483 obj_stream_num: u32,
484 index_within: u16,
485 visited: &mut HashSet<IndirectRef>,
486 ) -> Result<PdfObject> {
487 {
489 let cache = self.decoded_obj_streams.read().unwrap();
490 if !cache.contains_key(&obj_stream_num) {
491 drop(cache); let stream_ref = IndirectRef {
494 obj_num: obj_stream_num,
495 gen_num: 0,
496 };
497
498 let stream_obj = {
500 let raw = self.load_object_raw(&stream_ref, visited)?;
501 if let Some(ref sec) = self.security {
503 if sec.is_authenticated() {
504 crypto::decrypt_object(raw, sec, obj_stream_num, 0)?
505 } else {
506 raw
507 }
508 } else {
509 raw
510 }
511 };
512
513 let (dict, raw_data) = match &stream_obj {
514 PdfObject::Stream { dict, data } => (dict, data),
515 _ => {
516 return Err(JustPdfError::InvalidObject {
517 offset: 0,
518 detail: format!("object stream {obj_stream_num} is not a stream"),
519 });
520 }
521 };
522
523 let decoded = stream::decode_stream(raw_data, dict)?;
524 self.decoded_obj_streams
525 .write()
526 .unwrap()
527 .insert(obj_stream_num, decoded);
528 }
529 }
530
531 let cache = self.decoded_obj_streams.read().unwrap();
532 let decoded = cache.get(&obj_stream_num).unwrap();
533
534 let mut tokenizer = Tokenizer::new(decoded);
541
542 let mut obj_offsets = Vec::new();
546 loop {
547 let saved_pos = tokenizer.pos();
548 let obj_num = match tokenizer.next_token()? {
549 Some(crate::tokenizer::token::Token::Integer(v)) => v as u32,
550 _ => {
551 tokenizer.seek(saved_pos);
552 break;
553 }
554 };
555 let offset = match tokenizer.next_token()? {
556 Some(crate::tokenizer::token::Token::Integer(v)) => v as usize,
557 _ => break,
558 };
559 obj_offsets.push((obj_num, offset));
560 }
561
562 let first = tokenizer.pos();
565
566 let idx = index_within as usize;
567 if idx >= obj_offsets.len() {
568 return Err(JustPdfError::ObjectNotFound {
569 obj_num: 0,
570 gen_num: 0,
571 });
572 }
573
574 let (_obj_num, obj_offset) = obj_offsets[idx];
575 let abs_offset = first + obj_offset;
576
577 let mut tokenizer = Tokenizer::new_at(decoded, abs_offset);
578 object::parse_object(&mut tokenizer)
579 }
580
581 pub fn object_refs(&self) -> impl Iterator<Item = IndirectRef> + '_ {
583 self.xref
584 .entries
585 .iter()
586 .filter_map(|(&obj_num, entry)| match entry {
587 XrefEntry::InUse { gen_num, .. } => Some(IndirectRef {
588 obj_num,
589 gen_num: *gen_num,
590 }),
591 XrefEntry::Compressed { .. } => Some(IndirectRef {
592 obj_num,
593 gen_num: 0,
594 }),
595 XrefEntry::Free { .. } => None,
596 })
597 }
598
599 pub fn decode_stream(&self, dict: &PdfDict, raw_data: &[u8]) -> Result<Vec<u8>> {
601 stream::decode_stream(raw_data, dict)
602 }
603
604 pub fn raw_data(&self) -> &[u8] {
606 self.data.as_bytes()
607 }
608
609 pub fn set_cache_capacity(&mut self, capacity: usize) {
611 self.objects.write().unwrap().set_capacity(capacity);
612 }
613
614 pub fn cached_object_count(&self) -> usize {
616 self.objects.read().unwrap().len()
617 }
618}
619
620fn parse_version(data: &[u8]) -> Result<(u8, u8)> {
622 let search_len = data.len().min(1024);
624 let needle = b"%PDF-";
625
626 for i in 0..search_len.saturating_sub(needle.len() + 3) {
627 if &data[i..i + needle.len()] == needle {
628 let major = data.get(i + 5).copied().unwrap_or(0);
629 let dot = data.get(i + 6).copied().unwrap_or(0);
630 let minor = data.get(i + 7).copied().unwrap_or(0);
631
632 if major.is_ascii_digit() && dot == b'.' && minor.is_ascii_digit() {
633 return Ok((major - b'0', minor - b'0'));
634 }
635 }
636 }
637
638 Err(JustPdfError::NotPdf)
639}
640
641#[cfg(test)]
642mod tests {
643 use super::*;
644
645 #[test]
646 fn test_parse_version() {
647 assert_eq!(parse_version(b"%PDF-1.7\n").unwrap(), (1, 7));
648 assert_eq!(parse_version(b"%PDF-2.0\n").unwrap(), (2, 0));
649 assert_eq!(parse_version(b"%PDF-1.4 stuff").unwrap(), (1, 4));
650 }
651
652 #[test]
653 fn test_parse_version_not_pdf() {
654 assert!(parse_version(b"Hello World").is_err());
655 assert!(parse_version(b"").is_err());
656 }
657
658 #[test]
659 fn test_parse_version_offset() {
660 assert_eq!(parse_version(b"\xEF\xBB\xBF%PDF-1.7\n").unwrap(), (1, 7));
662 }
663
664 fn build_minimal_pdf() -> Vec<u8> {
666 let mut pdf = Vec::new();
667 pdf.extend_from_slice(b"%PDF-1.4\n");
669
670 let obj1_offset = pdf.len();
672 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
673
674 let obj2_offset = pdf.len();
676 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n");
677
678 let obj3_offset = pdf.len();
680 pdf.extend_from_slice(
681 b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n",
682 );
683
684 let xref_offset = pdf.len();
686 pdf.extend_from_slice(b"xref\n");
687 pdf.extend_from_slice(b"0 4\n");
688 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
689 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
690 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
691 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
692
693 pdf.extend_from_slice(b"trailer\n<< /Size 4 /Root 1 0 R >>\n");
695 pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF\n").as_bytes());
696
697 pdf
698 }
699
700 #[test]
701 fn test_open_minimal_pdf() {
702 let data = build_minimal_pdf();
703 let doc = PdfDocument::from_bytes(data).unwrap();
704
705 assert_eq!(doc.version, (1, 4));
706 assert!(doc.object_count() > 0);
707 assert!(!doc.is_encrypted());
708
709 let catalog_ref = doc.catalog_ref().unwrap().clone();
711 let catalog = doc.resolve(&catalog_ref).unwrap();
712 match &catalog {
713 PdfObject::Dict(d) => {
714 assert_eq!(d.get_name(b"Type"), Some(b"Catalog".as_slice()));
715 }
716 _ => panic!("expected dict for catalog"),
717 }
718 }
719
720 #[test]
721 fn test_not_pdf() {
722 let result = PdfDocument::from_bytes(b"Hello World, not a PDF".to_vec());
723 assert!(result.is_err());
724 }
725
726 #[test]
727 fn test_empty_file() {
728 let result = PdfDocument::from_bytes(vec![]);
729 assert!(result.is_err());
730 }
731
732 #[test]
733 fn test_truncated_pdf() {
734 let result = PdfDocument::from_bytes(b"%PDF-1.4\n".to_vec());
735 assert!(result.is_err());
736 }
737
738 #[test]
739 fn test_object_not_found() {
740 let data = build_minimal_pdf();
741 let doc = PdfDocument::from_bytes(data).unwrap();
742 let result = doc.resolve(&IndirectRef {
743 obj_num: 999,
744 gen_num: 0,
745 });
746 assert!(result.is_err());
747 }
748
749 #[test]
750 fn test_unencrypted_pdf_is_authenticated() {
751 let data = build_minimal_pdf();
752 let doc = PdfDocument::from_bytes(data).unwrap();
753 assert!(!doc.is_encrypted());
754 assert!(doc.is_authenticated());
755 }
756
757 #[test]
762 fn test_lru_cache_insert_and_get() {
763 let mut cache = LruCache::new(3);
764 cache.insert("a", 1);
765 cache.insert("b", 2);
766 cache.insert("c", 3);
767 assert_eq!(cache.len(), 3);
768 assert_eq!(cache.get(&"a"), Some(&1));
769 assert_eq!(cache.get(&"b"), Some(&2));
770 assert_eq!(cache.get(&"c"), Some(&3));
771 }
772
773 #[test]
774 fn test_lru_cache_eviction() {
775 let mut cache = LruCache::new(3);
776 cache.insert("a", 1);
777 cache.insert("b", 2);
778 cache.insert("c", 3);
779 cache.insert("d", 4);
781 assert_eq!(cache.len(), 3);
782 assert_eq!(cache.get(&"a"), None); assert_eq!(cache.get(&"b"), Some(&2));
784 assert_eq!(cache.get(&"c"), Some(&3));
785 assert_eq!(cache.get(&"d"), Some(&4));
786 }
787
788 #[test]
789 fn test_lru_cache_access_promotes() {
790 let mut cache = LruCache::new(3);
791 cache.insert("a", 1);
792 cache.insert("b", 2);
793 cache.insert("c", 3);
794 assert_eq!(cache.get(&"a"), Some(&1));
796 cache.insert("d", 4);
797 assert_eq!(cache.get(&"b"), None); assert_eq!(cache.get(&"a"), Some(&1));
799 }
800
801 #[test]
802 fn test_lru_cache_update_existing() {
803 let mut cache = LruCache::new(3);
804 cache.insert("a", 1);
805 cache.insert("a", 10);
806 assert_eq!(cache.len(), 1);
807 assert_eq!(cache.get(&"a"), Some(&10));
808 }
809
810 #[test]
811 fn test_lru_cache_clear() {
812 let mut cache = LruCache::new(3);
813 cache.insert("a", 1);
814 cache.insert("b", 2);
815 cache.clear();
816 assert_eq!(cache.len(), 0);
817 assert_eq!(cache.get(&"a"), None);
818 }
819
820 #[test]
821 fn test_lru_cache_set_capacity_shrinks() {
822 let mut cache = LruCache::new(5);
823 for i in 0..5 {
824 cache.insert(i, i * 10);
825 }
826 assert_eq!(cache.len(), 5);
827 cache.set_capacity(2);
829 assert_eq!(cache.len(), 2);
830 assert_eq!(cache.get(&0), None);
831 assert_eq!(cache.get(&1), None);
832 assert_eq!(cache.get(&2), None);
833 assert!(cache.get(&3).is_some() || cache.get(&4).is_some());
835 }
836
837 #[test]
842 fn test_set_cache_capacity() {
843 let data = build_minimal_pdf();
844 let mut doc = PdfDocument::from_bytes(data).unwrap();
845
846 for obj_num in 1..=3u32 {
848 let iref = IndirectRef { obj_num, gen_num: 0 };
849 doc.resolve(&iref).unwrap();
850 }
851 assert_eq!(doc.cached_object_count(), 3);
852
853 doc.set_cache_capacity(1);
855 assert_eq!(doc.cached_object_count(), 1);
856 }
857
858 #[test]
859 fn test_lru_cache_hit_miss_on_document() {
860 let data = build_minimal_pdf();
861 let mut doc = PdfDocument::from_bytes(data).unwrap();
862 doc.set_cache_capacity(2);
863
864 let ref1 = IndirectRef { obj_num: 1, gen_num: 0 };
865 let ref2 = IndirectRef { obj_num: 2, gen_num: 0 };
866 let ref3 = IndirectRef { obj_num: 3, gen_num: 0 };
867
868 doc.resolve(&ref1).unwrap();
870 doc.resolve(&ref2).unwrap();
871 assert_eq!(doc.cached_object_count(), 2);
872
873 doc.resolve(&ref3).unwrap();
875 assert_eq!(doc.cached_object_count(), 2);
876 assert!(!doc.objects.read().unwrap().contains_key(&ref1));
877 assert!(doc.objects.read().unwrap().contains_key(&ref2));
878 assert!(doc.objects.read().unwrap().contains_key(&ref3));
879
880 doc.resolve(&ref1).unwrap();
882 assert!(doc.objects.read().unwrap().contains_key(&ref1));
883 }
884
885 #[test]
886 fn test_object_stream_caching() {
887 let data = build_minimal_pdf();
888 let doc = PdfDocument::from_bytes(data).unwrap();
889 assert_eq!(doc.decoded_obj_streams.read().unwrap().len(), 0);
892
893 doc.decoded_obj_streams.write().unwrap().insert(42, vec![1, 2, 3]);
895 assert!(doc.decoded_obj_streams.read().unwrap().contains_key(&42));
896 assert_eq!(
897 doc.decoded_obj_streams.read().unwrap().get(&42).unwrap(),
898 &[1, 2, 3]
899 );
900
901 doc.decoded_obj_streams.write().unwrap().insert(99, vec![4, 5, 6]);
903 doc.objects.write().unwrap().clear();
905 doc.decoded_obj_streams.write().unwrap().clear();
906 assert_eq!(doc.decoded_obj_streams.read().unwrap().len(), 0);
907 }
908
909 #[cfg(feature = "mmap")]
910 #[test]
911 fn test_mmap_truncated_file() {
912 use std::io::Write;
913 let dir = std::env::temp_dir();
914 let path = dir.join("justpdf_mmap_truncated.pdf");
915 {
916 let mut f = std::fs::File::create(&path).unwrap();
917 f.write_all(b"%PDF-1.4\n").unwrap();
919 }
920 let result = PdfDocument::open_mmap(&path);
921 assert!(result.is_err());
923 let _ = std::fs::remove_file(&path);
924 }
925
926 #[cfg(feature = "mmap")]
927 #[test]
928 fn test_mmap_empty_file() {
929 let dir = std::env::temp_dir();
930 let path = dir.join("justpdf_mmap_empty.pdf");
931 {
932 std::fs::File::create(&path).unwrap();
933 }
934 let result = PdfDocument::open_mmap(&path);
935 assert!(result.is_err());
937 let _ = std::fs::remove_file(&path);
938 }
939
940 #[cfg(feature = "mmap")]
941 #[test]
942 fn test_open_mmap() {
943 use std::io::Write;
944 let data = build_minimal_pdf();
946 let dir = std::env::temp_dir();
947 let path = dir.join("justpdf_mmap_test.pdf");
948 {
949 let mut f = std::fs::File::create(&path).unwrap();
950 f.write_all(&data).unwrap();
951 }
952 let doc = PdfDocument::open_mmap(&path).unwrap();
953 assert_eq!(doc.version, (1, 4));
954 assert!(!doc.is_encrypted());
955
956 let catalog_ref = doc.catalog_ref().unwrap().clone();
957 let catalog = doc.resolve(&catalog_ref).unwrap();
958 match &catalog {
959 PdfObject::Dict(d) => {
960 assert_eq!(d.get_name(b"Type"), Some(b"Catalog".as_slice()));
961 }
962 _ => panic!("expected dict for catalog"),
963 }
964
965 let _ = std::fs::remove_file(&path);
967 }
968}