1use super::encryption_handler::EncryptionHandler;
6use super::header::PdfHeader;
7use super::object_stream::ObjectStream;
8use super::objects::{PdfDictionary, PdfObject};
9use super::stack_safe::StackSafeContext;
10use super::trailer::PdfTrailer;
11use super::xref::XRefTable;
12use super::{ParseError, ParseResult};
13use std::collections::HashMap;
14use std::fs::File;
15use std::io::{BufReader, Read, Seek, SeekFrom};
16use std::path::Path;
17
/// Reader that parses a PDF from any seekable byte source.
///
/// The header, cross-reference table and trailer are parsed when the reader
/// is constructed; individual objects are loaded on demand and cached.
pub struct PdfReader<R: Read + Seek> {
    /// Buffered handle on the underlying byte source.
    reader: BufReader<R>,
    /// Header parsed at construction time (carries the PDF version).
    header: PdfHeader,
    /// Cross-reference table used to locate objects.
    xref: XRefTable,
    /// Trailer built from the xref's trailer dictionary.
    trailer: PdfTrailer,
    /// Already-loaded objects, keyed by `(object number, generation)`.
    object_cache: HashMap<(u32, u16), PdfObject>,
    /// Parsed object streams, keyed by the stream's object number.
    object_stream_cache: HashMap<u32, ObjectStream>,
    /// Page tree, created lazily the first time it is needed.
    page_tree: Option<super::page_tree::PageTree>,
    /// Context entered and exited around each object parse.
    parse_context: StackSafeContext,
    /// Parse options supplied when the reader was created.
    options: super::ParseOptions,
    /// Present only when the trailer indicates the document is encrypted.
    encryption_handler: Option<EncryptionHandler>,
}
37
38impl<R: Read + Seek> PdfReader<R> {
39 pub fn options(&self) -> &super::ParseOptions {
41 &self.options
42 }
43
44 pub fn is_encrypted(&self) -> bool {
46 self.encryption_handler.is_some()
47 }
48
49 pub fn is_unlocked(&self) -> bool {
51 match &self.encryption_handler {
52 Some(handler) => handler.is_unlocked(),
53 None => true, }
55 }
56
57 pub fn encryption_handler_mut(&mut self) -> Option<&mut EncryptionHandler> {
59 self.encryption_handler.as_mut()
60 }
61
62 pub fn encryption_handler(&self) -> Option<&EncryptionHandler> {
64 self.encryption_handler.as_ref()
65 }
66
67 pub fn unlock_with_password(&mut self, password: &str) -> ParseResult<bool> {
69 match &mut self.encryption_handler {
70 Some(handler) => {
71 if handler.unlock_with_user_password(password).unwrap_or(false) {
73 Ok(true)
74 } else {
75 Ok(handler
77 .unlock_with_owner_password(password)
78 .unwrap_or(false))
79 }
80 }
81 None => Ok(true), }
83 }
84
85 pub fn try_empty_password(&mut self) -> ParseResult<bool> {
87 match &mut self.encryption_handler {
88 Some(handler) => Ok(handler.try_empty_password().unwrap_or(false)),
89 None => Ok(true), }
91 }
92}
93
94impl PdfReader<File> {
95 pub fn open<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
97 use std::io::Write;
98 let mut debug_file = std::fs::File::create("/tmp/pdf_open_debug.log").ok();
99 if let Some(ref mut f) = debug_file {
100 writeln!(f, "Opening file: {:?}", path.as_ref()).ok();
101 }
102 let file = File::open(path)?;
103 if let Some(ref mut f) = debug_file {
104 writeln!(f, "File opened successfully").ok();
105 }
106 let options = super::ParseOptions::lenient();
108 Self::new_with_options(file, options)
109 }
110
111 pub fn open_strict<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
113 let file = File::open(path)?;
114 let options = super::ParseOptions::strict();
115 Self::new_with_options(file, options)
116 }
117
118 pub fn open_document<P: AsRef<Path>>(
120 path: P,
121 ) -> ParseResult<super::document::PdfDocument<File>> {
122 let reader = Self::open(path)?;
123 Ok(reader.into_document())
124 }
125}
126
127impl<R: Read + Seek> PdfReader<R> {
128 pub fn new(reader: R) -> ParseResult<Self> {
130 Self::new_with_options(reader, super::ParseOptions::default())
131 }
132
133 pub fn new_with_options(reader: R, options: super::ParseOptions) -> ParseResult<Self> {
135 let mut buf_reader = BufReader::new(reader);
136
137 let start_pos = buf_reader.stream_position()?;
139 buf_reader.seek(SeekFrom::End(0))?;
140 let file_size = buf_reader.stream_position()?;
141 buf_reader.seek(SeekFrom::Start(start_pos))?;
142
143 if file_size == 0 {
144 return Err(ParseError::EmptyFile);
145 }
146
147 use std::io::Write;
149 let mut debug_file = std::fs::File::create("/tmp/pdf_debug.log").ok();
150 if let Some(ref mut f) = debug_file {
151 writeln!(f, "Parsing PDF header...").ok();
152 }
153 let header = PdfHeader::parse(&mut buf_reader)?;
154 if let Some(ref mut f) = debug_file {
155 writeln!(f, "Header parsed: version {}", header.version).ok();
156 }
157
158 if let Some(ref mut f) = debug_file {
160 writeln!(f, "Parsing XRef table...").ok();
161 }
162 let xref = XRefTable::parse_with_options(&mut buf_reader, &options)?;
163 if let Some(ref mut f) = debug_file {
164 writeln!(f, "XRef table parsed with {} entries", xref.len()).ok();
165 }
166
167 let trailer_dict = xref.trailer().ok_or(ParseError::InvalidTrailer)?.clone();
169
170 let xref_offset = xref.xref_offset();
171 let trailer = PdfTrailer::from_dict(trailer_dict, xref_offset)?;
172
173 trailer.validate()?;
175
176 let encryption_handler = if EncryptionHandler::detect_encryption(trailer.dict()) {
178 if let Ok(Some((encrypt_obj_num, encrypt_gen_num))) = trailer.encrypt() {
179 let mut temp_reader = Self {
181 reader: buf_reader,
182 header: header.clone(),
183 xref: xref.clone(),
184 trailer: trailer.clone(),
185 object_cache: HashMap::new(),
186 object_stream_cache: HashMap::new(),
187 page_tree: None,
188 parse_context: StackSafeContext::new(),
189 options: options.clone(),
190 encryption_handler: None,
191 };
192
193 let encrypt_obj = temp_reader.get_object(encrypt_obj_num, encrypt_gen_num)?;
195 if let Some(encrypt_dict) = encrypt_obj.as_dict() {
196 let file_id = trailer.id().and_then(|id_obj| {
198 if let PdfObject::Array(ref id_array) = id_obj {
199 if let Some(PdfObject::String(ref id_bytes)) = id_array.get(0) {
200 Some(id_bytes.as_bytes().to_vec())
201 } else {
202 None
203 }
204 } else {
205 None
206 }
207 });
208
209 match EncryptionHandler::new(encrypt_dict, file_id) {
210 Ok(handler) => {
211 buf_reader = temp_reader.reader;
213 Some(handler)
214 }
215 Err(_) => {
216 let _ = temp_reader.reader;
218 return Err(ParseError::EncryptionNotSupported);
219 }
220 }
221 } else {
222 let _ = temp_reader.reader;
223 return Err(ParseError::EncryptionNotSupported);
224 }
225 } else {
226 return Err(ParseError::EncryptionNotSupported);
227 }
228 } else {
229 None
230 };
231
232 Ok(Self {
233 reader: buf_reader,
234 header,
235 xref,
236 trailer,
237 object_cache: HashMap::new(),
238 object_stream_cache: HashMap::new(),
239 page_tree: None,
240 parse_context: StackSafeContext::new(),
241 options,
242 encryption_handler,
243 })
244 }
245
    /// Returns the PDF version recorded in the parsed header.
    pub fn version(&self) -> &super::header::PdfVersion {
        &self.header.version
    }
250
251 pub fn catalog(&mut self) -> ParseResult<&PdfDictionary> {
253 let (obj_num, gen_num) = match self.trailer.root() {
255 Ok(root) => root,
256 Err(_) => {
257 #[cfg(debug_assertions)]
259 eprintln!("Warning: Trailer missing Root entry, attempting recovery");
260
261 if let Some(root) = self.trailer.find_root_fallback() {
263 root
264 } else {
265 if let Ok(catalog_ref) = self.find_catalog_object() {
267 catalog_ref
268 } else {
269 return Err(ParseError::MissingKey("Root".to_string()));
270 }
271 }
272 }
273 };
274
275 let catalog = self.get_object(obj_num, gen_num)?;
276
277 catalog.as_dict().ok_or_else(|| ParseError::SyntaxError {
278 position: 0,
279 message: "Catalog is not a dictionary".to_string(),
280 })
281 }
282
283 pub fn info(&mut self) -> ParseResult<Option<&PdfDictionary>> {
285 match self.trailer.info() {
286 Some((obj_num, gen_num)) => {
287 let info = self.get_object(obj_num, gen_num)?;
288 Ok(info.as_dict())
289 }
290 None => Ok(None),
291 }
292 }
293
    /// Returns the object identified by `obj_num` / `gen_num`.
    ///
    /// Thin public entry point that delegates to `load_object_from_disk`,
    /// which serves the object from the cache or parses it from the source.
    pub fn get_object(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<&PdfObject> {
        self.load_object_from_disk(obj_num, gen_num)
    }
298
    /// Loads the object `obj_num` / `gen_num`, caching the result.
    ///
    /// Lookup order:
    /// 1. the object cache;
    /// 2. an extended xref entry marking the object as stored in an object
    ///    stream (delegated to `get_compressed_object`);
    /// 3. the regular xref entry: free entries resolve to `PdfObject::Null`,
    ///    in-use entries are parsed from the entry's byte offset.
    ///
    /// With `lenient_syntax` enabled, a malformed `N G obj` header or a
    /// missing `endobj` is tolerated (optionally printing a warning to
    /// stderr when `collect_warnings` is set); otherwise each of those is a
    /// `ParseError::SyntaxError`.
    ///
    /// # Errors
    ///
    /// * `ParseError::InvalidReference` when the object has no xref entry or
    ///   the in-use entry's generation differs from `gen_num`.
    /// * `ParseError::SyntaxError` for header/footer problems in strict mode.
    /// * Any I/O, lexer, parse-context or object-parse error.
    fn load_object_from_disk(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<&PdfObject> {
        let key = (obj_num, gen_num);

        // Fast path: the object was loaded before.
        if self.object_cache.contains_key(&key) {
            return Ok(&self.object_cache[&key]);
        }

        // Objects living inside an object stream are handled separately.
        if let Some(ext_entry) = self.xref.get_extended_entry(obj_num) {
            if let Some((stream_obj_num, index_in_stream)) = ext_entry.compressed_info {
                return self.get_compressed_object(
                    obj_num,
                    gen_num,
                    stream_obj_num,
                    index_in_stream,
                );
            }
        }

        let entry = self
            .xref
            .get_entry(obj_num)
            .ok_or(ParseError::InvalidReference(obj_num, gen_num))?;

        // Free entries resolve to Null; note this happens before the
        // generation check, so any generation is accepted for a free entry.
        if !entry.in_use {
            self.object_cache.insert(key, PdfObject::Null);
            return Ok(&self.object_cache[&key]);
        }

        if entry.generation != gen_num {
            return Err(ParseError::InvalidReference(obj_num, gen_num));
        }

        // Position the stream at the object's recorded byte offset.
        self.reader.seek(std::io::SeekFrom::Start(entry.offset))?;

        let mut lexer =
            super::lexer::Lexer::new_with_options(&mut self.reader, self.options.clone());

        // --- Object header: "<obj_num> <gen_num> obj" ---

        // Object number; lenient mode substitutes the expected number when
        // the token is not an integer.
        let token = lexer.next_token()?;
        let read_obj_num = match token {
            super::lexer::Token::Integer(n) => n as u32,
            _ => {
                if self.options.lenient_syntax {
                    if self.options.collect_warnings {
                        eprintln!(
                            "Warning: Using expected object number {obj_num} instead of parsed token"
                        );
                    }
                    obj_num
                } else {
                    return Err(ParseError::SyntaxError {
                        position: entry.offset as usize,
                        message: "Expected object number".to_string(),
                    });
                }
            }
        };

        // A mismatching number is only fatal in strict mode.
        if read_obj_num != obj_num && !self.options.lenient_syntax {
            return Err(ParseError::SyntaxError {
                position: entry.offset as usize,
                message: format!(
                    "Object number mismatch: expected {obj_num}, found {read_obj_num}"
                ),
            });
        }

        // Generation number; lenient mode falls back to 0 when the token is
        // not an integer.
        let token = lexer.next_token()?;
        let read_gen_num = match token {
            super::lexer::Token::Integer(n) => n as u16,
            _ => {
                if self.options.lenient_syntax {
                    if self.options.collect_warnings {
                        eprintln!(
                            "Warning: Using generation 0 instead of parsed token for object {obj_num}"
                        );
                    }
                    0
                } else {
                    return Err(ParseError::SyntaxError {
                        position: entry.offset as usize,
                        message: "Expected generation number".to_string(),
                    });
                }
            }
        };

        // A mismatching generation is only fatal in strict mode.
        if read_gen_num != gen_num && !self.options.lenient_syntax {
            return Err(ParseError::SyntaxError {
                position: entry.offset as usize,
                message: format!(
                    "Generation number mismatch: expected {gen_num}, found {read_gen_num}"
                ),
            });
        }

        // The `obj` keyword; lenient mode continues without it.
        let token = lexer.next_token()?;
        match token {
            super::lexer::Token::Obj => {}
            _ => {
                if self.options.lenient_syntax {
                    if self.options.collect_warnings {
                        eprintln!(
                            "Warning: Expected 'obj' keyword for object {obj_num} {gen_num}, continuing anyway"
                        );
                    }
                } else {
                    return Err(ParseError::SyntaxError {
                        position: entry.offset as usize,
                        message: "Expected 'obj' keyword".to_string(),
                    });
                }
            }
        };

        // --- Object body ---

        // The parse context is entered for the duration of the parse and is
        // exited on both the success and the error path.
        self.parse_context.enter()?;

        let obj = match PdfObject::parse_with_options(&mut lexer, &self.options) {
            Ok(obj) => {
                self.parse_context.exit();
                obj
            }
            Err(e) => {
                self.parse_context.exit();
                return Err(e);
            }
        };

        // --- Object footer: "endobj" (optional in lenient mode) ---
        let token = lexer.next_token()?;
        match token {
            super::lexer::Token::EndObj => {}
            _ => {
                if self.options.lenient_syntax {
                    if self.options.collect_warnings {
                        eprintln!("Warning: Expected 'endobj' keyword after object {obj_num} {gen_num}, continuing anyway");
                    }
                } else {
                    return Err(ParseError::SyntaxError {
                        position: entry.offset as usize,
                        message: "Expected 'endobj' keyword".to_string(),
                    });
                }
            }
        };

        // Cache under the requested key and hand back a reference into the cache.
        self.object_cache.insert(key, obj);

        Ok(&self.object_cache[&key])
    }
468
469 pub fn resolve<'a>(&'a mut self, obj: &'a PdfObject) -> ParseResult<&'a PdfObject> {
471 match obj {
472 PdfObject::Reference(obj_num, gen_num) => self.get_object(*obj_num, *gen_num),
473 _ => Ok(obj),
474 }
475 }
476
477 pub fn resolve_stream_length(&mut self, obj: &PdfObject) -> ParseResult<Option<usize>> {
480 match obj {
481 PdfObject::Integer(len) => {
482 if *len >= 0 {
483 Ok(Some(*len as usize))
484 } else {
485 Ok(None)
487 }
488 }
489 PdfObject::Reference(obj_num, gen_num) => {
490 let resolved = self.get_object(*obj_num, *gen_num)?;
491 match resolved {
492 PdfObject::Integer(len) => {
493 if *len >= 0 {
494 Ok(Some(*len as usize))
495 } else {
496 Ok(None)
497 }
498 }
499 _ => {
500 Ok(None)
502 }
503 }
504 }
505 _ => {
506 Ok(None)
508 }
509 }
510 }
511
512 fn get_compressed_object(
514 &mut self,
515 obj_num: u32,
516 gen_num: u16,
517 stream_obj_num: u32,
518 _index_in_stream: u32,
519 ) -> ParseResult<&PdfObject> {
520 let key = (obj_num, gen_num);
521
522 if !self.object_stream_cache.contains_key(&stream_obj_num) {
524 let stream_obj = self.load_object_from_disk(stream_obj_num, 0)?;
526
527 if let Some(stream) = stream_obj.as_stream() {
528 let obj_stream = ObjectStream::parse(stream.clone(), &self.options)?;
530 self.object_stream_cache.insert(stream_obj_num, obj_stream);
531 } else {
532 return Err(ParseError::SyntaxError {
533 position: 0,
534 message: format!("Object {stream_obj_num} is not a stream"),
535 });
536 }
537 }
538
539 let obj_stream = &self.object_stream_cache[&stream_obj_num];
541 let obj = obj_stream
542 .get_object(obj_num)
543 .ok_or_else(|| ParseError::SyntaxError {
544 position: 0,
545 message: format!("Object {obj_num} not found in object stream {stream_obj_num}"),
546 })?;
547
548 self.object_cache.insert(key, obj.clone());
550 Ok(&self.object_cache[&key])
551 }
552
    /// Returns the root dictionary of the page tree.
    ///
    /// Normally this is the object referenced by the catalog's `Pages`
    /// entry. When that entry is missing, two recovery steps are attempted:
    ///
    /// 1. collect every object typed `Page` and, if any exist, return a
    ///    synthetic `Pages` dictionary built from them;
    /// 2. in lenient mode only, scan object numbers for a dictionary typed
    ///    `Pages` and use the first one found.
    ///
    /// # Errors
    ///
    /// * `ParseError::SyntaxError` when `Pages` is present but not a
    ///   reference, or when the referenced object is not a dictionary.
    /// * `ParseError::MissingKey("Pages")` when the entry is missing and
    ///   recovery fails (or is not permitted in strict mode).
    /// * Any error from loading the catalog or the pages object.
    pub fn pages(&mut self) -> ParseResult<&PdfDictionary> {
        // Resolve the (object, generation) pair first so the borrow of the
        // catalog ends before the pages object itself is loaded.
        let (pages_obj_num, pages_gen_num) = {
            let catalog = self.catalog()?;

            if let Some(pages_ref) = catalog.get("Pages") {
                match pages_ref {
                    PdfObject::Reference(obj_num, gen_num) => (*obj_num, *gen_num),
                    _ => {
                        return Err(ParseError::SyntaxError {
                            position: 0,
                            message: "Pages must be a reference".to_string(),
                        })
                    }
                }
            } else {
                #[cfg(debug_assertions)]
                eprintln!("Warning: Catalog missing Pages entry, attempting recovery");

                // Recovery 1: rebuild a page tree root from loose Page objects.
                if let Ok(page_refs) = self.find_page_objects() {
                    if !page_refs.is_empty() {
                        return self.create_synthetic_pages_dict(&page_refs);
                    }
                }

                // Recovery 2 (lenient only): look for an existing Pages dictionary.
                if self.options.lenient_syntax {
                    if self.options.collect_warnings {
                        eprintln!("Warning: Missing Pages in catalog, searching for page tree");
                    }
                    let mut found_pages = None;
                    // Scans object numbers 1..xref.len() at generation 0;
                    // objects that fail to load are skipped.
                    for i in 1..self.xref.len() as u32 {
                        if let Ok(obj) = self.get_object(i, 0) {
                            if let Some(dict) = obj.as_dict() {
                                if let Some(obj_type) = dict.get("Type").and_then(|t| t.as_name()) {
                                    if obj_type.0 == "Pages" {
                                        found_pages = Some((i, 0));
                                        break;
                                    }
                                }
                            }
                        }
                    }
                    if let Some((obj_num, gen_num)) = found_pages {
                        (obj_num, gen_num)
                    } else {
                        return Err(ParseError::MissingKey("Pages".to_string()));
                    }
                } else {
                    return Err(ParseError::MissingKey("Pages".to_string()));
                }
            }
        };

        let pages_obj = self.get_object(pages_obj_num, pages_gen_num)?;
        pages_obj.as_dict().ok_or_else(|| ParseError::SyntaxError {
            position: 0,
            message: "Pages is not a dictionary".to_string(),
        })
    }
620
621 pub fn page_count(&mut self) -> ParseResult<u32> {
623 let pages = self.pages()?;
624
625 if let Some(count_obj) = pages.get("Count") {
627 if let Some(count) = count_obj.as_integer() {
628 return Ok(count as u32);
629 }
630 }
631
632 if let Some(kids_obj) = pages.get("Kids") {
634 if let Some(kids_array) = kids_obj.as_array() {
635 return Ok(kids_array.len() as u32);
638 }
639 }
640
641 Ok(0)
644 }
645
646 pub fn metadata(&mut self) -> ParseResult<DocumentMetadata> {
648 let mut metadata = DocumentMetadata::default();
649
650 if let Some(info_dict) = self.info()? {
651 if let Some(title) = info_dict.get("Title").and_then(|o| o.as_string()) {
652 metadata.title = title.as_str().ok().map(|s| s.to_string());
653 }
654 if let Some(author) = info_dict.get("Author").and_then(|o| o.as_string()) {
655 metadata.author = author.as_str().ok().map(|s| s.to_string());
656 }
657 if let Some(subject) = info_dict.get("Subject").and_then(|o| o.as_string()) {
658 metadata.subject = subject.as_str().ok().map(|s| s.to_string());
659 }
660 if let Some(keywords) = info_dict.get("Keywords").and_then(|o| o.as_string()) {
661 metadata.keywords = keywords.as_str().ok().map(|s| s.to_string());
662 }
663 if let Some(creator) = info_dict.get("Creator").and_then(|o| o.as_string()) {
664 metadata.creator = creator.as_str().ok().map(|s| s.to_string());
665 }
666 if let Some(producer) = info_dict.get("Producer").and_then(|o| o.as_string()) {
667 metadata.producer = producer.as_str().ok().map(|s| s.to_string());
668 }
669 }
670
671 metadata.version = self.version().to_string();
672 metadata.page_count = self.page_count().ok();
673
674 Ok(metadata)
675 }
676
677 fn ensure_page_tree(&mut self) -> ParseResult<()> {
679 if self.page_tree.is_none() {
680 let page_count = self.page_count()?;
681 self.page_tree = Some(super::page_tree::PageTree::new(page_count));
682 }
683 Ok(())
684 }
685
686 pub fn get_page(&mut self, _index: u32) -> ParseResult<&super::page_tree::ParsedPage> {
692 self.ensure_page_tree()?;
693
694 Err(ParseError::SyntaxError {
698 position: 0,
699 message: "get_page not implemented due to borrow checker constraints. Use PdfDocument instead.".to_string(),
700 })
701 }
702
703 pub fn get_all_pages(&mut self) -> ParseResult<Vec<super::page_tree::ParsedPage>> {
705 let page_count = self.page_count()?;
706 let mut pages = Vec::with_capacity(page_count as usize);
707
708 for i in 0..page_count {
709 let page = self.get_page(i)?.clone();
710 pages.push(page);
711 }
712
713 Ok(pages)
714 }
715
    /// Consumes the reader and wraps it in a `PdfDocument`.
    pub fn into_document(self) -> super::document::PdfDocument<R> {
        super::document::PdfDocument::new(self)
    }
720
    /// Replaces the parse context with a freshly constructed one.
    pub fn clear_parse_context(&mut self) {
        self.parse_context = StackSafeContext::new();
    }
725
    /// Mutable access to the reader's parse context.
    pub fn parse_context_mut(&mut self) -> &mut StackSafeContext {
        &mut self.parse_context
    }
730
731 fn find_page_objects(&mut self) -> ParseResult<Vec<(u32, u16)>> {
733 let mut page_refs = Vec::new();
734
735 let obj_nums: Vec<u32> = self.xref.entries().keys().cloned().collect();
737
738 for obj_num in obj_nums {
739 if let Ok(obj) = self.get_object(obj_num, 0) {
741 if let Some(dict) = obj.as_dict() {
742 if let Some(PdfObject::Name(type_name)) = dict.get("Type") {
744 if type_name.0 == "Page" {
745 page_refs.push((obj_num, 0));
746 }
747 }
748 }
749 }
750 }
751
752 Ok(page_refs)
753 }
754
    /// Last-resort guess for the catalog reference.
    ///
    /// Always returns object 1, generation 0, without inspecting the file.
    // NOTE(review): presumably a placeholder for a real scan for a
    // `/Type /Catalog` object — confirm before relying on it.
    fn find_catalog_object(&mut self) -> ParseResult<(u32, u16)> {
        Ok((1, 0))
    }
764
765 fn create_synthetic_pages_dict(
767 &mut self,
768 page_refs: &[(u32, u16)],
769 ) -> ParseResult<&PdfDictionary> {
770 use super::objects::{PdfArray, PdfName};
771
772 let mut kids = PdfArray::new();
774 for (obj_num, gen_num) in page_refs {
775 kids.push(PdfObject::Reference(*obj_num, *gen_num));
776 }
777
778 let mut pages_dict = PdfDictionary::new();
780 pages_dict.insert(
781 "Type".to_string(),
782 PdfObject::Name(PdfName("Pages".to_string())),
783 );
784 pages_dict.insert("Kids".to_string(), PdfObject::Array(kids));
785 pages_dict.insert(
786 "Count".to_string(),
787 PdfObject::Integer(page_refs.len() as i64),
788 );
789
790 let mut media_box = None;
792 for (obj_num, gen_num) in page_refs.iter().take(1) {
793 if let Ok(page_obj) = self.get_object(*obj_num, *gen_num) {
794 if let Some(page_dict) = page_obj.as_dict() {
795 if let Some(mb) = page_dict.get("MediaBox") {
796 media_box = Some(mb.clone());
797 }
798 }
799 }
800 }
801
802 if let Some(mb) = media_box {
804 pages_dict.insert("MediaBox".to_string(), mb);
805 } else {
806 let mut mb_array = PdfArray::new();
807 mb_array.push(PdfObject::Integer(0));
808 mb_array.push(PdfObject::Integer(0));
809 mb_array.push(PdfObject::Integer(612));
810 mb_array.push(PdfObject::Integer(792));
811 pages_dict.insert("MediaBox".to_string(), PdfObject::Array(mb_array));
812 }
813
814 let synthetic_key = (u32::MAX - 1, 0);
816 self.object_cache
817 .insert(synthetic_key, PdfObject::Dictionary(pages_dict));
818
819 if let PdfObject::Dictionary(dict) = &self.object_cache[&synthetic_key] {
821 Ok(dict)
822 } else {
823 unreachable!("Just inserted dictionary")
824 }
825 }
826}
827
/// Summary of a document's descriptive metadata.
///
/// Every optional field is `None` by default and `version` defaults to the
/// empty string.
#[derive(Debug, Default, Clone)]
pub struct DocumentMetadata {
    /// Document title.
    pub title: Option<String>,
    /// Document author.
    pub author: Option<String>,
    /// Document subject.
    pub subject: Option<String>,
    /// Keywords associated with the document.
    pub keywords: Option<String>,
    /// Name of the creator recorded for the document.
    pub creator: Option<String>,
    /// Name of the producer recorded for the document.
    pub producer: Option<String>,
    /// Creation date, kept as a string.
    pub creation_date: Option<String>,
    /// Modification date, kept as a string.
    pub modification_date: Option<String>,
    /// PDF version as text (for example `1.4`).
    pub version: String,
    /// Number of pages, when it could be determined.
    pub page_count: Option<u32>,
}
842
/// Iterator over the lines of a string, accepting all three end-of-line
/// styles: `"\r\n"`, `"\n"` and a lone `"\r"`.
///
/// Separators are not included in the yielded lines. A separator at the very
/// end of the input does not produce a trailing empty line, and an empty
/// input yields nothing.
pub struct EOLIter<'s> {
    /// Portion of the input that has not been yielded yet.
    remainder: &'s str,
}

impl<'s> Iterator for EOLIter<'s> {
    type Item = &'s str;

    fn next(&mut self) -> Option<Self::Item> {
        if self.remainder.is_empty() {
            return None;
        }

        // One forward scan for the first end-of-line character. Searching
        // for each separator string independently would rescan the whole
        // remainder on every call whenever one separator style is absent,
        // which makes iterating a large input quadratic.
        match self.remainder.find(|c: char| c == '\n' || c == '\r') {
            Some(eol_start) => {
                let (line, rest) = self.remainder.split_at(eol_start);
                // "\r\n" counts as a single separator.
                let eol_len = if rest.starts_with("\r\n") { 2 } else { 1 };
                self.remainder = &rest[eol_len..];
                Some(line)
            }
            None => {
                // Last line without a terminating separator.
                let line = self.remainder;
                self.remainder = "";
                Some(line)
            }
        }
    }
}

/// Extension trait adding [`PDFLines::pdf_lines`] to string-like types.
pub trait PDFLines: AsRef<str> {
    /// Returns an iterator over the lines of `self`, splitting on `"\r\n"`,
    /// `"\n"` or `"\r"`.
    fn pdf_lines(&self) -> EOLIter<'_> {
        EOLIter {
            remainder: self.as_ref(),
        }
    }
}

impl PDFLines for &str {}
impl<'a> PDFLines for std::borrow::Cow<'a, str> {}
impl PDFLines for String {}
879
880#[cfg(test)]
881mod tests {
882
883 use super::*;
884 use crate::parser::objects::{PdfName, PdfString};
885 use crate::parser::test_helpers::*;
886 use crate::parser::ParseOptions;
887 use std::io::Cursor;
888
889 #[test]
890 fn test_reader_construction() {
891 let pdf_data = create_minimal_pdf();
892 let cursor = Cursor::new(pdf_data);
893 let result = PdfReader::new(cursor);
894 assert!(result.is_ok());
895 }
896
897 #[test]
898 fn test_reader_version() {
899 let pdf_data = create_minimal_pdf();
900 let cursor = Cursor::new(pdf_data);
901 let reader = PdfReader::new(cursor).unwrap();
902 assert_eq!(reader.version().major, 1);
903 assert_eq!(reader.version().minor, 4);
904 }
905
906 #[test]
907 fn test_reader_different_versions() {
908 let versions = vec![
909 "1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "2.0",
910 ];
911
912 for version in versions {
913 let pdf_data = create_pdf_with_version(version);
914 let cursor = Cursor::new(pdf_data);
915 let reader = PdfReader::new(cursor).unwrap();
916
917 let parts: Vec<&str> = version.split('.').collect();
918 assert_eq!(reader.version().major, parts[0].parse::<u8>().unwrap());
919 assert_eq!(reader.version().minor, parts[1].parse::<u8>().unwrap());
920 }
921 }
922
923 #[test]
924 fn test_reader_catalog() {
925 let pdf_data = create_minimal_pdf();
926 let cursor = Cursor::new(pdf_data);
927 let mut reader = PdfReader::new(cursor).unwrap();
928
929 let catalog = reader.catalog();
930 assert!(catalog.is_ok());
931
932 let catalog_dict = catalog.unwrap();
933 assert_eq!(
934 catalog_dict.get("Type"),
935 Some(&PdfObject::Name(PdfName("Catalog".to_string())))
936 );
937 }
938
939 #[test]
940 fn test_reader_info_none() {
941 let pdf_data = create_minimal_pdf();
942 let cursor = Cursor::new(pdf_data);
943 let mut reader = PdfReader::new(cursor).unwrap();
944
945 let info = reader.info().unwrap();
946 assert!(info.is_none());
947 }
948
949 #[test]
950 fn test_reader_info_present() {
951 let pdf_data = create_pdf_with_info();
952 let cursor = Cursor::new(pdf_data);
953 let mut reader = PdfReader::new(cursor).unwrap();
954
955 let info = reader.info().unwrap();
956 assert!(info.is_some());
957
958 let info_dict = info.unwrap();
959 assert_eq!(
960 info_dict.get("Title"),
961 Some(&PdfObject::String(PdfString(
962 "Test PDF".to_string().into_bytes()
963 )))
964 );
965 assert_eq!(
966 info_dict.get("Author"),
967 Some(&PdfObject::String(PdfString(
968 "Test Author".to_string().into_bytes()
969 )))
970 );
971 }
972
973 #[test]
974 fn test_reader_get_object() {
975 let pdf_data = create_minimal_pdf();
976 let cursor = Cursor::new(pdf_data);
977 let mut reader = PdfReader::new(cursor).unwrap();
978
979 let obj = reader.get_object(1, 0);
981 assert!(obj.is_ok());
982
983 let catalog = obj.unwrap();
984 assert!(catalog.as_dict().is_some());
985 }
986
987 #[test]
988 fn test_reader_get_invalid_object() {
989 let pdf_data = create_minimal_pdf();
990 let cursor = Cursor::new(pdf_data);
991 let mut reader = PdfReader::new(cursor).unwrap();
992
993 let obj = reader.get_object(999, 0);
995 assert!(obj.is_err());
996 }
997
998 #[test]
999 fn test_reader_get_free_object() {
1000 let pdf_data = create_minimal_pdf();
1001 let cursor = Cursor::new(pdf_data);
1002 let mut reader = PdfReader::new(cursor).unwrap();
1003
1004 let obj = reader.get_object(0, 65535);
1006 assert!(obj.is_ok());
1007 assert_eq!(obj.unwrap(), &PdfObject::Null);
1008 }
1009
1010 #[test]
1011 fn test_reader_resolve_reference() {
1012 let pdf_data = create_minimal_pdf();
1013 let cursor = Cursor::new(pdf_data);
1014 let mut reader = PdfReader::new(cursor).unwrap();
1015
1016 let ref_obj = PdfObject::Reference(1, 0);
1018 let resolved = reader.resolve(&ref_obj);
1019
1020 assert!(resolved.is_ok());
1021 assert!(resolved.unwrap().as_dict().is_some());
1022 }
1023
1024 #[test]
1025 fn test_reader_resolve_non_reference() {
1026 let pdf_data = create_minimal_pdf();
1027 let cursor = Cursor::new(pdf_data);
1028 let mut reader = PdfReader::new(cursor).unwrap();
1029
1030 let int_obj = PdfObject::Integer(42);
1032 let resolved = reader.resolve(&int_obj).unwrap();
1033
1034 assert_eq!(resolved, &PdfObject::Integer(42));
1035 }
1036
1037 #[test]
1038 fn test_reader_cache_behavior() {
1039 let pdf_data = create_minimal_pdf();
1040 let cursor = Cursor::new(pdf_data);
1041 let mut reader = PdfReader::new(cursor).unwrap();
1042
1043 let obj1 = reader.get_object(1, 0).unwrap();
1045 assert!(obj1.as_dict().is_some());
1046
1047 let obj2 = reader.get_object(1, 0).unwrap();
1049 assert!(obj2.as_dict().is_some());
1050 }
1051
1052 #[test]
1053 fn test_reader_wrong_generation() {
1054 let pdf_data = create_minimal_pdf();
1055 let cursor = Cursor::new(pdf_data);
1056 let mut reader = PdfReader::new(cursor).unwrap();
1057
1058 let obj = reader.get_object(1, 99);
1060 assert!(obj.is_err());
1061 }
1062
1063 #[test]
1064 fn test_reader_invalid_pdf() {
1065 let invalid_data = b"This is not a PDF file";
1066 let cursor = Cursor::new(invalid_data.to_vec());
1067 let result = PdfReader::new(cursor);
1068
1069 assert!(result.is_err());
1070 }
1071
    /// A PDF whose xref section contains garbage instead of entries is rejected.
    #[test]
    fn test_reader_corrupt_xref() {
        // Fixture bytes are significant; do not reformat the literal.
        let corrupt_pdf = b"%PDF-1.4
1 0 obj
<< /Type /Catalog >>
endobj
xref
corrupted xref table
trailer
<< /Size 2 /Root 1 0 R >>
startxref
24
%%EOF"
            .to_vec();

        let cursor = Cursor::new(corrupt_pdf);
        let result = PdfReader::new(cursor);
        assert!(result.is_err());
    }
1093
    /// A PDF with an xref table but no trailer section is rejected.
    #[test]
    fn test_reader_missing_trailer() {
        // Fixture bytes are significant; do not reformat the literal.
        let pdf_no_trailer = b"%PDF-1.4
1 0 obj
<< /Type /Catalog >>
endobj
xref
0 2
0000000000 65535 f
0000000009 00000 n
startxref
24
%%EOF"
            .to_vec();

        let cursor = Cursor::new(pdf_no_trailer);
        let result = PdfReader::new(cursor);
        assert!(result.is_err());
    }
1115
1116 #[test]
1117 fn test_reader_empty_pdf() {
1118 let cursor = Cursor::new(Vec::new());
1119 let result = PdfReader::new(cursor);
1120 assert!(result.is_err());
1121 }
1122
1123 #[test]
1124 fn test_reader_page_count() {
1125 let pdf_data = create_minimal_pdf();
1126 let cursor = Cursor::new(pdf_data);
1127 let mut reader = PdfReader::new(cursor).unwrap();
1128
1129 let count = reader.page_count();
1130 assert!(count.is_ok());
1131 assert_eq!(count.unwrap(), 0); }
1133
1134 #[test]
1135 fn test_reader_into_document() {
1136 let pdf_data = create_minimal_pdf();
1137 let cursor = Cursor::new(pdf_data);
1138 let reader = PdfReader::new(cursor).unwrap();
1139
1140 let document = reader.into_document();
1141 let page_count = document.page_count();
1143 assert!(page_count.is_ok());
1144 }
1145
1146 #[test]
1147 fn test_reader_pages_dict() {
1148 let pdf_data = create_minimal_pdf();
1149 let cursor = Cursor::new(pdf_data);
1150 let mut reader = PdfReader::new(cursor).unwrap();
1151
1152 let pages = reader.pages();
1153 assert!(pages.is_ok());
1154 let pages_dict = pages.unwrap();
1155 assert_eq!(
1156 pages_dict.get("Type"),
1157 Some(&PdfObject::Name(PdfName("Pages".to_string())))
1158 );
1159 }
1160
1161 #[test]
1162 fn test_reader_pdf_with_binary_data() {
1163 let pdf_data = create_pdf_with_binary_marker();
1164
1165 let cursor = Cursor::new(pdf_data);
1166 let result = PdfReader::new(cursor);
1167 assert!(result.is_ok());
1168 }
1169
1170 #[test]
1171 fn test_reader_metadata() {
1172 let pdf_data = create_pdf_with_info();
1173 let cursor = Cursor::new(pdf_data);
1174 let mut reader = PdfReader::new(cursor).unwrap();
1175
1176 let metadata = reader.metadata().unwrap();
1177 assert_eq!(metadata.title, Some("Test PDF".to_string()));
1178 assert_eq!(metadata.author, Some("Test Author".to_string()));
1179 assert_eq!(metadata.subject, Some("Testing".to_string()));
1180 assert_eq!(metadata.version, "1.4".to_string());
1181 }
1182
1183 #[test]
1184 fn test_reader_metadata_empty() {
1185 let pdf_data = create_minimal_pdf();
1186 let cursor = Cursor::new(pdf_data);
1187 let mut reader = PdfReader::new(cursor).unwrap();
1188
1189 let metadata = reader.metadata().unwrap();
1190 assert!(metadata.title.is_none());
1191 assert!(metadata.author.is_none());
1192 assert_eq!(metadata.version, "1.4".to_string());
1193 assert_eq!(metadata.page_count, Some(0));
1194 }
1195
1196 #[test]
1197 fn test_reader_object_number_mismatch() {
1198 let pdf_data = create_minimal_pdf();
1202 let cursor = Cursor::new(pdf_data);
1203 let mut reader = PdfReader::new(cursor).unwrap();
1204
1205 let result = reader.get_object(1, 99);
1208 assert!(result.is_err());
1209
1210 let result2 = reader.get_object(999, 0);
1212 assert!(result2.is_err());
1213 }
1214
1215 #[test]
1216 fn test_document_metadata_struct() {
1217 let metadata = DocumentMetadata {
1218 title: Some("Title".to_string()),
1219 author: Some("Author".to_string()),
1220 subject: Some("Subject".to_string()),
1221 keywords: Some("Keywords".to_string()),
1222 creator: Some("Creator".to_string()),
1223 producer: Some("Producer".to_string()),
1224 creation_date: Some("D:20240101".to_string()),
1225 modification_date: Some("D:20240102".to_string()),
1226 version: "1.5".to_string(),
1227 page_count: Some(10),
1228 };
1229
1230 assert_eq!(metadata.title, Some("Title".to_string()));
1231 assert_eq!(metadata.page_count, Some(10));
1232 }
1233
1234 #[test]
1235 fn test_document_metadata_default() {
1236 let metadata = DocumentMetadata::default();
1237 assert!(metadata.title.is_none());
1238 assert!(metadata.author.is_none());
1239 assert!(metadata.subject.is_none());
1240 assert!(metadata.keywords.is_none());
1241 assert!(metadata.creator.is_none());
1242 assert!(metadata.producer.is_none());
1243 assert!(metadata.creation_date.is_none());
1244 assert!(metadata.modification_date.is_none());
1245 assert_eq!(metadata.version, "".to_string());
1246 assert!(metadata.page_count.is_none());
1247 }
1248
1249 #[test]
1250 fn test_document_metadata_clone() {
1251 let metadata = DocumentMetadata {
1252 title: Some("Test".to_string()),
1253 version: "1.4".to_string(),
1254 ..Default::default()
1255 };
1256
1257 let cloned = metadata.clone();
1258 assert_eq!(cloned.title, Some("Test".to_string()));
1259 assert_eq!(cloned.version, "1.4".to_string());
1260 }
1261
    /// A trailer that lacks the Root entry makes construction fail.
    #[test]
    fn test_reader_trailer_validation_error() {
        // Fixture bytes are significant; do not reformat the literal.
        let bad_pdf = b"%PDF-1.4
1 0 obj
<< /Type /Catalog >>
endobj
xref
0 2
0000000000 65535 f
0000000009 00000 n
trailer
<< /Size 2 >>
startxref
46
%%EOF"
            .to_vec();

        let cursor = Cursor::new(bad_pdf);
        let result = PdfReader::new(cursor);
        assert!(result.is_err());
    }
1286
/// The minimal PDF fixture must open successfully when the reader is given
/// customised parse options.
#[test]
fn test_reader_with_options() {
    // Start from the defaults and override only the three settings under test.
    let options = {
        let mut custom = ParseOptions::default();
        custom.lenient_streams = true;
        custom.max_recovery_bytes = 2000;
        custom.collect_warnings = true;
        custom
    };

    let source = Cursor::new(create_minimal_pdf());
    assert!(PdfReader::new_with_options(source, options).is_ok());
}
1299
// Opens a hand-written PDF twice, once with strict options and once with
// lenient stream handling, and asserts that construction fails both times.
// In the fixture, object 4 declares `/Length 10` in its stream dictionary
// while the data between `stream` and `endstream` is longer than 10 bytes.
// NOTE(review): the lenient attempt is also asserted to fail; presumably the
// hand-written xref offsets do not match the real object positions, so the
// document is rejected before stream recovery matters. Confirm.
1300 #[test]
1301 fn test_lenient_stream_parsing() {
1302 let pdf_data = b"%PDF-1.4
13041 0 obj
1305<< /Type /Catalog /Pages 2 0 R >>
1306endobj
13072 0 obj
1308<< /Type /Pages /Kids [3 0 R] /Count 1 >>
1309endobj
13103 0 obj
1311<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
1312endobj
13134 0 obj
1314<< /Length 10 >>
1315stream
1316This is a longer stream than 10 bytes
1317endstream
1318endobj
1319xref
13200 5
13210000000000 65535 f
13220000000009 00000 n
13230000000058 00000 n
13240000000116 00000 n
13250000000219 00000 n
1326trailer
1327<< /Size 5 /Root 1 0 R >>
1328startxref
1329299
1330%%EOF"
1331 .to_vec();
1332
    // First attempt: strict options. The bytes are cloned because the cursor
    // takes ownership and the same data is needed again below.
1333 let cursor = Cursor::new(pdf_data.clone());
1335 let strict_options = ParseOptions::strict();
1336 let strict_reader = PdfReader::new_with_options(cursor, strict_options);
1337 assert!(strict_reader.is_err());
1339
    // Second attempt: defaults with lenient stream handling switched on.
1340 let cursor = Cursor::new(pdf_data);
1342 let mut options = ParseOptions::default();
1343 options.lenient_streams = true;
1344 options.max_recovery_bytes = 1000;
1345 options.collect_warnings = false;
1346 let lenient_reader = PdfReader::new_with_options(cursor, options);
    // Despite its name, `lenient_reader` holds a `Result`; an error is expected.
1347 assert!(lenient_reader.is_err());
1348 }
1349
/// Default parse options: lenient streams off, a 1000-byte recovery limit,
/// and warning collection off.
#[test]
fn test_parse_options_default() {
    // Pull out just the fields under test; any others are ignored.
    let ParseOptions {
        lenient_streams,
        max_recovery_bytes,
        collect_warnings,
        ..
    } = ParseOptions::default();

    assert!(!lenient_streams);
    assert_eq!(max_recovery_bytes, 1000);
    assert!(!collect_warnings);
}
1357
/// Cloning `ParseOptions` must carry over every customised field.
#[test]
fn test_parse_options_clone() {
    // Move all three settings away from the values the default-options test
    // expects, so a clone that silently reset them would be detected.
    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    options.max_recovery_bytes = 2000;
    options.collect_warnings = true;

    let cloned = options.clone();

    // Boolean fields are asserted directly rather than compared against `true`.
    assert!(cloned.lenient_streams);
    assert_eq!(cloned.max_recovery_bytes, 2000);
    assert!(cloned.collect_warnings);
}
1369
1370 #[allow(dead_code)]
1373 fn create_encrypted_pdf_dict() -> PdfDictionary {
1374 let mut dict = PdfDictionary::new();
1375 dict.insert(
1376 "Filter".to_string(),
1377 PdfObject::Name(PdfName("Standard".to_string())),
1378 );
1379 dict.insert("V".to_string(), PdfObject::Integer(1));
1380 dict.insert("R".to_string(), PdfObject::Integer(2));
1381 dict.insert("O".to_string(), PdfObject::String(PdfString(vec![0u8; 32])));
1382 dict.insert("U".to_string(), PdfObject::String(PdfString(vec![0u8; 32])));
1383 dict.insert("P".to_string(), PdfObject::Integer(-4));
1384 dict
1385 }
1386
// Returns the bytes of a hand-written PDF whose trailer carries
// `/Encrypt 4 0 R` and an `/ID` array. Object 4 is the referenced dictionary:
// `/Filter /Standard`, `/V 1`, `/R 2`, `/P -4`, with `/O` and `/U` given as
// descriptive placeholder text rather than computed password hashes.
// NOTE(review): the fixture bytes are kept exactly as found; the xref offsets
// are hand-written and were not verified against the object positions.
1387 fn create_pdf_with_encryption() -> Vec<u8> {
1388 b"%PDF-1.4
13901 0 obj
1391<< /Type /Catalog /Pages 2 0 R >>
1392endobj
13932 0 obj
1394<< /Type /Pages /Kids [3 0 R] /Count 1 >>
1395endobj
13963 0 obj
1397<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>
1398endobj
13994 0 obj
1400<< /Filter /Standard /V 1 /R 2 /O (32 bytes of owner password hash data) /U (32 bytes of user password hash data) /P -4 >>
1401endobj
1402xref
14030 5
14040000000000 65535 f
14050000000009 00000 n
14060000000058 00000 n
14070000000116 00000 n
14080000000201 00000 n
1409trailer
1410<< /Size 5 /Root 1 0 R /Encrypt 4 0 R /ID [(file id)] >>
1411startxref
1412295
1413%%EOF"
1414 .to_vec()
1415 }
1416
/// The minimal fixture is reported as unencrypted and unlocked, while the
/// fixture from `create_pdf_with_encryption` is rejected at construction.
#[test]
fn test_reader_encryption_detection() {
    let plain = PdfReader::new(Cursor::new(create_minimal_pdf())).unwrap();
    assert!(!plain.is_encrypted());
    assert!(plain.is_unlocked());

    // Only the failure itself is checked, not the error variant.
    let encrypted = PdfReader::new(Cursor::new(create_pdf_with_encryption()));
    assert!(encrypted.is_err());
}
1433
/// Exercises every encryption-related method on a reader opened from the
/// unencrypted minimal fixture.
#[test]
fn test_reader_encryption_methods_unencrypted() {
    let mut reader = PdfReader::new(Cursor::new(create_minimal_pdf())).unwrap();

    // State queries.
    assert!(!reader.is_encrypted());
    assert!(reader.is_unlocked());

    // No handler is available through either accessor.
    assert!(reader.encryption_handler().is_none());
    assert!(reader.encryption_handler_mut().is_none());

    // With no handler present, both unlock paths report success.
    let unlocked_with_password = reader.unlock_with_password("any_password").unwrap();
    assert!(unlocked_with_password);
    let unlocked_with_empty = reader.try_empty_password().unwrap();
    assert!(unlocked_with_empty);
}
1450
/// Both handler accessors return `None` for the unencrypted minimal fixture,
/// and the state queries agree with that.
#[test]
fn test_reader_encryption_handler_access() {
    let data = create_minimal_pdf();
    let mut reader = PdfReader::new(Cursor::new(data)).unwrap();

    let shared_is_absent = reader.encryption_handler().is_none();
    assert!(shared_is_absent);
    let exclusive_is_absent = reader.encryption_handler_mut().is_none();
    assert!(exclusive_is_absent);

    assert!(!reader.is_encrypted());
    assert!(reader.is_unlocked());
}
1465
/// Repeated unlock attempts on a reader opened from the unencrypted minimal
/// fixture all report success, whatever password is supplied.
#[test]
fn test_reader_multiple_password_attempts() {
    let mut reader = PdfReader::new(Cursor::new(create_minimal_pdf())).unwrap();

    for candidate in ["test1", "test2", "admin", "", "password"] {
        assert!(reader.unlock_with_password(candidate).unwrap());
    }

    // The empty-password shortcut is tried five times in a row.
    for _attempt in 0..5 {
        assert!(reader.try_empty_password().unwrap());
    }
}
1483
/// Unlock attempts must not change the encryption state reported by a reader
/// opened from the unencrypted minimal fixture.
#[test]
fn test_reader_encryption_state_consistency() {
    /// Asserts the reader reports: not encrypted, unlocked, no handler.
    #[track_caller]
    fn assert_unencrypted<R: std::io::Read + std::io::Seek>(reader: &PdfReader<R>) {
        assert!(!reader.is_encrypted());
        assert!(reader.is_unlocked());
        assert!(reader.encryption_handler().is_none());
    }

    let mut reader = PdfReader::new(Cursor::new(create_minimal_pdf())).unwrap();
    assert_unencrypted(&reader);

    // The outcomes are deliberately discarded: only the state afterwards matters.
    let _ = reader.unlock_with_password("test");
    assert_unencrypted(&reader);

    let _ = reader.try_empty_password();
    assert_unencrypted(&reader);
}
1506
/// Opening the fixture from `create_pdf_with_encryption` must fail. Any error
/// is accepted; `ParseError::EncryptionNotSupported` is the one singled out.
#[test]
fn test_reader_encryption_error_handling() {
    let outcome = PdfReader::new(Cursor::new(create_pdf_with_encryption()));

    // The named variant passes immediately.
    if matches!(outcome, Err(ParseError::EncryptionNotSupported)) {
        return;
    }

    // Every other error passes as well; only a successful open is a failure.
    assert!(
        outcome.is_err(),
        "Should not successfully create reader for encrypted PDF without password"
    );
}
1527
/// The unencrypted minimal fixture reports the same encryption state under
/// both strict and lenient parse options.
#[test]
fn test_reader_encryption_with_options() {
    // Strict first, then lenient, each against a freshly built fixture.
    for options in [ParseOptions::strict(), ParseOptions::lenient()] {
        let source = Cursor::new(create_minimal_pdf());
        let reader = PdfReader::new_with_options(source, options).unwrap();

        assert!(!reader.is_encrypted());
        assert!(reader.is_unlocked());
    }
}
1546
/// Unusual password strings are all accepted by a reader opened from the
/// unencrypted minimal fixture.
#[test]
fn test_reader_encryption_integration_edge_cases() {
    let mut reader = PdfReader::new(Cursor::new(create_minimal_pdf())).unwrap();

    // Tried in the same order as the original one-assertion-per-line version.
    let edge_case_passwords = [
        "",
        " ",
        "very_long_password_that_exceeds_normal_length",
        "unicode_test_ñáéíóú",
        "pass@#$%^&*()",
        "pass\nwith\nnewlines",
        "pass\twith\ttabs",
    ];

    for password in edge_case_passwords {
        assert!(reader.unlock_with_password(password).unwrap());
    }
}
1566}