1use crate::core::context::PdfContext;
2use crate::core::document::PdfHeader;
3use crate::core::errors::{PdfError, Result};
4use crate::core::objects::*;
5use crate::core::syntax::{is_delimiter, is_digit, is_whitespace, CharCodes};
6use super::byte_stream::ByteStream;
7use flate2::read::ZlibDecoder;
8use std::io::Read;
9
/// Parser that reads PDF objects out of a borrowed byte buffer.
pub struct PdfParser<'a> {
    /// Cursor over the input bytes being parsed.
    bytes: ByteStream<'a>,
    /// Receives the parsed header, indirect objects and trailer information.
    context: PdfContext,
    /// When `true`, an object that fails to parse aborts parsing with its error;
    /// when `false`, a warning is printed and the parser skips to `endobj`.
    throw_on_invalid_object: bool,
}
16
17impl<'a> PdfParser<'a> {
18 pub fn for_bytes(pdf_bytes: &'a [u8]) -> Self {
19 Self::for_bytes_with_options(pdf_bytes, false)
20 }
21
22 pub fn for_bytes_with_options(pdf_bytes: &'a [u8], throw_on_invalid_object: bool) -> Self {
23 PdfParser {
24 bytes: ByteStream::of(pdf_bytes),
25 context: PdfContext::create(),
26 throw_on_invalid_object,
27 }
28 }
29
30 pub fn parse_single_object(mut self) -> Result<PdfObject> {
32 self.parse_object()
33 }
34
35 pub fn parse_document(mut self) -> Result<PdfContext> {
37 self.context.header = self.parse_header()?;
38 self.skip_binary_comment();
39
40 let mut prev_offset = None;
41 while !self.bytes.done() {
42 self.parse_document_section()?;
43 let offset = self.bytes.offset();
44 if prev_offset == Some(offset) {
45 let pos = self.bytes.position();
46 return Err(PdfError::StalledParser {
47 line: pos.line,
48 column: pos.column,
49 offset: pos.offset,
50 });
51 }
52 prev_offset = Some(offset);
53 }
54
55 self.context.delete(&PdfRef::of(0, 0));
57
58 Ok(self.context)
59 }
60
61 fn parse_header(&mut self) -> Result<PdfHeader> {
62 self.skip_whitespace();
63
64 while !self.bytes.done() {
66 if self.matches_keyword(b"%PDF-") {
67 break;
68 }
69 self.bytes.next();
70 }
71
72 if self.bytes.done() {
73 return Err(PdfError::MissingPdfHeader);
74 }
75
76 for _ in 0..5 {
78 self.bytes.next();
79 }
80
81 let major = self.parse_raw_int()? as u8;
83 self.expect_byte(CharCodes::Period)?;
84 let minor = self.parse_raw_int()? as u8;
85
86 Ok(PdfHeader::for_version(major, minor))
87 }
88
89 fn skip_binary_comment(&mut self) {
90 self.skip_whitespace();
91 if self.bytes.peek() == Some(CharCodes::Percent) {
93 while !self.bytes.done() {
94 let byte = self.bytes.peek();
95 if byte == Some(CharCodes::Newline) || byte == Some(CharCodes::CarriageReturn) {
96 break;
97 }
98 self.bytes.next();
99 }
100 }
101 }
102
103 fn parse_document_section(&mut self) -> Result<()> {
104 self.skip_whitespace_and_comments();
105
106 if self.bytes.done() {
107 return Ok(());
108 }
109
110 if self.matches_keyword(b"xref") {
112 self.skip_xref_section();
113 } else if self.matches_keyword(b"trailer") {
114 self.skip_trailer();
115 } else if self.matches_keyword(b"startxref") {
116 self.skip_startxref();
117 } else if self.matches_keyword(b"%%EOF") {
118 self.skip_keyword(b"%%EOF");
119 self.skip_junk_after_eof();
120 } else {
121 self.try_parse_indirect_object()?;
122 }
123
124 Ok(())
125 }
126
127 fn try_parse_indirect_object(&mut self) -> Result<()> {
128 let start_offset = self.bytes.offset();
129
130 let obj_num = match self.try_parse_int() {
132 Some(n) => n as u32,
133 None => {
134 self.bytes.next();
136 return Ok(());
137 }
138 };
139
140 self.skip_whitespace();
141 let gen_num = match self.try_parse_int() {
142 Some(n) => n as u16,
143 None => {
144 self.bytes.move_to(start_offset + 1);
145 return Ok(());
146 }
147 };
148
149 self.skip_whitespace();
150 if !self.matches_keyword(b"obj") {
151 self.bytes.move_to(start_offset + 1);
152 return Ok(());
153 }
154 self.skip_keyword(b"obj");
155 self.skip_whitespace_and_comments();
156
157 let object = match self.parse_object() {
159 Ok(obj) => obj,
160 Err(e) => {
161 if self.throw_on_invalid_object {
162 return Err(e);
163 }
164 eprintln!(
165 "Warning: Trying to parse invalid object: {e}"
166 );
167 self.skip_to_endobj();
168 return Ok(());
169 }
170 };
171
172 self.skip_whitespace_and_comments();
173
174 let final_object = if self.matches_keyword(b"stream") {
176 self.parse_stream_after_dict(object)?
177 } else {
178 object
179 };
180
181 let pdf_ref = PdfRef::of(obj_num, gen_num);
182
183 if let PdfObject::Stream(ref stream) = final_object {
185 if let Some(PdfObject::Name(type_name)) = stream.dict.get(&PdfName::of("Type")) {
186 let type_str = type_name.as_string();
187 if type_str == "/XRef" {
188 self.extract_trailer_info_from_dict(&stream.dict);
189 } else if type_str == "/ObjStm" {
190 self.parse_object_stream(stream);
191 }
192 }
193 }
194
195 self.context.assign(&pdf_ref, final_object);
196
197 self.skip_whitespace_and_comments();
198 if self.matches_keyword(b"endobj") {
199 self.skip_keyword(b"endobj");
200 }
201
202 Ok(())
203 }
204
205 fn parse_stream_after_dict(&mut self, dict_object: PdfObject) -> Result<PdfObject> {
206 self.skip_keyword(b"stream");
207
208 if self.bytes.peek() == Some(CharCodes::CarriageReturn) {
210 self.bytes.next();
211 }
212 if self.bytes.peek() == Some(CharCodes::Newline) {
213 self.bytes.next();
214 }
215
216 let start = self.bytes.offset();
218 let end;
219
220 if let PdfObject::Dict(ref dict) = dict_object {
222 if let Some(PdfObject::Number(n)) = dict.get(&PdfName::length()) {
223 let length = n.as_number() as usize;
224 end = start + length;
225 self.bytes.move_to(end);
226 } else {
227 end = self.find_endstream(start);
229 }
230 } else {
231 end = self.find_endstream(start);
232 }
233
234 let contents = self.bytes.slice(start, end).to_vec();
235 self.skip_whitespace();
236 if self.matches_keyword(b"endstream") {
237 self.skip_keyword(b"endstream");
238 }
239
240 let dict = if let PdfObject::Dict(d) = dict_object {
241 d
242 } else {
243 PdfDict::new()
244 };
245
246 Ok(PdfObject::Stream(PdfRawStream::of(dict, contents)))
247 }
248
249 fn find_endstream(&mut self, _start: usize) -> usize {
250 let search_start = self.bytes.offset();
251 while !self.bytes.done() {
252 if self.matches_keyword(b"endstream") {
253 return self.bytes.offset();
254 }
255 self.bytes.next();
256 }
257 self.bytes.move_to(search_start);
259 search_start
260 }
261
262 fn parse_object(&mut self) -> Result<PdfObject> {
264 self.skip_whitespace_and_comments();
265
266 match self.bytes.peek() {
267 None => Err(PdfError::UnexpectedObjectType),
268 Some(b) => match b {
269 CharCodes::ForwardSlash => self.parse_name(),
270 CharCodes::LessThan => {
271 if self.bytes.peek_ahead(1) == Some(CharCodes::LessThan) {
272 self.parse_dict()
273 } else {
274 self.parse_hex_string()
275 }
276 }
277 CharCodes::LeftParen => self.parse_literal_string(),
278 CharCodes::LeftSquareBracket => self.parse_array(),
279 b't' if self.matches_keyword(b"true") => {
280 self.skip_keyword(b"true");
281 Ok(PdfObject::Bool(PdfBool::TRUE))
282 }
283 b'f' if self.matches_keyword(b"false") => {
284 self.skip_keyword(b"false");
285 Ok(PdfObject::Bool(PdfBool::FALSE))
286 }
287 b'n' if self.matches_keyword(b"null") => {
288 self.skip_keyword(b"null");
289 Ok(PdfObject::Null)
290 }
291 _ if is_digit(b) || b == CharCodes::Plus || b == CharCodes::Minus || b == CharCodes::Period => {
292 self.parse_number_or_ref()
293 }
294 _ => {
295 let pos = self.bytes.position();
296 Err(PdfError::InvalidObjectParsing {
297 line: pos.line,
298 column: pos.column,
299 offset: pos.offset,
300 })
301 }
302 },
303 }
304 }
305
306 fn parse_name(&mut self) -> Result<PdfObject> {
307 self.bytes.next(); let mut name = String::new();
309 while !self.bytes.done() {
310 let b = self.bytes.peek().unwrap();
311 if is_whitespace(b) || is_delimiter(b) {
312 break;
313 }
314 name.push(self.bytes.next().unwrap() as char);
315 }
316 Ok(PdfObject::Name(PdfName::of(&name)))
317 }
318
319 fn parse_hex_string(&mut self) -> Result<PdfObject> {
320 self.bytes.next(); let mut hex = String::new();
322 while !self.bytes.done() {
323 let b = self.bytes.peek().unwrap();
324 if b == CharCodes::GreaterThan {
325 self.bytes.next();
326 break;
327 }
328 if !is_whitespace(b) {
329 hex.push(b as char);
330 }
331 self.bytes.next();
332 }
333 Ok(PdfObject::HexString(PdfHexString::of(&hex)))
334 }
335
336 fn parse_literal_string(&mut self) -> Result<PdfObject> {
337 self.bytes.next(); let mut value = String::new();
339 let mut depth = 1;
340 let mut escaped = false;
341
342 while !self.bytes.done() && depth > 0 {
343 let b = self.bytes.next().unwrap();
344
345 if escaped {
346 value.push(b as char);
347 escaped = false;
348 continue;
349 }
350
351 match b {
352 CharCodes::BackSlash => {
353 value.push(b as char);
354 escaped = true;
355 }
356 CharCodes::LeftParen => {
357 depth += 1;
358 value.push(b as char);
359 }
360 CharCodes::RightParen => {
361 depth -= 1;
362 if depth > 0 {
363 value.push(b as char);
364 }
365 }
366 _ => {
367 value.push(b as char);
368 }
369 }
370 }
371
372 Ok(PdfObject::String(PdfString::of(&value)))
373 }
374
375 fn parse_array(&mut self) -> Result<PdfObject> {
376 self.bytes.next(); let mut array = PdfArray::new();
378
379 loop {
380 self.skip_whitespace_and_comments();
381 if self.bytes.done() {
382 break;
383 }
384 if self.bytes.peek() == Some(CharCodes::RightSquareBracket) {
385 self.bytes.next();
386 break;
387 }
388 let obj = self.parse_object()?;
389 array.push(obj);
390 }
391
392 Ok(PdfObject::Array(array))
393 }
394
395 fn parse_dict(&mut self) -> Result<PdfObject> {
396 self.bytes.next(); self.bytes.next(); let mut dict = PdfDict::new();
399
400 loop {
401 self.skip_whitespace_and_comments();
402 if self.bytes.done() {
403 break;
404 }
405 if self.bytes.peek() == Some(CharCodes::GreaterThan)
407 && self.bytes.peek_ahead(1) == Some(CharCodes::GreaterThan)
408 {
409 self.bytes.next();
410 self.bytes.next();
411 break;
412 }
413
414 let key_obj = self.parse_object()?;
416 let key = match key_obj {
417 PdfObject::Name(n) => n,
418 _ => continue, };
420
421 self.skip_whitespace_and_comments();
422
423 if self.bytes.done() {
425 break;
426 }
427 if self.bytes.peek() == Some(CharCodes::GreaterThan)
429 && self.bytes.peek_ahead(1) == Some(CharCodes::GreaterThan)
430 {
431 break;
432 }
433 let value = self.parse_object()?;
434 dict.set(key, value);
435 }
436
437 Ok(PdfObject::Dict(dict))
438 }
439
440 fn parse_number_or_ref(&mut self) -> Result<PdfObject> {
441 let start = self.bytes.offset();
442 let number = self.parse_raw_number()?;
443
444 let after_num = self.bytes.offset();
446 self.skip_whitespace();
447
448 if let Some(gen) = self.try_parse_int() {
449 self.skip_whitespace();
450 if self.bytes.peek() == Some(CharCodes::UpperR) {
451 self.bytes.next();
452 return Ok(PdfObject::Ref(PdfRef::of(
453 number as u32,
454 gen as u16,
455 )));
456 }
457 self.bytes.move_to(after_num);
459 } else {
460 self.bytes.move_to(after_num);
461 }
462
463 let _ = start;
466 Ok(PdfObject::Number(PdfNumber::of(number)))
467 }
468
469 fn parse_raw_int(&mut self) -> Result<i64> {
470 let mut value = String::new();
471 while !self.bytes.done() {
472 let b = self.bytes.peek().unwrap();
473 if !is_digit(b) {
474 break;
475 }
476 value.push(self.bytes.next().unwrap() as char);
477 }
478 if value.is_empty() {
479 let pos = self.bytes.position();
480 return Err(PdfError::InvalidObjectParsing {
481 line: pos.line,
482 column: pos.column,
483 offset: pos.offset,
484 });
485 }
486 value.parse::<i64>().map_err(|_| {
487 let pos = self.bytes.position();
488 PdfError::InvalidObjectParsing {
489 line: pos.line,
490 column: pos.column,
491 offset: pos.offset,
492 }
493 })
494 }
495
496 fn try_parse_int(&mut self) -> Option<i64> {
497 let start = self.bytes.offset();
498 let mut value = String::new();
499 while !self.bytes.done() {
500 let b = self.bytes.peek().unwrap();
501 if !is_digit(b) {
502 break;
503 }
504 value.push(self.bytes.next().unwrap() as char);
505 }
506 if value.is_empty() {
507 self.bytes.move_to(start);
508 return None;
509 }
510 match value.parse::<i64>() {
511 Ok(n) => Some(n),
512 Err(_) => {
513 self.bytes.move_to(start);
514 None
515 }
516 }
517 }
518
519 fn parse_raw_number(&mut self) -> Result<f64> {
520 let mut value = String::new();
521
522 while !self.bytes.done() {
524 let b = self.bytes.peek().unwrap();
525 if is_digit(b) || b == CharCodes::Plus || b == CharCodes::Minus || b == CharCodes::Period {
526 value.push(self.bytes.next().unwrap() as char);
527 if b == CharCodes::Period {
528 break;
529 }
530 } else {
531 break;
532 }
533 }
534
535 while !self.bytes.done() {
537 let b = self.bytes.peek().unwrap();
538 if !is_digit(b) {
539 break;
540 }
541 value.push(self.bytes.next().unwrap() as char);
542 }
543
544 if value.is_empty() || value == "." || value == "+" || value == "-" {
545 let pos = self.bytes.position();
546 return Err(PdfError::InvalidObjectParsing {
547 line: pos.line,
548 column: pos.column,
549 offset: pos.offset,
550 });
551 }
552
553 value.parse::<f64>().map_err(|_| {
554 let pos = self.bytes.position();
555 PdfError::InvalidObjectParsing {
556 line: pos.line,
557 column: pos.column,
558 offset: pos.offset,
559 }
560 })
561 }
562
563 fn expect_byte(&mut self, expected: u8) -> Result<()> {
564 match self.bytes.next() {
565 Some(b) if b == expected => Ok(()),
566 _ => {
567 let pos = self.bytes.position();
568 Err(PdfError::InvalidObjectParsing {
569 line: pos.line,
570 column: pos.column,
571 offset: pos.offset,
572 })
573 }
574 }
575 }
576
577 fn skip_whitespace(&mut self) {
578 while !self.bytes.done() {
579 if let Some(b) = self.bytes.peek() {
580 if is_whitespace(b) {
581 self.bytes.next();
582 } else {
583 break;
584 }
585 }
586 }
587 }
588
589 fn skip_whitespace_and_comments(&mut self) {
590 loop {
591 self.skip_whitespace();
592 if self.bytes.peek() == Some(CharCodes::Percent) {
593 self.skip_comment();
594 } else {
595 break;
596 }
597 }
598 }
599
600 fn skip_comment(&mut self) {
601 while !self.bytes.done() {
602 let b = self.bytes.peek().unwrap();
603 if b == CharCodes::Newline || b == CharCodes::CarriageReturn {
604 return;
605 }
606 self.bytes.next();
607 }
608 }
609
610 fn matches_keyword(&self, keyword: &[u8]) -> bool {
611 let remaining = self.bytes.remaining();
612 if remaining.len() < keyword.len() {
613 return false;
614 }
615 &remaining[..keyword.len()] == keyword
616 }
617
618 fn skip_keyword(&mut self, keyword: &[u8]) {
619 for _ in 0..keyword.len() {
620 self.bytes.next();
621 }
622 }
623
624 fn skip_to_endobj(&mut self) {
625 while !self.bytes.done() {
626 if self.matches_keyword(b"endobj") {
627 self.skip_keyword(b"endobj");
628 return;
629 }
630 self.bytes.next();
631 }
632 }
633
634 fn skip_xref_section(&mut self) {
635 self.skip_keyword(b"xref");
636 self.skip_whitespace();
637
638 while !self.bytes.done() {
640 if self.matches_keyword(b"trailer")
641 || self.matches_keyword(b"startxref")
642 || self.matches_keyword(b"%%EOF")
643 {
644 break;
645 }
646 if let Some(_first_obj) = self.try_parse_int() {
648 self.skip_whitespace();
649 if let Some(count) = self.try_parse_int() {
650 self.skip_whitespace();
651 for _ in 0..count {
653 self.skip_line();
654 self.skip_whitespace();
655 }
656 }
657 } else {
658 self.bytes.next();
659 }
660 }
661 }
662
663 fn skip_line(&mut self) {
664 while !self.bytes.done() {
665 let b = self.bytes.peek().unwrap();
666 self.bytes.next();
667 if b == CharCodes::Newline || b == CharCodes::CarriageReturn {
668 return;
669 }
670 }
671 }
672
673 fn skip_trailer(&mut self) {
674 self.skip_keyword(b"trailer");
675 self.skip_whitespace_and_comments();
676
677 if let Ok(PdfObject::Dict(dict)) = self.parse_object() {
679 if let Some(root) = dict.get(&PdfName::of("Root")) {
681 self.context.trailer_info.root = Some(root.clone());
682 }
683 if let Some(encrypt) = dict.get(&PdfName::of("Encrypt")) {
684 self.context.trailer_info.encrypt = Some(encrypt.clone());
685 }
686 if let Some(info) = dict.get(&PdfName::of("Info")) {
687 self.context.trailer_info.info = Some(info.clone());
688 }
689 if let Some(id) = dict.get(&PdfName::of("ID")) {
690 self.context.trailer_info.id = Some(id.clone());
691 }
692 }
693 }
694
695 fn extract_trailer_info_from_dict(&mut self, dict: &PdfDict) {
696 if self.context.trailer_info.root.is_none() {
697 if let Some(root) = dict.get(&PdfName::of("Root")) {
698 self.context.trailer_info.root = Some(root.clone());
699 }
700 }
701 if self.context.trailer_info.encrypt.is_none() {
702 if let Some(encrypt) = dict.get(&PdfName::of("Encrypt")) {
703 self.context.trailer_info.encrypt = Some(encrypt.clone());
704 }
705 }
706 if self.context.trailer_info.info.is_none() {
707 if let Some(info) = dict.get(&PdfName::of("Info")) {
708 self.context.trailer_info.info = Some(info.clone());
709 }
710 }
711 if self.context.trailer_info.id.is_none() {
712 if let Some(id) = dict.get(&PdfName::of("ID")) {
713 self.context.trailer_info.id = Some(id.clone());
714 }
715 }
716 }
717
718 fn decompress_stream(&self, stream: &PdfRawStream) -> Option<Vec<u8>> {
719 let filter = stream.dict.get(&PdfName::of("Filter"));
720 match filter {
721 Some(PdfObject::Name(n)) if n.as_string() == "/FlateDecode" => {
722 let mut decoder = ZlibDecoder::new(&stream.contents[..]);
723 let mut decompressed = Vec::new();
724 decoder.read_to_end(&mut decompressed).ok()?;
725 Some(decompressed)
726 }
727 None => Some(stream.contents.clone()),
728 _ => None, }
730 }
731
732 fn parse_object_stream(&mut self, stream: &PdfRawStream) {
733 let n = match stream.dict.get(&PdfName::of("N")) {
734 Some(PdfObject::Number(n)) => n.as_number() as usize,
735 _ => return,
736 };
737 let first = match stream.dict.get(&PdfName::of("First")) {
738 Some(PdfObject::Number(n)) => n.as_number() as usize,
739 _ => return,
740 };
741
742 let decompressed = match self.decompress_stream(stream) {
743 Some(d) => d,
744 None => return,
745 };
746
747 let header_bytes = &decompressed[..first.min(decompressed.len())];
749 let header_str = String::from_utf8_lossy(header_bytes);
750 let nums: Vec<usize> = header_str
751 .split_whitespace()
752 .filter_map(|s| s.parse().ok())
753 .collect();
754
755 if nums.len() < n * 2 {
756 return;
757 }
758
759 let mut obj_entries = Vec::with_capacity(n);
760 for i in 0..n {
761 let obj_num = nums[i * 2] as u32;
762 let offset = nums[i * 2 + 1];
763 obj_entries.push((obj_num, first + offset));
764 }
765
766 for (obj_num, offset) in obj_entries {
768 if offset >= decompressed.len() {
769 continue;
770 }
771 let sub_parser = PdfParser::for_bytes(&decompressed[offset..]);
772 if let Ok(obj) = sub_parser.parse_single_object() {
773 let pdf_ref = PdfRef::of(obj_num, 0);
774 if self.context.lookup(&pdf_ref).is_none() {
775 self.context.assign(&pdf_ref, obj);
776 }
777 }
778 }
779 }
780
781 fn skip_startxref(&mut self) {
782 self.skip_keyword(b"startxref");
783 self.skip_whitespace();
784 let _ = self.try_parse_int();
785 }
786
787 fn skip_junk_after_eof(&mut self) {
788 while !self.bytes.done() {
790 self.skip_whitespace();
791 if self.bytes.done() {
792 break;
793 }
794 if self.matches_keyword(b"%PDF-")
795 || self.matches_keyword(b"xref")
796 || self.matches_keyword(b"trailer")
797 || self.matches_keyword(b"startxref")
798 {
799 break;
800 }
801 if let Some(b) = self.bytes.peek() {
803 if is_digit(b) {
804 break;
805 }
806 }
807 self.bytes.next();
808 }
809 }
810}
811
#[cfg(test)]
mod tests {
    use super::*;

    // Input without a `%PDF-` marker must be rejected.
    #[test]
    fn throws_error_when_pdf_missing_header() {
        let input = b"I_AM_NOT_A_HEADER\n1 0 obj\n(foobar)\nendobj\n";
        let parser = PdfParser::for_bytes(input);
        assert!(parser.parse_document().is_err());
    }

    // An object without a closing `endobj` is still registered.
    #[test]
    fn does_not_throw_when_endobj_missing() {
        let input = b"%PDF-1.7\n1 0 obj\n(foobar)\nfoo\n";
        let parser = PdfParser::for_bytes(input);
        let context = parser.parse_document().unwrap();
        assert!(context.lookup(&PdfRef::of(1, 0)).is_some());
        // The content is only checked when the object is a string.
        if let Some(PdfObject::String(s)) = context.lookup(&PdfRef::of(1, 0)) {
            assert_eq!(s.as_string(), "foobar");
        }
    }

    // A line of raw bytes (no leading `%`) after the header must not break parsing.
    #[test]
    fn handles_invalid_binary_comments_after_header() {
        let mut input = Vec::new();
        input.extend_from_slice(b"%PDF-1.7\n");
        input.extend_from_slice(&[128, 1, 2, 3, 4, 5, 129, 130, 131, CharCodes::Newline]);
        input.extend_from_slice(b"1 0 obj\n(foobar)\nendobj");
        let parser = PdfParser::for_bytes(&input);
        let context = parser.parse_document().unwrap();
        assert_eq!(context.enumerate_indirect_objects().len(), 1);
    }

    // One indirect object per basic object kind; all eight must be registered.
    #[test]
    fn parses_basic_objects() {
        let input = b"%PDF-1.7\n\
            1 0 obj\n42\nendobj\n\
            2 0 obj\n/Foo\nendobj\n\
            3 0 obj\n(Hello)\nendobj\n\
            4 0 obj\ntrue\nendobj\n\
            5 0 obj\nnull\nendobj\n\
            6 0 obj\n<48656C6C6F>\nendobj\n\
            7 0 obj\n[1 2 3]\nendobj\n\
            8 0 obj\n<< /Type /Page >>\nendobj\n";
        let parser = PdfParser::for_bytes(input);
        let context = parser.parse_document().unwrap();
        assert_eq!(context.enumerate_indirect_objects().len(), 8);
    }

    // A dictionary value written as `2 0 R` must not disturb object counting.
    #[test]
    fn parses_indirect_references() {
        let input = b"%PDF-1.7\n\
            1 0 obj\n<< /Ref 2 0 R >>\nendobj\n\
            2 0 obj\n42\nendobj\n";
        let parser = PdfParser::for_bytes(input);
        let context = parser.parse_document().unwrap();
        assert_eq!(context.enumerate_indirect_objects().len(), 2);
    }

    // The xref table, trailer, startxref and %%EOF are skipped; only the object counts.
    #[test]
    fn parses_pdf_with_xref_and_trailer() {
        let input = b"%PDF-1.7\n\
            1 0 obj\n(foobar)\nendobj\n\
            xref\n0 2\n\
            0000000000 65535 f \n\
            0000000009 00000 n \n\
            trailer\n<< /Size 2 /Root 1 0 R >>\n\
            startxref\n34\n%%EOF\n";
        let parser = PdfParser::for_bytes(input);
        let context = parser.parse_document().unwrap();
        assert_eq!(context.enumerate_indirect_objects().len(), 1);
    }

    // Trailing garbage after %%EOF must not trigger the stalled-parser error.
    #[test]
    fn does_not_stall_with_junk_after_eof() {
        let input = b"%PDF-1.7\n\
            1 0 obj\n(foobar)\nendobj\n\
            startxref\n127\n%%EOF\n\
            @@@@@@@@@@@@@@@@@@\n";
        let parser = PdfParser::for_bytes(input);
        let context = parser.parse_document().unwrap();
        assert_eq!(context.enumerate_indirect_objects().len(), 1);
    }

    // A stream with an explicit /Length yields exactly that many content bytes.
    #[test]
    fn parses_streams() {
        let input = b"%PDF-1.7\n\
            1 0 obj\n<< /Length 4 >>\nstream\ntest\nendstream\nendobj\n";
        let parser = PdfParser::for_bytes(input);
        let context = parser.parse_document().unwrap();
        if let Some(PdfObject::Stream(stream)) = context.lookup(&PdfRef::of(1, 0)) {
            assert_eq!(stream.contents, b"test");
        } else {
            panic!("Expected stream object");
        }
    }

    // Smoke test against a PDF file read from the test assets directory.
    #[test]
    fn loads_real_pdf() {
        let pdf_bytes = std::fs::read("test_assets/pdfs/normal.pdf").unwrap();
        let parser = PdfParser::for_bytes(&pdf_bytes);
        let context = parser.parse_document().unwrap();
        assert!(context.object_count() > 0);
    }
}