1use crate::{PdfObject, PdfObjectIdentifier, PdfStreamObject, PdfString, PdfUntypedDictionary};
2use std::collections::HashMap;
3
/// Cursor into the byte buffer of a [`PdfParser`].
///
/// The parser keeps no cursor of its own: every parsing method receives a
/// mutable position and moves it forward as bytes are consumed.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct PdfReaderPosition {
    /// Zero-based offset of the byte the parser will look at next.
    index: usize,
}

impl PdfReaderPosition {
    /// Creates a position pointing at offset 0, the first byte of a buffer.
    ///
    /// Equivalent to [`Default::default`].
    #[must_use]
    pub const fn new() -> Self {
        Self { index: 0 }
    }
}
14
/// Low-level tokenizer and object parser over a borrowed PDF byte buffer.
///
/// The parser only holds a reference to the bytes, so it is cheap to copy.
/// The read offset lives in a separate [`PdfReaderPosition`] that is passed to
/// each method.
#[derive(Debug, Clone, Copy)]
pub struct PdfParser<'a> {
    /// The complete input being parsed; slices handed out by the parser
    /// borrow from it.
    buffer: &'a [u8],
}
18
19impl<'a> PdfParser<'a> {
20 #[must_use]
21 pub const fn new(buffer: &'a [u8]) -> PdfParser<'a> {
22 PdfParser { buffer }
23 }
24
25 pub fn read_line(&self, position: &mut PdfReaderPosition) -> &[u8] {
27 let start_position = position.index;
28
29 let last_byte = loop {
30 let current_byte = self.current_value(position);
31 if current_byte == b'\n' || current_byte == b'\r' {
32 break current_byte;
33 } else if !self.advance_position(position) {
34 break b'0';
35 };
36 };
38
39 self.advance_position(position);
40
41 let slice = &self.buffer[start_position..position.index];
42
43 if last_byte == b'\r' {
44 self.advance_position_if_next(b'\n', position)
45 }
46
47 slice
48 }
49
50 fn advance_position(&self, position: &mut PdfReaderPosition) -> bool {
51 if position.index < self.buffer.len() - 1 {
52 position.index += 1;
53 true
54 } else {
55 false
56 }
57 }
58
59 fn advance_position_if_next(&self, next: u8, position: &mut PdfReaderPosition) {
60 if position.index < self.buffer.len() - 1 && self.current_value(position) == next {
61 position.index += 1;
62 }
63 }
64
65 pub fn next_object(&self, position: &mut PdfReaderPosition) -> Option<PdfObject<'a>> {
66 let next_word = self.next_word(position)?;
67 if next_word == b"true" {
68 return Some(PdfObject::Boolean(true));
69 } else if next_word == b"false" {
70 return Some(PdfObject::Boolean(false));
71 } else if next_word == b"null" {
72 return Some(PdfObject::Null);
73 } else if next_word
74 .iter()
75 .enumerate()
76 .all(|(idx, b)| b.is_ascii_digit() || (idx == 0 && matches!(b, b'+' | b'-')))
77 {
78 let first_number = std::str::from_utf8(next_word).ok()?.parse::<i32>().ok()?;
81
82 let index_after_first = position.index;
84 if let Some(second_number) = self.parse_next::<u16>(position) {
85 let third_word = self.next_word(position);
86 if third_word == Some(b"R") {
87 return Some(PdfObject::Reference(PdfObjectIdentifier::new(
89 first_number as u16,
90 second_number,
91 )));
92 }
93 position.index = index_after_first;
95 } else {
96 position.index = index_after_first;
97 }
98
99 return Some(PdfObject::Integer(first_number));
100 } else if next_word == b"/" {
101 let current_byte = self.current_value(position);
102 let name_word = if Self::is_whitespace(current_byte) || Self::is_delimiter(current_byte)
103 {
104 return Some(PdfObject::Name(
105 &self.buffer[position.index..position.index],
106 ));
107 } else {
108 self.next_word(position)?
109 };
110 return if Self::is_delimiter(name_word[0]) {
111 None
113 } else {
114 Some(PdfObject::Name(name_word))
115 };
116 } else if next_word == b"[" {
117 let mut array = Vec::new();
118 while let Some(object) = self.next_object(position) {
120 array.push(object);
121 }
122 return Some(PdfObject::Array(array));
123 } else if next_word == b"<<" {
124 let mut map = HashMap::new();
125 while let Some(PdfObject::Name(key)) = self.next_object(position) {
128 let value = self.next_object(position)?;
129 map.insert(key, value);
130 }
131
132 let index_after_dict = position.index;
133 let next_word = self.next_word(position);
134 if next_word == Some(b"stream") {
135 if self.current_value(position) == b'\r' && self.buffer[position.index + 1] == b'\n'
136 {
137 position.index += 2;
138 } else if self.current_value(position) == b'\n' {
139 position.index += 1;
140 } else {
141 unimplemented!("'stream' not followed by \r\n or \n");
142 }
143
144 if let Some(PdfObject::Integer(length)) = map.get(&b"Length"[..]) {
145 let stream_length = *length as usize;
156 let bytes = &self.buffer[position.index..(position.index + stream_length)];
157 position.index += stream_length;
158 let stream_object = PdfStreamObject {
160 dictionary: PdfUntypedDictionary::new(map),
161 bytes,
162 };
163 let next_word = self.next_word(position)?;
164 if next_word == b"endstream" {
165 return Some(PdfObject::Stream(stream_object));
166 }
167 unimplemented!("Stream not ended with 'endstream'");
168 } else {
169 unimplemented!("stream without /Length in dictionary");
170 }
171 } else {
172 position.index = index_after_dict;
173 }
174
175 return Some(PdfObject::Dictionary(PdfUntypedDictionary::new(map)));
176 } else if next_word == b"(" {
177 let start_position = position.index;
179 let mut last_was_slash = false;
180 while (last_was_slash, self.current_value(position)) != (false, b')') {
181 last_was_slash = self.current_value(position) == b'\\';
182 if !self.advance_position(position) {
183 return None;
184 }
185 }
186 let slice = &self.buffer[start_position..position.index];
187 if !self.advance_position(position) {
188 return None;
189 }
190 return Some(PdfObject::String(PdfString::new_literal(slice)));
191 } else if next_word == b"<" {
192 let start_position = position.index;
195 while self.current_value(position) != b'>' {
196 if !self.advance_position(position) {
197 return None;
198 }
199 }
200 let slice = &self.buffer[start_position..position.index];
201 if !self.advance_position(position) {
202 return None;
203 }
204 return Some(PdfObject::String(PdfString::new_hexadecimal(slice)));
205 }
206
207 if let Ok(float_value) = std::str::from_utf8(next_word).ok()?.parse::<f32>() {
208 return Some(PdfObject::Real(float_value));
209 }
210
211 None
212 }
213
214 pub fn next_indirect_object(
215 &self,
216 position: &mut PdfReaderPosition,
217 ) -> Option<(PdfObjectIdentifier, PdfObject<'a>)> {
218 let object_identifier = self.parse_next(position)?;
219 let generation_number = self.parse_next(position)?;
220
221 if self.next_word(position) != Some(b"obj") {
222 return None;
223 }
224 let object = self.next_object(position)?;
225 let w = self.next_word(position);
226 if w != Some(b"endobj") {
227 return None;
228 }
229
230 let object_identifier = PdfObjectIdentifier::new(object_identifier, generation_number);
231 Some((object_identifier, object))
232 }
233
234 fn parse_next<T: core::str::FromStr>(&self, position: &mut PdfReaderPosition) -> Option<T> {
235 let next_word = self.next_word(position)?;
236 let next_str = std::str::from_utf8(next_word).ok()?;
238 next_str.parse::<T>().ok()
239 }
240
241 pub const fn is_whitespace(byte: u8) -> bool {
245 matches!(byte, 0 | 9 | 10 | 12 | 13 | 32)
246 }
247
248 const fn is_delimiter(byte: u8) -> bool {
260 matches!(
261 byte,
262 b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%'
263 )
264 }
265
266 const fn at_eof(&self, position: &PdfReaderPosition) -> bool {
267 position.index >= self.buffer.len()
268 }
269
270 const fn current_value(&self, position: &PdfReaderPosition) -> u8 {
271 self.buffer[position.index]
272 }
273
274 pub fn next_word(&self, position: &mut PdfReaderPosition) -> Option<&'a [u8]> {
276 loop {
277 let current_char = self.current_value(position);
278 if Self::is_whitespace(current_char) {
279 if !self.advance_position(position) {
280 return None;
281 }
282 } else if current_char == b'%' {
283 self.read_line(position);
284 if self.at_eof(position) {
285 return None;
286 }
287 } else {
288 break;
289 }
290 }
291
292 let start_index = position.index;
293 let start_char = self.current_value(position);
294 let start_char_is_delimiter = Self::is_delimiter(start_char);
295 if start_char_is_delimiter {
296 if !self.advance_position(position) {
297 return None;
299 }
300 let next_char = self.current_value(position);
301 if (start_char == b'<' && next_char == b'<')
302 || (start_char == b'>' && next_char == b'>')
303 {
304 if !self.advance_position(position) {
307 return None;
309 }
310 }
311 return Some(&self.buffer[start_index..position.index]);
312 }
313
314 loop {
315 let current_char = self.current_value(position);
316 if Self::is_whitespace(current_char) || Self::is_delimiter(current_char) {
317 break;
318 } else if !self.advance_position(position) {
319 return None;
320 }
321 }
322
323 Some(&self.buffer[start_index..position.index])
324 }
325}
326
327#[cfg(test)]
328mod tests {
329 use crate::parse::{PdfParser, PdfReaderPosition};
330 use crate::{
331 PdfDocument, PdfDocumentData, PdfFormField, PdfObject, PdfObjectIdentifier, PdfString,
332 PdfVersion,
333 };
334
335 #[test]
336 fn parse_junk() {
337 let pdf_bytes = b"hello world";
338 let parse_result = PdfDocumentData::parse(pdf_bytes);
339 assert_eq!(
340 parse_result.err().unwrap(),
341 "File not starting with '%PDF-'".to_string()
342 );
343 }
344
345 #[test]
346 fn parse_minimal() {
347 fn assert_minimal_first_object(pdf: &PdfDocumentData) {
348 if let Some(PdfObject::Dictionary(dict)) =
349 pdf.objects.get(&PdfObjectIdentifier::new(1, 0))
350 {
351 assert_eq!(dict.map.len(), 2);
352 assert_eq!(
353 dict.map.get(&b"Type"[..]),
354 Some(&PdfObject::Name(b"Catalog"))
355 );
356 assert_eq!(
357 dict.map.get(&b"Pages"[..]),
358 Some(&PdfObject::Reference(PdfObjectIdentifier::new(2, 0)))
359 );
360 } else {
361 panic!("Object 1 0 was not parsed into a dictionary");
362 }
363 }
364
365 fn assert_minimal_second_object(pdf: &PdfDocumentData) {
366 if let Some(PdfObject::Dictionary(dict)) =
367 pdf.objects.get(&PdfObjectIdentifier::new(2, 0))
368 {
369 assert_eq!(dict.map.len(), 4);
370 assert_eq!(dict.map.get(&b"Type"[..]), Some(&PdfObject::Name(b"Pages")));
371 assert_eq!(
372 dict.map.get(&b"Kids"[..]),
373 Some(&PdfObject::Array(vec![PdfObject::Reference(
374 PdfObjectIdentifier::new(3, 0)
375 )]))
376 );
377 assert_eq!(dict.map.get(&b"Count"[..]), Some(&PdfObject::Integer(1)));
378 assert_eq!(
379 dict.map.get(&b"MediaBox"[..]),
380 Some(&PdfObject::Array(vec![
381 PdfObject::Integer(0),
382 PdfObject::Integer(0),
383 PdfObject::Integer(300),
384 PdfObject::Integer(144)
385 ]))
386 );
387 } else {
388 panic!("Object 2 0 was not parsed into a dictionary");
389 }
390 }
391
392 fn assert_minimal_third_object(pdf: &PdfDocumentData) {
393 if let Some(PdfObject::Dictionary(dict)) =
394 pdf.objects.get(&PdfObjectIdentifier::new(3, 0))
395 {
396 assert_eq!(dict.map.len(), 4);
397 assert_eq!(dict.map.get(&b"Type"[..]), Some(&PdfObject::Name(b"Page")));
398 assert_eq!(
399 dict.map.get(&b"Parent"[..]),
400 Some(&PdfObject::Reference(PdfObjectIdentifier::new(2, 0)))
401 );
402 assert_eq!(
403 dict.map.get(&b"Contents"[..]),
404 Some(&PdfObject::Reference(PdfObjectIdentifier::new(4, 0)))
405 );
406 if let Some(PdfObject::Dictionary(resources_dict)) = dict.map.get(&b"Resources"[..])
407 {
408 assert_eq!(resources_dict.map.len(), 1);
409 if let Some(PdfObject::Dictionary(font_dict)) =
410 resources_dict.map.get(&b"Font"[..])
411 {
412 assert_eq!(font_dict.map.len(), 1);
413 if let Some(PdfObject::Dictionary(f1_dict)) = font_dict.map.get(&b"F1"[..])
414 {
415 assert_eq!(f1_dict.map.len(), 3);
416 assert_eq!(
417 f1_dict.map.get(&b"Type"[..]),
418 Some(&PdfObject::Name(b"Font"))
419 );
420 assert_eq!(
421 f1_dict.map.get(&b"Subtype"[..]),
422 Some(&PdfObject::Name(b"Type1"))
423 );
424 assert_eq!(
425 f1_dict.map.get(&b"BaseFont"[..]),
426 Some(&PdfObject::Name(b"Times-Roman"))
427 );
428 } else {
429 panic!("Failed to parse /Resources->/Font->/F1 dictionary");
430 }
431 } else {
432 panic!("Failed to parse /Resources->/Font dictionary");
433 }
434 } else {
435 panic!("Failed to parse /Resources dictionary");
436 }
437 } else {
438 panic!("Object 3 0 was not parsed into a dictionary");
439 }
440 }
441
442 fn assert_minimal_fourth_object(pdf: &PdfDocumentData, unix_line_endings: bool) {
443 if let Some(PdfObject::Stream(stream)) =
444 pdf.objects.get(&PdfObjectIdentifier::new(4, 0))
445 {
446 assert_eq!(stream.dictionary.map.len(), 1);
447 if unix_line_endings {
448 assert_eq!(stream.bytes.len(), 55);
449 assert_eq!(
450 stream.bytes,
451 b" BT
452 /F1 18 Tf
453 0 0 Td
454 (Hello World) Tj
455 ET"
456 );
457 } else {
458 assert_eq!(stream.bytes.len(), 59);
459 assert_eq!(
460 stream.bytes,
461 b" BT\r
462 /F1 18 Tf\r
463 0 0 Td\r
464 (Hello World) Tj\r
465 ET"
466 );
467 }
468 } else {
469 panic!("Object 4 0 was not parsed into a stream");
470 }
471 }
472
473 fn assert_minimal_file(pdf_bytes: &[u8], unix_line_endings: bool) {
474 assert!(pdf_bytes.starts_with(b"%PDF-1.1"));
476
477 let pdf = PdfDocumentData::parse(pdf_bytes).unwrap();
478 assert_eq!(pdf.version, PdfVersion::Version11);
479 assert_eq!(pdf.objects.len(), 4);
480
481 assert_minimal_first_object(&pdf);
482 assert_minimal_second_object(&pdf);
483 assert_minimal_third_object(&pdf);
484 assert_minimal_fourth_object(&pdf, unix_line_endings);
485
486 assert_eq!(
487 pdf.trailer.map.get(&b"Root"[..]),
488 Some(&PdfObject::Reference(PdfObjectIdentifier::new(1, 0)))
489 );
490 assert_eq!(
491 pdf.trailer.map.get(&b"Size"[..]),
492 Some(&PdfObject::Integer(5))
493 );
494 }
495
496 assert_minimal_file(include_bytes!("tests/assets/minimal.pdf"), true);
497 assert_minimal_file(include_bytes!("tests/assets/minimal_crlf_l.pdf"), false);
498 }
499
500 #[test]
501 fn parse_signed_signicat() {
502 let pdf_bytes = include_bytes!("tests/assets/signed-by-signicat-example.pdf");
503 let pdf = PdfDocumentData::parse(pdf_bytes).unwrap();
504 assert_eq!(pdf.version, PdfVersion::Version17);
505 assert_eq!(pdf.objects.len(), 110);
506
507 let pdf = PdfDocument::parse(pdf_bytes).unwrap();
508 let pdf_fields = pdf.catalog.interactive_form.unwrap().fields;
509 assert_eq!(pdf_fields.len(), 1);
510 if let PdfFormField::Signature(signature_field) = &pdf_fields[0] {
511 if let Some(_signature) = &signature_field.signature {
512 return;
513 }
514 }
515 panic!("Failed parsing signature");
516 }
517
518 #[test]
519 fn test_whitespace() {
520 assert!(PdfParser::is_whitespace(b' '));
521 assert!(PdfParser::is_whitespace(b'\n'));
522 assert!(PdfParser::is_whitespace(b'\r'));
523 assert!(!PdfParser::is_whitespace(b'a'));
524 assert!(!PdfParser::is_whitespace(b'.'));
525 assert!(!PdfParser::is_whitespace(b'!'));
526 }
527
528 #[test]
529 fn test_read_line() {
530 let bytes = b"123\n456";
531 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
532 assert_eq!(position.index, 0);
533 parser.read_line(&mut position);
534 assert_eq!(position.index, 4);
535 assert_eq!(parser.current_value(&position), b'4');
536
537 let bytes = b"123\r456";
538 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
539 parser.read_line(&mut position);
540 assert_eq!(position.index, 4);
541 assert_eq!(parser.current_value(&position), b'4');
542
543 let bytes = b"123\r\n456";
544 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
545 parser.read_line(&mut position);
546 assert_eq!(position.index, 5);
547 assert_eq!(parser.current_value(&position), b'4');
548 }
549
550 #[test]
551 fn test_next_word() {
552 let bytes = b"obj ";
553 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
554 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
555
556 let bytes = b" obj<";
557 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
558 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
559
560 let bytes = b" obj ";
561 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
562 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
563
564 let bytes = b" obj<<";
565 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
566 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
567 assert_eq!(parser.next_word(&mut position), None);
568
569 let bytes = b" obj endobj ";
570 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
571 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
572 assert_eq!(parser.next_word(&mut position), Some(&b"endobj"[..]));
573 assert_eq!(parser.next_word(&mut position), None);
574
575 let bytes = b" % a comment\n obj endobj ";
576 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
577 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
578 assert_eq!(parser.next_word(&mut position), Some(&b"endobj"[..]));
579 assert_eq!(parser.next_word(&mut position), None);
580
581 let bytes = b" obj<< ";
582 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
583 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
584 assert_eq!(parser.next_word(&mut position), Some(&b"<<"[..]));
585 assert_eq!(parser.next_word(&mut position), None);
586
587 let bytes = b" obj \n % a comment\n<< ";
588 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
589 assert_eq!(parser.next_word(&mut position), Some(&b"obj"[..]));
590 assert_eq!(parser.next_word(&mut position), Some(&b"<<"[..]));
591 assert_eq!(parser.next_word(&mut position), None);
592 }
593
594 #[test]
595 fn test_next_object() {
596 let bytes = b"true ";
597 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
598 assert_eq!(
599 parser.next_object(&mut position),
600 Some(PdfObject::Boolean(true))
601 );
602
603 let bytes = b"false ";
604 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
605 assert_eq!(
606 parser.next_object(&mut position),
607 Some(PdfObject::Boolean(false))
608 );
609
610 let bytes = b"6.14 ";
611 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
612 assert_eq!(
613 parser.next_object(&mut position),
614 Some(PdfObject::Real(6.14))
615 );
616
617 let bytes = b"false false ";
618 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
619 assert_eq!(
620 parser.next_object(&mut position),
621 Some(PdfObject::Boolean(false))
622 );
623 assert_eq!(
624 parser.next_object(&mut position),
625 Some(PdfObject::Boolean(false))
626 );
627
628 let bytes = b" 0 ";
629 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
630 assert_eq!(
631 parser.next_object(&mut position),
632 Some(PdfObject::Integer(0))
633 );
634
635 let bytes = b" -1 ";
636 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
637 assert_eq!(
638 parser.next_object(&mut position),
639 Some(PdfObject::Integer(-1))
640 );
641
642 let bytes = b" +1 ";
643 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
644 assert_eq!(
645 parser.next_object(&mut position),
646 Some(PdfObject::Integer(1))
647 );
648
649 let bytes = b" 2147483647 ";
650 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
651 assert_eq!(
652 parser.next_object(&mut position),
653 Some(PdfObject::Integer(2_147_483_647))
654 );
655
656 let bytes = b" -2147483648 ";
657 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
658 assert_eq!(
659 parser.next_object(&mut position),
660 Some(PdfObject::Integer(-2_147_483_648))
661 );
662
663 let bytes = b"/Type ";
664 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
665 assert_eq!(
666 parser.next_object(&mut position),
667 Some(PdfObject::Name(b"Type"))
668 );
669
670 let bytes = b" << /Type /Catalog /Value 1 >> << /Name /Value >> % A comment\n999 << /OtherName /OtherValue >> ";
671 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
672 if let Some(PdfObject::Dictionary(dictionary)) = parser.next_object(&mut position) {
673 assert_eq!(dictionary.map.len(), 2);
674 let entry = dictionary.map.get(&b"Type"[..]);
675 assert_eq!(entry, Some(&PdfObject::Name(b"Catalog")));
676 let entry = dictionary.map.get(&b"Value"[..]);
677 assert_eq!(entry, Some(&PdfObject::Integer(1)));
678 } else {
679 panic!("Failed to parse dict");
680 }
681 if let Some(PdfObject::Dictionary(dictionary)) = parser.next_object(&mut position) {
682 assert_eq!(dictionary.map.len(), 1);
683 let entry = dictionary.map.get(&b"Name"[..]);
684 assert_eq!(entry, Some(&PdfObject::Name(b"Value")));
685 } else {
686 panic!("Failed to parse dict");
687 }
688 assert_eq!(
689 parser.next_object(&mut position),
690 Some(PdfObject::Integer(999))
691 );
692 if let Some(PdfObject::Dictionary(dictionary)) = parser.next_object(&mut position) {
693 assert_eq!(dictionary.map.len(), 1);
694 let entry = dictionary.map.get(&b"OtherName"[..]);
695 assert_eq!(entry, Some(&PdfObject::Name(b"OtherValue")));
696 } else {
697 panic!("Failed to parse dict");
698 }
699 }
700
701 #[test]
702 fn test_parse_dictionaries() {
703 let bytes = b"<</FT/Sig/T(Signature1)/V 1 0 R/F 132/Type/Annot/Subtype/Widget/Rect[0 0 0 0]/AP<</N 2 0 R>>/P 4 0 R/DR<<>>>> ";
704 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
705 if let Some(PdfObject::Dictionary(dictionary)) = parser.next_object(&mut position) {
706 assert_eq!(dictionary.map.len(), 10);
707 assert_eq!(
708 dictionary.map.get(&b"FT"[..]),
709 Some(&PdfObject::Name(b"Sig"))
710 );
711 assert_eq!(
712 dictionary.map.get(&b"T"[..]),
713 Some(&PdfObject::String(PdfString::new_literal(b"Signature1")))
714 );
715 assert_eq!(
716 dictionary.map.get(&b"V"[..]),
717 Some(&PdfObject::Reference(PdfObjectIdentifier::new(1, 0)))
718 );
719 assert_eq!(
720 dictionary.map.get(&b"F"[..]),
721 Some(&PdfObject::Integer(132))
722 );
723 assert_eq!(
724 dictionary.map.get(&b"Type"[..]),
725 Some(&PdfObject::Name(b"Annot"))
726 );
727 } else {
728 panic!("Failed to parse dict");
729 }
730
731 let bytes = b"<</Contents <ff>/Reference 1>> ";
732 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
733 if let Some(PdfObject::Dictionary(dictionary)) = parser.next_object(&mut position) {
734 assert_eq!(dictionary.map.len(), 2);
735 assert_eq!(
736 dictionary.map.get(&b"Contents"[..]),
737 Some(&PdfObject::String(PdfString::new_hexadecimal(b"ff")))
739 );
740 assert_eq!(
741 dictionary.map.get(&b"Reference"[..]),
742 Some(&PdfObject::Integer(1))
743 );
744 } else {
745 panic!("Failed to parse dict");
746 }
747 }
748
749 #[test]
750 fn test_parse_dictionary_mapping_to_empty_name() {
751 for bytes in [
752 &b"<< /App << /Name / >> >> "[..],
753 &b"<</App<</Name/>>>> "[..],
754 ] {
755 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
756 if let Some(PdfObject::Dictionary(dictionary)) = parser.next_object(&mut position) {
757 assert_eq!(dictionary.map.len(), 1);
758 if let Some(PdfObject::Dictionary(dictionary)) = dictionary.map.get(&b"App"[..]) {
759 assert_eq!(dictionary.map.len(), 1);
760 assert_eq!(
761 dictionary.map.get(&b"Name"[..]),
762 Some(&PdfObject::Name(b""))
763 );
764 return;
765 }
766 }
767 panic!("Failed to parse dict");
768 }
769 }
770
771 #[test]
780 #[ignore]
781 fn test_parse_dictionary_with_null_value() {
782 let bytes = b"<< /FirstKey 1 /SecondKey null /ThirdKey 3 >> ";
783 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
784 if let Some(PdfObject::Dictionary(dictionary)) = parser.next_object(&mut position) {
785 assert_eq!(dictionary.map.len(), 2);
786 assert_eq!(
787 dictionary.map.get(&b"FirstKey"[..]),
788 Some(&PdfObject::Integer(1))
789 );
790 assert_eq!(
791 dictionary.map.get(&b"SecondKey"[..]),
792 Some(&PdfObject::Integer(3))
793 );
794 return;
795 }
796 panic!("Failed to parse dict");
797 }
798
799 #[test]
800 fn test_parse_null() {
801 let bytes = b"null ";
802 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
803 assert_eq!(parser.next_object(&mut position), Some(PdfObject::Null));
804 }
805
806 #[test]
807 fn test_parse_names() {
808 fn assert_parsing(from: &[u8], expected_name: &[u8]) {
809 let (parser, mut position) = (PdfParser::new(from), PdfReaderPosition::new());
810 assert_eq!(
811 parser.next_object(&mut position),
812 Some(PdfObject::Name(expected_name))
813 );
814 }
815
816 assert_parsing(b"/Name1 ", b"Name1");
817 assert_parsing(b"/ASomewhatLongerName ", b"ASomewhatLongerName");
818 assert_parsing(
819 b"/A;Name_With-Various***Characters? ",
820 b"A;Name_With-Various***Characters?",
821 );
822 assert_parsing(b"/1.2 ", b"1.2");
823 assert_parsing(b"/$$ ", b"$$");
824 assert_parsing(b"/@pattern ", b"@pattern");
825 assert_parsing(b"/.notdef ", b".notdef");
826 }
833
834 #[test]
835 fn test_parse_strings() {
836 let bytes = b"() ";
837 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
838 assert_eq!(
839 parser.next_object(&mut position),
840 Some(PdfObject::String(PdfString::new_literal(b"")))
841 );
842
843 let bytes = b"(hello, world) (second string)(third) ";
844 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
845 assert_eq!(
846 parser.next_object(&mut position),
847 Some(PdfObject::String(PdfString::new_literal(b"hello, world")))
848 );
849 assert_eq!(
850 parser.next_object(&mut position),
851 Some(PdfObject::String(PdfString::new_literal(b"second string")))
852 );
853 assert_eq!(
854 parser.next_object(&mut position),
855 Some(PdfObject::String(PdfString::new_literal(b"third")))
856 );
857
858 let bytes = b"(hello \\(world\\) bye) ";
859 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
860 assert_eq!(
861 parser.next_object(&mut position),
862 Some(PdfObject::String(PdfString::new_literal(
864 b"hello \\(world\\) bye"
865 )))
866 );
867 }
868
869 #[test]
870 fn test_parse_arrays() {
871 let bytes = b"[549 6.14 false (Ralph) /SomeName ]";
872 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
873 if let Some(PdfObject::Array(array)) = parser.next_object(&mut position) {
874 assert_eq!(array.len(), 5);
875 assert_eq!(array[0], PdfObject::Integer(549));
876 assert_eq!(array[1], PdfObject::Real(6.14));
877 assert_eq!(array[2], PdfObject::Boolean(false));
878 assert_eq!(
879 array[3],
880 PdfObject::String(PdfString::new_literal(b"Ralph"))
881 );
882 assert_eq!(array[4], PdfObject::Name(b"SomeName"));
883 } else {
884 panic!("Failed to parse array");
885 }
886
887 let bytes = b"[] ";
888 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
889 if let Some(PdfObject::Array(array)) = parser.next_object(&mut position) {
890 assert_eq!(array.len(), 0);
891 } else {
892 panic!("Failed to parse empty array");
893 }
894 }
895
896 #[test]
897 fn test_parse_streams() {
898 let bytes = b"4 1 obj
899 << /Length 55 >>
900stream
901 BT
902 /F1 18 Tf
903 0 0 Td
904 (Hello World) Tj
905 ET
906endstream
907endobj ";
908 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
909 let (parsed_id, object) = parser.next_indirect_object(&mut position).unwrap();
910 assert_eq!(parsed_id, PdfObjectIdentifier::new(4, 1));
911 if let PdfObject::Stream(stream) = object {
912 assert_eq!(stream.dictionary.map.len(), 1);
913 assert_eq!(stream.bytes.len(), 55);
914 assert_eq!(
915 stream.bytes,
916 b" BT
917 /F1 18 Tf
918 0 0 Td
919 (Hello World) Tj
920 ET"
921 );
922 } else {
923 panic!("Failed parsing stream");
924 }
925 }
926
927 #[test]
928 fn test_parse_next() {
929 let bytes = b"32 ";
930 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
931 assert_eq!(parser.parse_next(&mut position), Some(32));
932
933 let bytes = b" 32 ";
934 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
935 assert_eq!(parser.parse_next(&mut position), Some(32));
936
937 let bytes = b" 32 % a comment\n 33 34 ";
938 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
939 assert_eq!(parser.parse_next(&mut position), Some(32));
940 assert_eq!(parser.parse_next(&mut position), Some(33));
941 assert_eq!(parser.parse_next(&mut position), Some(34));
942 }
943
944 #[test]
945 fn test_next_indirect_object() {
946 let bytes = b"8 0 obj\n 77\nendobj ";
947 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
948 let id = PdfObjectIdentifier::new(8, 0);
949 let object = PdfObject::Integer(77);
950 assert_eq!(
951 parser.next_indirect_object(&mut position),
952 Some((id, object))
953 );
954
955 let bytes = b"7 2 obj 62 endobj ";
956 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
957 let parsed = parser.next_indirect_object(&mut position);
958 let id = PdfObjectIdentifier::new(7, 2);
959 let object = PdfObject::Integer(62);
960 assert_eq!(parsed, Some((id, object)));
961
962 let bytes = b"7 0 obj 62 endobj\n\n8 0 obj 63 endobj ";
963 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
964 let id = PdfObjectIdentifier::new(7, 0);
965 let object = PdfObject::Integer(62);
966 assert_eq!(
967 parser.next_indirect_object(&mut position),
968 Some((id, object))
969 );
970 let id = PdfObjectIdentifier::new(8, 0);
971 let object = PdfObject::Integer(63);
972 assert_eq!(
973 parser.next_indirect_object(&mut position),
974 Some((id, object))
975 );
976
977 let bytes = b"3 0 obj
978 << /Type /Page
979 /Parent 2 0 R
980 /Resources
981 << /Font
982 << /F1
983 << /Type /Font
984 /Subtype /Type1
985 /BaseFont /Times-Roman
986 >>
987 >>
988 >>
989 /Contents 4 1 R
990 >>
991endobj\n\n4 1 obj\n765\nendobj ";
992 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
993 let expected_id = PdfObjectIdentifier::new(3, 0);
994 let (parsed_id, object) = parser.next_indirect_object(&mut position).unwrap();
995 assert_eq!(parsed_id, expected_id);
996 if let PdfObject::Dictionary(dict) = object {
997 assert_eq!(dict.map.len(), 4);
998 assert_eq!(dict.map.get(&b"Type"[..]), Some(&PdfObject::Name(b"Page")));
999 assert_eq!(
1000 dict.map.get(&b"Parent"[..]),
1001 Some(&PdfObject::Reference(PdfObjectIdentifier::new(2, 0)))
1002 );
1003 assert_eq!(
1004 dict.map.get(&b"Contents"[..]),
1005 Some(&PdfObject::Reference(PdfObjectIdentifier::new(4, 1)))
1006 );
1007 if let Some(PdfObject::Dictionary(resources_dict)) = dict.map.get(&b"Resources"[..]) {
1008 assert_eq!(resources_dict.map.len(), 1);
1009 if let Some(PdfObject::Dictionary(font_dict)) = resources_dict.map.get(&b"Font"[..])
1010 {
1011 assert_eq!(font_dict.map.len(), 1);
1012 if let Some(PdfObject::Dictionary(f1_dict)) = font_dict.map.get(&b"F1"[..]) {
1013 assert_eq!(f1_dict.map.len(), 3);
1014 assert_eq!(
1015 f1_dict.map.get(&b"Type"[..]),
1016 Some(&PdfObject::Name(b"Font"))
1017 );
1018 assert_eq!(
1019 f1_dict.map.get(&b"Subtype"[..]),
1020 Some(&PdfObject::Name(b"Type1"))
1021 );
1022 assert_eq!(
1023 f1_dict.map.get(&b"BaseFont"[..]),
1024 Some(&PdfObject::Name(b"Times-Roman"))
1025 );
1026 } else {
1027 panic!("Failed to parse /Resources->/Font->/F1 dictionary");
1028 }
1029 } else {
1030 panic!("Failed to parse /Resources->/Font dictionary");
1031 }
1032 } else {
1033 panic!("Failed to parse /Resources dictionary");
1034 }
1035 } else {
1036 panic!("Failed parsing dictionary");
1037 }
1038 let expected_id = PdfObjectIdentifier::new(4, 1);
1039 let (parsed_id, object) = parser.next_indirect_object(&mut position).unwrap();
1040 assert_eq!(parsed_id, expected_id);
1041 assert_eq!(object, PdfObject::Integer(765));
1042 }
1043
1044 #[test]
1045 fn test_parsing_indirect_object() {
1046 let bytes = b"2 0 obj
1047 << /Type /Pages
1048 /Kids [3 1 R]
1049 /Count 1
1050 /MediaBox [1 2 300 144]
1051 >>
1052endobj ";
1053 let (parser, mut position) = (PdfParser::new(bytes), PdfReaderPosition::new());
1054 let (parsed_id, object) = parser.next_indirect_object(&mut position).unwrap();
1055 assert_eq!(parsed_id, PdfObjectIdentifier::new(2, 0));
1056 if let PdfObject::Dictionary(dict) = object {
1057 assert_eq!(dict.map.len(), 4);
1058 assert_eq!(dict.map.get(&b"Type"[..]), Some(&PdfObject::Name(b"Pages")));
1059 assert_eq!(
1060 dict.map.get(&b"Kids"[..]),
1061 Some(&PdfObject::Array(vec![PdfObject::Reference(
1062 PdfObjectIdentifier::new(3, 1)
1063 )]))
1064 );
1065 assert_eq!(dict.map.get(&b"Count"[..]), Some(&PdfObject::Integer(1)));
1066 assert_eq!(
1067 dict.map.get(&b"MediaBox"[..]),
1068 Some(&PdfObject::Array(vec![
1069 PdfObject::Integer(1),
1070 PdfObject::Integer(2),
1071 PdfObject::Integer(300),
1072 PdfObject::Integer(144)
1073 ]))
1074 );
1075 } else {
1076 panic!("Failed to parse dict");
1077 }
1078 }
1079}