1use rpdfium_core::error::{ParseError, PdfError};
12use rpdfium_core::fx_system::MAX_OBJECT_NUMBER;
13
14use crate::object::ObjectId;
15use crate::tokenizer::is_whitespace;
16
17#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum XrefEntryType {
20 InUse { offset: u64 },
22 InStream { stream_id: ObjectId, index: u32 },
24 Free,
26}
27
28#[derive(Debug, Clone)]
30pub struct XrefEntry {
31 pub id: ObjectId,
32 pub entry_type: XrefEntryType,
33}
34
35#[derive(Debug, Clone)]
37pub struct XrefSection {
38 pub entries: Vec<XrefEntry>,
39}
40
41#[derive(Debug, Clone)]
43pub struct XrefTable {
44 pub sections: Vec<XrefSection>,
45 pub start_offset: u64,
48}
49
50impl XrefTable {
51 pub fn new() -> Self {
52 Self {
53 sections: Vec::new(),
54 start_offset: 0,
55 }
56 }
57
58 pub fn push(&mut self, section: XrefSection) {
60 self.sections.push(section);
61 }
62}
63
64impl Default for XrefTable {
65 fn default() -> Self {
66 Self::new()
67 }
68}
69
70pub fn parse_xref_table(source: &[u8], offset: u64) -> Result<(XrefSection, u64), PdfError> {
79 let mut pos = offset as usize;
80
81 while pos < source.len() && is_whitespace(source[pos]) {
83 pos += 1;
84 }
85
86 if pos + 4 > source.len() || &source[pos..pos + 4] != b"xref" {
88 return Err(PdfError::InvalidXref);
89 }
90 pos += 4;
91
92 skip_eol(source, &mut pos);
94
95 let mut entries = Vec::new();
96
97 loop {
99 while pos < source.len() && is_whitespace(source[pos]) {
101 pos += 1;
102 }
103
104 if pos >= source.len() {
105 break;
106 }
107
108 if pos + 7 <= source.len() && &source[pos..pos + 7] == b"trailer" {
110 break;
111 }
112
113 let (start_id, count) = parse_subsection_header(source, &mut pos)?;
115
116 if start_id.saturating_add(count) > MAX_OBJECT_NUMBER as u64 {
118 return Err(PdfError::InvalidXref);
119 }
120
121 for i in 0..count {
123 skip_eol_minimal(source, &mut pos);
124
125 let entry_bytes = if pos + 20 <= source.len() {
126 &source[pos..pos + 20]
127 } else {
128 return Err(PdfError::Parse(ParseError::InvalidXrefEntry {
129 offset: pos as u64,
130 }));
131 };
132
133 let entry = parse_xref_entry(entry_bytes, start_id + i, pos as u64)?;
134 pos += 20;
135
136 entries.push(entry);
137 }
138 }
139
140 Ok((XrefSection { entries }, pos as u64))
141}
142
143fn parse_subsection_header(source: &[u8], pos: &mut usize) -> Result<(u64, u64), PdfError> {
145 let start_id = read_u64(source, pos)?;
147
148 while *pos < source.len() && (source[*pos] == b' ' || source[*pos] == b'\t') {
150 *pos += 1;
151 }
152
153 let count = read_u64(source, pos)?;
155
156 skip_eol(source, pos);
158
159 Ok((start_id, count))
160}
161
162fn read_u64(source: &[u8], pos: &mut usize) -> Result<u64, PdfError> {
164 let start = *pos;
165
166 while *pos < source.len() && source[*pos] >= b'0' && source[*pos] <= b'9' {
167 *pos += 1;
168 }
169
170 if *pos == start {
171 return Err(PdfError::InvalidXref);
172 }
173
174 let s = std::str::from_utf8(&source[start..*pos]).map_err(|_| PdfError::InvalidXref)?;
175 s.parse::<u64>().map_err(|_| PdfError::InvalidXref)
176}
177
178fn parse_xref_entry(entry: &[u8], object_number: u64, offset: u64) -> Result<XrefEntry, PdfError> {
183 if entry.len() < 18 {
185 return Err(PdfError::Parse(ParseError::InvalidXrefEntry { offset }));
186 }
187
188 let offset_str = std::str::from_utf8(&entry[0..10])
190 .map_err(|_| PdfError::Parse(ParseError::InvalidXrefEntry { offset }))?;
191 let entry_offset: u64 = offset_str
192 .trim()
193 .parse()
194 .map_err(|_| PdfError::Parse(ParseError::InvalidXrefEntry { offset }))?;
195
196 let gen_str = std::str::from_utf8(&entry[11..16])
198 .map_err(|_| PdfError::Parse(ParseError::InvalidXrefEntry { offset }))?;
199 let generation: u16 = gen_str
200 .trim()
201 .parse()
202 .map_err(|_| PdfError::Parse(ParseError::InvalidXrefEntry { offset }))?;
203
204 let type_marker = entry[17];
206
207 let id = ObjectId::new(object_number as u32, generation);
208
209 let entry_type = match type_marker {
210 b'n' => XrefEntryType::InUse {
211 offset: entry_offset,
212 },
213 b'f' => XrefEntryType::Free,
214 _ => {
215 return Err(PdfError::Parse(ParseError::InvalidXrefEntry { offset }));
216 }
217 };
218
219 Ok(XrefEntry { id, entry_type })
220}
221
222fn skip_eol(source: &[u8], pos: &mut usize) {
224 while *pos < source.len() && is_whitespace(source[*pos]) {
225 *pos += 1;
226 }
227}
228
/// Advances `pos` past carriage returns and line feeds only.
///
/// Blanks and tabs are deliberately not consumed. A position at or beyond the
/// end of `source` is left unchanged.
fn skip_eol_minimal(source: &[u8], pos: &mut usize) {
    let line_breaks = source.get(*pos..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&byte| matches!(byte, b'\r' | b'\n'))
            .count()
    });
    *pos += line_breaks;
}
236
237pub fn rebuild_xref(source: &[u8]) -> Result<(XrefTable, crate::trailer::TrailerInfo), PdfError> {
248 let mut entries = Vec::new();
249 let mut pos = 0;
250
251 while pos < source.len() {
253 if !source[pos].is_ascii_digit() {
255 pos += 1;
256 continue;
257 }
258
259 let start = pos;
261 if let Some((number, generation, obj_keyword_end)) = try_parse_obj_marker(source, pos) {
262 if number <= MAX_OBJECT_NUMBER {
263 let valid_boundary = start == 0 || is_whitespace(source[start - 1]);
265 if valid_boundary {
266 entries.push(XrefEntry {
267 id: ObjectId::new(number, generation),
268 entry_type: XrefEntryType::InUse {
269 offset: start as u64,
270 },
271 });
272 }
273 }
274 pos = obj_keyword_end;
276 } else {
277 pos += 1;
278 }
279
280 if entries.len() > MAX_OBJECT_NUMBER as usize {
282 break;
283 }
284 }
285
286 if entries.is_empty() {
287 return Err(PdfError::InvalidXref);
288 }
289
290 let section = XrefSection { entries };
291
292 let trailer = rebuild_trailer_info(source, §ion)?;
294
295 let mut table = XrefTable::new();
296 table.push(section);
297
298 Ok((table, trailer))
299}
300
/// Tries to read `<number> <generation> obj` starting exactly at `pos`.
///
/// The three parts must be separated by exactly one space each, and the byte
/// following `obj` (if any) must not be an ASCII letter, so `object` is
/// rejected.
///
/// Returns `(number, generation, end)` where `end` is the position right after
/// the `obj` keyword, or `None` when the bytes at `pos` do not form a marker or
/// a number does not fit its type (`u32` / `u16`).
fn try_parse_obj_marker(source: &[u8], pos: usize) -> Option<(u32, u16, usize)> {
    // Length of the run of ASCII digits beginning at `from` (0 when out of range).
    let digit_run = |from: usize| -> usize {
        source.get(from..).map_or(0, |rest| {
            rest.iter().take_while(|byte| byte.is_ascii_digit()).count()
        })
    };

    // Object number.
    let number_end = pos + digit_run(pos);
    if number_end == pos {
        return None;
    }
    let number: u32 = std::str::from_utf8(&source[pos..number_end])
        .ok()?
        .parse()
        .ok()?;
    if source.get(number_end) != Some(&b' ') {
        return None;
    }

    // Generation number.
    let generation_start = number_end + 1;
    let generation_end = generation_start + digit_run(generation_start);
    if generation_end == generation_start {
        return None;
    }
    let generation: u16 = std::str::from_utf8(&source[generation_start..generation_end])
        .ok()?
        .parse()
        .ok()?;
    if source.get(generation_end) != Some(&b' ') {
        return None;
    }

    // The `obj` keyword itself.
    let keyword_start = generation_end + 1;
    let keyword_end = keyword_start + 3;
    if source.get(keyword_start..keyword_end) != Some(&b"obj"[..]) {
        return None;
    }

    // Reject longer words that merely begin with `obj`.
    if matches!(source.get(keyword_end), Some(next) if next.is_ascii_alphabetic()) {
        return None;
    }

    Some((number, generation, keyword_end))
}
355
356fn rebuild_trailer_info(
359 source: &[u8],
360 section: &XrefSection,
361) -> Result<crate::trailer::TrailerInfo, PdfError> {
362 if let Some(info) = try_find_trailer_dict(source) {
364 return Ok(info);
365 }
366
367 for entry in §ion.entries {
369 if let XrefEntryType::InUse { offset } = &entry.entry_type {
370 let off = *offset as usize;
371 let search_end = (off + 512).min(source.len());
373 let window = &source[off..search_end];
374 if contains_catalog_marker(window) {
375 let size = section
376 .entries
377 .iter()
378 .map(|e| e.id.number)
379 .max()
380 .unwrap_or(0)
381 + 1;
382 return Ok(crate::trailer::TrailerInfo {
383 root: entry.id,
384 info: None,
385 encrypt: None,
386 id: None,
387 size,
388 prev: None,
389 });
390 }
391 }
392 }
393
394 Err(PdfError::InvalidTrailer)
395}
396
397fn try_find_trailer_dict(source: &[u8]) -> Option<crate::trailer::TrailerInfo> {
400 let marker = b"trailer";
401 let start = source.len().saturating_sub(4096);
403 for i in (start..source.len().saturating_sub(marker.len())).rev() {
404 if &source[i..i + marker.len()] == marker {
405 let mut pos = i + marker.len();
407 while pos < source.len() && is_whitespace(source[pos]) {
408 pos += 1;
409 }
410 if let Ok(crate::object::Object::Dictionary(dict)) = crate::object_parser::parse_object(
411 source,
412 pos as u64,
413 rpdfium_core::ParsingMode::Lenient,
414 ) {
415 return extract_trailer_from_dict(&dict);
416 }
417 }
418 }
419 None
420}
421
422fn extract_trailer_from_dict(
424 dict: &std::collections::HashMap<rpdfium_core::Name, crate::object::Object>,
425) -> Option<crate::trailer::TrailerInfo> {
426 let root = match dict.get(&rpdfium_core::Name::root()) {
427 Some(crate::object::Object::Reference(id)) => *id,
428 _ => return None,
429 };
430 let size = match dict.get(&rpdfium_core::Name::size()) {
431 Some(crate::object::Object::Integer(n)) if *n > 0 => *n as u32,
432 _ => return None,
433 };
434 let info = match dict.get(&rpdfium_core::Name::info()) {
435 Some(crate::object::Object::Reference(id)) => Some(*id),
436 _ => None,
437 };
438 let encrypt = match dict.get(&rpdfium_core::Name::encrypt()) {
439 Some(crate::object::Object::Reference(id)) => Some(*id),
440 _ => None,
441 };
442 Some(crate::trailer::TrailerInfo {
443 root,
444 info,
445 encrypt,
446 id: None,
447 size,
448 prev: None,
449 })
450}
451
/// Reports whether `window` contains `/Type` followed by `/Catalog`.
///
/// Any amount (including none) of PDF white-space may separate the two
/// tokens, so `/Type/Catalog`, `/Type /Catalog` and `/Type\r\n/Catalog` all
/// match. Every occurrence of `/Type` in `window` is considered.
fn contains_catalog_marker(window: &[u8]) -> bool {
    const TYPE_KEY: &[u8] = b"/Type";
    const CATALOG_NAME: &[u8] = b"/Catalog";

    /// The six white-space bytes of PDF syntax: NUL, HT, LF, FF, CR, SP.
    fn is_pdf_whitespace(byte: u8) -> bool {
        matches!(byte, 0x00 | b'\t' | b'\n' | 0x0C | b'\r' | b' ')
    }

    window
        .windows(TYPE_KEY.len())
        .enumerate()
        .filter(|(_, candidate)| *candidate == TYPE_KEY)
        .any(|(index, _)| {
            let rest = &window[index + TYPE_KEY.len()..];
            let gap = rest
                .iter()
                .take_while(|&&byte| is_pdf_whitespace(byte))
                .count();
            rest[gap..].starts_with(CATALOG_NAME)
        })
}
473
#[cfg(test)]
mod tests {
    use super::*;

    /// A single subsection with one free and two in-use entries.
    #[test]
    fn test_parse_simple_xref_table() {
        let source = b"xref\n\
            0 3\n\
            0000000000 65535 f \r\n\
            0000000009 00000 n \r\n\
            0000000074 00000 n \r\n\
            trailer";
        let (section, end_pos) = parse_xref_table(source, 0).unwrap();
        assert_eq!(section.entries.len(), 3);

        assert_eq!(section.entries[0].id.number, 0);
        assert_eq!(section.entries[0].entry_type, XrefEntryType::Free);

        assert_eq!(section.entries[1].id.number, 1);
        assert_eq!(
            section.entries[1].entry_type,
            XrefEntryType::InUse { offset: 9 }
        );

        assert_eq!(section.entries[2].id.number, 2);
        assert_eq!(
            section.entries[2].entry_type,
            XrefEntryType::InUse { offset: 74 }
        );

        // Parsing must stop right at the `trailer` keyword.
        assert!(source[end_pos as usize..].starts_with(b"trailer"));
    }

    /// Two subsections are merged into one section, keeping object numbers.
    #[test]
    fn test_parse_xref_with_multiple_subsections() {
        let source = b"xref\n\
            0 1\n\
            0000000000 65535 f \r\n\
            3 1\n\
            0000025325 00000 n \r\n\
            trailer";
        let (section, _) = parse_xref_table(source, 0).unwrap();
        assert_eq!(section.entries.len(), 2);
        assert_eq!(section.entries[0].id.number, 0);
        assert_eq!(section.entries[1].id.number, 3);
    }

    /// A subsection starting beyond `MAX_OBJECT_NUMBER` is rejected.
    #[test]
    fn test_reject_object_number_exceeding_limit() {
        let source = format!(
            "xref\n{} 1\n0000000000 00000 n \r\ntrailer",
            MAX_OBJECT_NUMBER + 1
        );
        let result = parse_xref_table(source.as_bytes(), 0);
        assert!(result.is_err());
    }

    /// An `n` entry yields an in-use record with its offset and generation.
    #[test]
    fn test_parse_xref_entry_in_use() {
        let entry = b"0000000009 00000 n \r\n";
        let result = parse_xref_entry(entry, 1, 0).unwrap();
        assert_eq!(result.id.number, 1);
        assert_eq!(result.id.generation, 0);
        assert_eq!(result.entry_type, XrefEntryType::InUse { offset: 9 });
    }

    /// An `f` entry yields a free record.
    #[test]
    fn test_parse_xref_entry_free() {
        let entry = b"0000000000 65535 f \r\n";
        let result = parse_xref_entry(entry, 0, 0).unwrap();
        assert_eq!(result.id.number, 0);
        assert_eq!(result.id.generation, 65535);
        assert_eq!(result.entry_type, XrefEntryType::Free);
    }

    /// The default table holds no sections.
    #[test]
    fn test_xref_table_default() {
        let table = XrefTable::default();
        assert!(table.sections.is_empty());
    }

    /// Parsing starts at the supplied offset, not at the start of the data.
    #[test]
    fn test_parse_xref_at_offset() {
        let prefix = b"some garbage before ";
        let xref = b"xref\n0 1\n0000000000 65535 f \r\ntrailer";
        let mut source = prefix.to_vec();
        source.extend_from_slice(xref);
        let (section, _) = parse_xref_table(&source, prefix.len() as u64).unwrap();
        assert_eq!(section.entries.len(), 1);
    }

    /// Non-contiguous subsections keep their own starting object numbers.
    #[test]
    fn test_parser_load_cross_ref_v4_non_contiguous() {
        let source = b"xref\n\
            0 1\n\
            0000000000 65535 f \r\n\
            3 1\n\
            0000025325 00000 n \r\n\
            8 2\n\
            0000025518 00002 n \r\n\
            0000025635 00000 n \r\n\
            12 1\n\
            0000025777 00000 n \r\n\
            trailer";
        let (section, end_pos) = parse_xref_table(source, 0).unwrap();
        assert_eq!(section.entries.len(), 5);

        assert_eq!(section.entries[0].id.number, 0);
        assert_eq!(section.entries[0].id.generation, 65535);
        assert_eq!(section.entries[0].entry_type, XrefEntryType::Free);

        assert_eq!(section.entries[1].id.number, 3);
        assert_eq!(section.entries[1].id.generation, 0);
        assert_eq!(
            section.entries[1].entry_type,
            XrefEntryType::InUse { offset: 25325 }
        );

        assert_eq!(section.entries[2].id.number, 8);
        assert_eq!(section.entries[2].id.generation, 2);
        assert_eq!(
            section.entries[2].entry_type,
            XrefEntryType::InUse { offset: 25518 }
        );

        assert_eq!(section.entries[3].id.number, 9);
        assert_eq!(section.entries[3].id.generation, 0);
        assert_eq!(
            section.entries[3].entry_type,
            XrefEntryType::InUse { offset: 25635 }
        );

        assert_eq!(section.entries[4].id.number, 12);
        assert_eq!(section.entries[4].id.generation, 0);
        assert_eq!(
            section.entries[4].entry_type,
            XrefEntryType::InUse { offset: 25777 }
        );

        assert!(source[end_pos as usize..].starts_with(b"trailer"));
    }

    /// A table with 2048 entries parses completely.
    #[test]
    fn test_parser_load_cross_ref_v4_large_table() {
        let count = 2048u64;
        let mut source = Vec::new();
        source.extend_from_slice(format!("xref\n0 {count}\n").as_bytes());

        // Object 0 is the free-list head.
        source.extend_from_slice(b"0000000000 65535 f \r\n");

        // Every other object is in use at offset `i * 100`.
        for i in 1..count {
            source.extend_from_slice(format!("{:010} 00000 n \r\n", i * 100).as_bytes());
        }
        source.extend_from_slice(b"trailer");

        let (section, end_pos) = parse_xref_table(&source, 0).unwrap();
        assert_eq!(section.entries.len(), count as usize);

        assert_eq!(section.entries[0].id.number, 0);
        assert_eq!(section.entries[0].entry_type, XrefEntryType::Free);

        let last = &section.entries[count as usize - 1];
        assert_eq!(last.id.number, (count - 1) as u32);
        assert_eq!(
            last.entry_type,
            XrefEntryType::InUse {
                offset: (count - 1) * 100
            }
        );

        assert!(source[end_pos as usize..].starts_with(b"trailer"));
    }

    /// Free entries are reported as free whatever their offset field holds.
    #[test]
    fn test_parser_load_cross_ref_v4_free_entry_chain() {
        let source = b"xref\n\
            0 4\n\
            0000000003 65535 f \r\n\
            0000000100 00000 n \r\n\
            0000000000 65535 f \r\n\
            0000000000 65535 f \r\n\
            trailer";
        let (section, _) = parse_xref_table(source, 0).unwrap();
        assert_eq!(section.entries.len(), 4);

        assert_eq!(section.entries[0].entry_type, XrefEntryType::Free);
        assert_eq!(
            section.entries[1].entry_type,
            XrefEntryType::InUse { offset: 100 }
        );
        assert_eq!(section.entries[2].entry_type, XrefEntryType::Free);
        assert_eq!(section.entries[3].entry_type, XrefEntryType::Free);
    }

    /// A subsection that does not start at object 0.
    #[test]
    fn test_parse_xref_single_entry() {
        let source = b"xref\n\
            5 1\n\
            0000012345 00003 n \r\n\
            trailer";
        let (section, _) = parse_xref_table(source, 0).unwrap();
        assert_eq!(section.entries.len(), 1);
        assert_eq!(section.entries[0].id.number, 5);
        assert_eq!(section.entries[0].id.generation, 3);
        assert_eq!(
            section.entries[0].entry_type,
            XrefEntryType::InUse { offset: 12345 }
        );
    }

    /// An empty subsection produces no entries.
    #[test]
    fn test_parse_xref_zero_entries() {
        let source = b"xref\n\
            0 0\n\
            trailer";
        let (section, _) = parse_xref_table(source, 0).unwrap();
        assert!(section.entries.is_empty());
    }

    /// An entry shorter than 20 bytes is an error.
    #[test]
    fn test_parse_xref_truncated_entry() {
        let source = b"xref\n\
            0 1\n\
            0000000000 65535";
        let result = parse_xref_table(source, 0);
        assert!(result.is_err());
    }

    /// Data that does not begin with `xref` is an error.
    #[test]
    fn test_parse_xref_missing_keyword() {
        let source = b"0 1\n0000000000 65535 f \r\ntrailer";
        let result = parse_xref_table(source, 0);
        assert!(result.is_err());
    }

    /// A type marker other than `n` or `f` is an error.
    #[test]
    fn test_parse_xref_entry_invalid_marker() {
        let entry = b"0000000009 00000 x \r\n";
        let result = parse_xref_entry(entry, 1, 0);
        assert!(result.is_err());
    }

    /// Non-zero generation numbers are preserved.
    #[test]
    fn test_parse_xref_entry_high_generation() {
        let entry = b"0000000009 12345 n \r\n";
        let result = parse_xref_entry(entry, 1, 0).unwrap();
        assert_eq!(result.id.generation, 12345);
    }

    /// The largest 10-digit offset is preserved.
    #[test]
    fn test_parse_xref_entry_large_offset() {
        let entry = b"9999999999 00000 n \r\n";
        let result = parse_xref_entry(entry, 1, 0).unwrap();
        assert_eq!(
            result.entry_type,
            XrefEntryType::InUse { offset: 9999999999 }
        );
    }

    /// Rebuilding finds both objects and takes root/size from the trailer.
    #[test]
    fn test_rebuild_xref_with_trailer() {
        let mut pdf = Vec::new();
        pdf.extend_from_slice(b"%PDF-1.4\n");

        let obj1_offset = pdf.len();
        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");

        let obj2_offset = pdf.len();
        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
        let (table, trailer) = rebuild_xref(&pdf).unwrap();
        assert_eq!(table.sections.len(), 1);

        let entries = &table.sections[0].entries;
        assert_eq!(entries.len(), 2);

        let entry1 = entries.iter().find(|e| e.id.number == 1).unwrap();
        assert_eq!(
            entry1.entry_type,
            XrefEntryType::InUse {
                offset: obj1_offset as u64
            }
        );

        let entry2 = entries.iter().find(|e| e.id.number == 2).unwrap();
        assert_eq!(
            entry2.entry_type,
            XrefEntryType::InUse {
                offset: obj2_offset as u64
            }
        );

        assert_eq!(trailer.root, ObjectId::new(1, 0));
        assert_eq!(trailer.size, 3);
    }

    /// Without a trailer, the catalog object is located by scanning.
    #[test]
    fn test_rebuild_xref_catalog_fallback() {
        let mut pdf = Vec::new();
        pdf.extend_from_slice(b"%PDF-1.4\n");

        let obj1_offset = pdf.len();
        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");

        let obj2_offset = pdf.len();
        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

        let (table, trailer) = rebuild_xref(&pdf).unwrap();
        assert_eq!(table.sections.len(), 1);

        let entries = &table.sections[0].entries;
        assert_eq!(entries.len(), 2);

        let entry1 = entries.iter().find(|e| e.id.number == 1).unwrap();
        assert_eq!(
            entry1.entry_type,
            XrefEntryType::InUse {
                offset: obj1_offset as u64
            }
        );

        let entry2 = entries.iter().find(|e| e.id.number == 2).unwrap();
        assert_eq!(
            entry2.entry_type,
            XrefEntryType::InUse {
                offset: obj2_offset as u64
            }
        );

        // Root comes from the catalog object, size from the highest number + 1.
        assert_eq!(trailer.root, ObjectId::new(1, 0));
        assert_eq!(trailer.size, 3);
    }

    /// Data without any object marker cannot be rebuilt.
    #[test]
    fn test_rebuild_xref_no_objects() {
        let source = b"%PDF-1.4\nno objects here at all\n%%EOF";
        let result = rebuild_xref(source);
        assert!(result.is_err());
    }

    /// The word "object" in running text is not taken for a marker.
    #[test]
    fn test_rebuild_xref_ignores_non_obj_keywords() {
        let source = b"%PDF-1.4\nThis is an object keyword test\n";
        let result = rebuild_xref(source);
        assert!(result.is_err());
    }

    /// A plain marker yields number, generation and the end of `obj`.
    #[test]
    fn test_try_parse_obj_marker_basic() {
        let source = b"1 0 obj\n";
        let result = try_parse_obj_marker(source, 0);
        assert!(result.is_some());
        let (number, generation, end) = result.unwrap();
        assert_eq!(number, 1);
        assert_eq!(generation, 0);
        assert_eq!(end, 7);
    }

    /// `object` must not be accepted as the `obj` keyword.
    #[test]
    fn test_try_parse_obj_marker_rejects_object_word() {
        let source = b"1 0 object\n";
        let result = try_parse_obj_marker(source, 0);
        assert!(result.is_none());
    }

    /// Catalog detection with and without a separating blank.
    #[test]
    fn test_contains_catalog_marker() {
        assert!(contains_catalog_marker(b"/Type /Catalog"));
        assert!(contains_catalog_marker(b"/Type/Catalog"));
        assert!(contains_catalog_marker(
            b"<< /Type /Catalog /Pages 2 0 R >>"
        ));
        assert!(!contains_catalog_marker(b"/Type /Pages"));
        assert!(!contains_catalog_marker(b"no catalog here"));
    }

    /// `startxref` of an upstream sample file is located correctly.
    #[test]
    fn test_parser_parse_start_xref() {
        let data = std::fs::read(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../pdfium-upstream/testing/resources/annotation_stamp_with_ap.pdf"
        ))
        .expect("test PDF file should exist");

        let xref_offset = crate::trailer::find_startxref(&data).unwrap();
        assert_eq!(xref_offset, 100940);
    }

    /// The same sample file with 765 zero bytes prepended reports the same
    /// `startxref` value.
    #[test]
    fn test_parser_parse_start_xref_with_header_offset() {
        let test_header_offset = 765usize;
        let original = std::fs::read(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../pdfium-upstream/testing/resources/annotation_stamp_with_ap.pdf"
        ))
        .expect("test PDF file should exist");

        let mut data = vec![0u8; test_header_offset];
        data.extend_from_slice(&original);

        let xref_offset = crate::trailer::find_startxref(&data).unwrap();
        assert_eq!(xref_offset, 100940);
    }

    /// The upstream linearized sample is detected as linearized.
    #[test]
    fn test_parser_parse_linearized_with_header_offset() {
        let test_header_offset = 765usize;
        let original = std::fs::read(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../pdfium-upstream/testing/resources/linearized.pdf"
        ))
        .expect("test PDF file should exist");

        // NOTE(review): the padded buffer is built but only `original` is
        // checked below — confirm whether `data` was meant to be tested.
        let mut data = vec![0u8; test_header_offset];
        data.extend_from_slice(&original);

        let info = crate::linearized_header::detect_linearized(
            &original,
            rpdfium_core::ParsingMode::Lenient,
        );
        assert!(
            info.is_some(),
            "original PDF should be detected as linearized"
        );
    }

    /// A bogus `startxref` must make a strict open fail.
    #[test]
    fn test_parser_bad_start_xref_should_not_build_cross_ref_table() {
        let data = b"%PDF1-7 0 obj <</Size 2 /W [0 0 0]\n>>\n\
            stream\n\
            aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\
            endstream\n\
            endobj\n\
            startxref\n\
            6\n\
            %%EOF\n";

        let result =
            crate::store::ObjectStore::open(data.to_vec(), rpdfium_core::ParsingMode::Strict);
        assert!(result.is_err(), "bad startxref should fail in Strict mode");
    }

    /// Rebuilding succeeds on the upstream "correct" sample.
    #[test]
    fn test_parser_rebuild_cross_ref_correctly() {
        let data = std::fs::read(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../pdfium-upstream/testing/resources/parser_rebuildxref_correct.pdf"
        ))
        .expect("test PDF file should exist");

        let result = rebuild_xref(&data);
        assert!(
            result.is_ok(),
            "rebuild_xref should succeed on correct file"
        );

        let (table, _trailer) = result.unwrap();
        assert!(!table.sections.is_empty());
        let entries = &table.sections[0].entries;
        assert!(!entries.is_empty(), "rebuilt xref should contain entries");
    }

    /// A file without trailer must fail a strict open; a lenient open is only
    /// required not to panic.
    #[test]
    fn test_parser_rebuild_cross_ref_failed() {
        let data = std::fs::read(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../pdfium-upstream/testing/resources/parser_rebuildxref_error_notrailer.pdf"
        ))
        .expect("test PDF file should exist");

        let result =
            crate::store::ObjectStore::open(data.clone(), rpdfium_core::ParsingMode::Strict);
        assert!(result.is_err(), "Strict open should fail with no trailer");

        let _lenient = crate::store::ObjectStore::open(data, rpdfium_core::ParsingMode::Lenient);
    }
}