1use std::collections::HashSet;
6
/// Heuristic structural validator for PDF byte streams.
///
/// Construct it with `PdfValidator::new` (lenient) or
/// `PdfValidator::new_strict` (strict) and call `validate_pdf`.
#[derive(Debug, Clone)]
pub struct PdfValidator {
    /// When `true`, `validate_pdf` reports warnings as errors.
    strict: bool,
}
12
/// Outcome of a validation run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationResult {
    /// No issues were recorded.
    Valid,
    /// Only non-fatal issues were recorded; the payload holds their messages.
    Warning(Vec<String>),
    /// At least one fatal issue was recorded; the payload holds the messages.
    Error(Vec<String>),
}

impl ValidationResult {
    /// Returns `true` for [`ValidationResult::Valid`] and
    /// [`ValidationResult::Warning`], `false` for [`ValidationResult::Error`].
    pub fn is_ok(&self) -> bool {
        match self {
            ValidationResult::Valid | ValidationResult::Warning(_) => true,
            ValidationResult::Error(_) => false,
        }
    }

    /// Returns `true` only for [`ValidationResult::Error`].
    pub fn has_errors(&self) -> bool {
        match self {
            ValidationResult::Error(_) => true,
            ValidationResult::Valid | ValidationResult::Warning(_) => false,
        }
    }

    /// Returns a copy of the recorded messages (empty for
    /// [`ValidationResult::Valid`]).
    pub fn issues(&self) -> Vec<String> {
        match self {
            ValidationResult::Valid => Vec::new(),
            ValidationResult::Warning(messages) | ValidationResult::Error(messages) => {
                messages.clone()
            }
        }
    }
}
44
45impl PdfValidator {
46 pub fn new() -> Self {
48 Self { strict: false }
49 }
50
51 pub fn new_strict() -> Self {
53 Self { strict: true }
54 }
55
56 pub fn validate_pdf(&self, pdf_bytes: &[u8]) -> ValidationResult {
64 let mut warnings = Vec::new();
65 let mut errors = Vec::new();
66
67 if pdf_bytes.len() < 20 {
69 errors.push("PDF file is too small (minimum 20 bytes)".to_string());
70 return ValidationResult::Error(errors);
71 }
72
73 if let Err(e) = self.check_header(pdf_bytes) {
75 errors.push(e);
76 }
77
78 if let Err(e) = self.check_eof(pdf_bytes) {
80 errors.push(e);
81 }
82
83 let xref_offset = match self.find_xref_offset(pdf_bytes) {
85 Ok(offset) => offset,
86 Err(e) => {
87 errors.push(e);
88 return ValidationResult::Error(errors);
89 }
90 };
91
92 match self.validate_xref(pdf_bytes, xref_offset) {
94 Ok(warns) => warnings.extend(warns),
95 Err(e) => errors.push(e),
96 }
97
98 match self.validate_trailer(pdf_bytes, xref_offset) {
100 Ok(warns) => warnings.extend(warns),
101 Err(e) => errors.push(e),
102 }
103
104 match self.validate_objects(pdf_bytes) {
106 Ok(warns) => warnings.extend(warns),
107 Err(e) => errors.push(e),
108 }
109
110 match self.check_catalog(pdf_bytes) {
112 Ok(warns) => warnings.extend(warns),
113 Err(e) => errors.push(e),
114 }
115
116 match self.check_pages(pdf_bytes) {
118 Ok(warns) => warnings.extend(warns),
119 Err(e) => errors.push(e),
120 }
121
122 if let Some(warning) = self.check_size(pdf_bytes) {
124 warnings.push(warning);
125 }
126
127 if !errors.is_empty() {
129 ValidationResult::Error(errors)
130 } else if !warnings.is_empty() {
131 if self.strict {
132 ValidationResult::Error(warnings)
133 } else {
134 ValidationResult::Warning(warnings)
135 }
136 } else {
137 ValidationResult::Valid
138 }
139 }
140
141 fn check_header(&self, pdf_bytes: &[u8]) -> Result<(), String> {
143 if !pdf_bytes.starts_with(b"%PDF-") {
144 return Err("PDF header missing or invalid (expected %PDF-)".to_string());
145 }
146
147 if pdf_bytes.len() < 8 {
149 return Err("PDF header truncated".to_string());
150 }
151
152 let header_line = match find_line_end(pdf_bytes, 0) {
153 Some(end) => &pdf_bytes[0..end],
154 None => return Err("PDF header line incomplete".to_string()),
155 };
156
157 let header_str = String::from_utf8_lossy(header_line);
158
159 if !header_str.starts_with("%PDF-1.") && !header_str.starts_with("%PDF-2.") {
161 return Err(format!("Unsupported PDF version: {}", header_str));
162 }
163
164 Ok(())
165 }
166
167 fn check_eof(&self, pdf_bytes: &[u8]) -> Result<(), String> {
169 let trimmed = trim_end_whitespace(pdf_bytes);
171
172 if !trimmed.ends_with(b"%%EOF") {
173 return Err("PDF file missing %%EOF marker".to_string());
174 }
175
176 Ok(())
177 }
178
179 fn find_xref_offset(&self, pdf_bytes: &[u8]) -> Result<usize, String> {
181 let content = String::from_utf8_lossy(pdf_bytes);
183
184 if let Some(pos) = content.rfind("startxref") {
185 let after_keyword = &content[pos + 9..];
187
188 for line in after_keyword.lines() {
190 let trimmed = line.trim();
191 if !trimmed.is_empty() {
192 if let Ok(offset) = trimmed.parse::<usize>() {
193 return Ok(offset);
194 }
195 }
196 }
197
198 return Err("startxref value not found or invalid".to_string());
199 }
200
201 Err("startxref keyword not found".to_string())
202 }
203
204 fn validate_xref(&self, pdf_bytes: &[u8], xref_offset: usize) -> Result<Vec<String>, String> {
206 let mut warnings = Vec::new();
207
208 if xref_offset >= pdf_bytes.len() {
209 return Err("xref offset points beyond file end".to_string());
210 }
211
212 let xref_section = &pdf_bytes[xref_offset..];
213 let xref_str = String::from_utf8_lossy(xref_section);
214
215 let trimmed_xref = xref_str.trim_start();
217 if !trimmed_xref.starts_with("xref") {
218 return Err(format!(
219 "xref table missing 'xref' keyword (found: {:?})",
220 &xref_str.chars().take(20).collect::<String>()
221 ));
222 }
223
224 let mut lines = xref_str.lines();
226 let _ = lines.next(); if let Some(header_line) = lines.next() {
229 let parts: Vec<&str> = header_line.split_whitespace().collect();
230 if parts.len() != 2 {
231 return Err("xref subsection header invalid".to_string());
232 }
233
234 let count = parts[1]
236 .parse::<usize>()
237 .map_err(|_| "xref object count invalid".to_string())?;
238
239 let mut entry_count = 0;
241 for line in lines {
242 if line.trim().starts_with("trailer") {
243 break;
244 }
245
246 let trimmed = line.trim();
247 if trimmed.is_empty() {
248 continue;
249 }
250
251 let parts: Vec<&str> = trimmed.split_whitespace().collect();
253 if parts.len() != 3 {
254 warnings.push(format!("xref entry malformed: {}", trimmed));
255 continue;
256 }
257
258 if parts[0].len() != 10 {
260 warnings.push(format!("xref offset not 10 digits: {}", parts[0]));
261 }
262
263 if parts[1].len() != 5 {
265 warnings.push(format!("xref generation not 5 digits: {}", parts[1]));
266 }
267
268 if parts[2] != "n" && parts[2] != "f" {
270 warnings.push(format!(
271 "xref flag invalid (expected 'n' or 'f'): {}",
272 parts[2]
273 ));
274 }
275
276 entry_count += 1;
277 }
278
279 if entry_count != count {
281 warnings.push(format!(
282 "xref entry count mismatch (declared: {}, found: {})",
283 count, entry_count
284 ));
285 }
286 } else {
287 return Err("xref subsection header missing".to_string());
288 }
289
290 Ok(warnings)
291 }
292
293 fn validate_trailer(
295 &self,
296 pdf_bytes: &[u8],
297 xref_offset: usize,
298 ) -> Result<Vec<String>, String> {
299 let mut warnings = Vec::new();
300
301 let xref_section = &pdf_bytes[xref_offset..];
302 let xref_str = String::from_utf8_lossy(xref_section);
303
304 if let Some(trailer_pos) = xref_str.find("trailer") {
306 let trailer_section = &xref_str[trailer_pos..];
307
308 if !trailer_section.contains("/Size") {
310 return Err("trailer missing required /Size entry".to_string());
311 }
312
313 if !trailer_section.contains("/Root") {
314 return Err("trailer missing required /Root entry".to_string());
315 }
316
317 if !trailer_section.contains("/Info") {
319 warnings.push("trailer missing /Info dictionary (metadata)".to_string());
320 }
321
322 if !trailer_section.contains("<<") || !trailer_section.contains(">>") {
324 return Err("trailer dictionary malformed".to_string());
325 }
326 } else {
327 return Err("trailer keyword not found".to_string());
328 }
329
330 Ok(warnings)
331 }
332
333 fn validate_objects(&self, pdf_bytes: &[u8]) -> Result<Vec<String>, String> {
335 let mut warnings = Vec::new();
336 let content = String::from_utf8_lossy(pdf_bytes);
337
338 let obj_starts: Vec<_> = content.match_indices(" obj\n").collect();
340 let obj_ends: Vec<_> = content.match_indices("\nendobj\n").collect();
341
342 if obj_starts.len() != obj_ends.len() {
343 return Err(format!(
344 "Mismatched obj/endobj pairs (obj: {}, endobj: {})",
345 obj_starts.len(),
346 obj_ends.len()
347 ));
348 }
349
350 for (pos, _) in &obj_starts {
352 let before = &content[..=*pos];
354 if let Some(line_start) = before.rfind('\n') {
355 let obj_line = &before[line_start + 1..=*pos];
356 let parts: Vec<&str> = obj_line.split_whitespace().collect();
357
358 if parts.len() < 3 {
359 warnings.push(format!("Object header malformed near offset {}", pos));
360 continue;
361 }
362
363 if parts[0].parse::<u32>().is_err() {
365 warnings.push(format!("Invalid object number: {}", parts[0]));
366 }
367
368 if parts[1].parse::<u16>().is_err() {
370 warnings.push(format!("Invalid generation number: {}", parts[1]));
371 }
372 }
373 }
374
375 let stream_count = content.matches("\nstream\n").count();
377 let endstream_count = content.matches("\nendstream\n").count();
378
379 if stream_count != endstream_count {
380 warnings.push(format!(
381 "Mismatched stream/endstream pairs (stream: {}, endstream: {})",
382 stream_count, endstream_count
383 ));
384 }
385
386 Ok(warnings)
387 }
388
389 fn check_catalog(&self, pdf_bytes: &[u8]) -> Result<Vec<String>, String> {
391 let mut warnings = Vec::new();
392 let content = String::from_utf8_lossy(pdf_bytes);
393
394 if !content.contains("/Type /Catalog") {
396 return Err("Catalog object (/Type /Catalog) not found".to_string());
397 }
398
399 if !content.contains("/Pages") {
401 return Err("Catalog missing required /Pages entry".to_string());
402 }
403
404 if !content.contains("/Outlines") {
406 warnings.push("Catalog missing /Outlines (bookmarks not present)".to_string());
407 }
408
409 Ok(warnings)
410 }
411
412 fn check_pages(&self, pdf_bytes: &[u8]) -> Result<Vec<String>, String> {
414 let mut warnings = Vec::new();
415 let content = String::from_utf8_lossy(pdf_bytes);
416
417 if !content.contains("/Type /Pages") {
419 return Err("Pages object (/Type /Pages) not found".to_string());
420 }
421
422 if !content.contains("/Kids") {
424 return Err("Pages object missing required /Kids array".to_string());
425 }
426
427 if !content.contains("/Count") {
428 return Err("Pages object missing required /Count entry".to_string());
429 }
430
431 let page_count = content.matches("/Type /Page\n").count();
433
434 if page_count == 0 {
435 warnings.push("No page objects found in document".to_string());
436 }
437
438 let page_positions: Vec<_> = content.match_indices("/Type /Page\n").collect();
440
441 for (pos, _) in page_positions {
442 let before = &content[..pos];
444 if let Some(obj_start) = before.rfind(" obj\n") {
445 let after = &content[pos..];
446 if let Some(obj_end) = after.find("\nendobj\n") {
447 let page_obj = &content[obj_start..pos + obj_end];
448
449 if !page_obj.contains("/Parent") {
451 warnings.push("Page object missing /Parent reference".to_string());
452 }
453
454 if !page_obj.contains("/MediaBox") && !page_obj.contains("/CropBox") {
455 warnings.push("Page object missing /MediaBox or /CropBox".to_string());
456 }
457
458 if !page_obj.contains("/Resources") {
459 warnings.push("Page object missing /Resources dictionary".to_string());
460 }
461 }
462 }
463 }
464
465 Ok(warnings)
466 }
467
468 #[allow(dead_code)]
470 fn check_resources(&self, pdf_bytes: &[u8]) -> Result<Vec<String>, String> {
471 let mut warnings = Vec::new();
472 let content = String::from_utf8_lossy(pdf_bytes);
473
474 let font_refs: Vec<_> = content.match_indices("/Font").collect();
476 let type1_fonts = content.matches("/Type1").count();
477 let truetype_fonts = content.matches("/TrueType").count();
478
479 if font_refs.is_empty() {
480 warnings.push("No font resources defined".to_string());
481 } else if type1_fonts == 0 && truetype_fonts == 0 {
482 warnings.push("Font resources defined but no font types found".to_string());
483 }
484
485 if content.contains("/XObject") {
487 if !content.contains("/Type /XObject") {
489 warnings.push("XObject referenced but no XObject definitions found".to_string());
490 }
491 }
492
493 Ok(warnings)
494 }
495
496 #[allow(dead_code)]
498 fn validate_stream(&self, pdf_bytes: &[u8]) -> Result<Vec<String>, String> {
499 let mut warnings = Vec::new();
500 let content = String::from_utf8_lossy(pdf_bytes);
501
502 let stream_positions: Vec<_> = content.match_indices("\nstream\n").collect();
504
505 for (pos, _) in stream_positions {
506 let before = &content[..pos];
508
509 if let Some(dict_start) = before.rfind("<<") {
511 let dict_section = &before[dict_start..];
512
513 if !dict_section.contains("/Length") {
514 warnings.push("Stream dictionary missing /Length entry".to_string());
515 }
516 } else {
517 warnings.push("Stream missing dictionary".to_string());
518 }
519 }
520
521 Ok(warnings)
522 }
523
524 #[allow(dead_code)]
526 fn validate_object(&self, pdf_bytes: &[u8]) -> Result<Vec<String>, String> {
527 let mut warnings = Vec::new();
528 let content = String::from_utf8_lossy(pdf_bytes);
529
530 let mut defined_objects = HashSet::new();
532
533 for line in content.lines() {
534 if line.trim().ends_with(" obj") {
535 let parts: Vec<&str> = line.split_whitespace().collect();
536 if parts.len() >= 3 {
537 if let Ok(obj_num) = parts[0].parse::<u32>() {
538 defined_objects.insert(obj_num);
539 }
540 }
541 }
542 }
543
544 for line in content.lines() {
547 for word_group in line.split_whitespace().collect::<Vec<_>>().windows(3) {
548 if word_group.len() == 3 && word_group[1] == "0" && word_group[2] == "R" {
549 if let Ok(obj_num) = word_group[0].parse::<u32>() {
550 if !defined_objects.contains(&obj_num) && obj_num > 0 {
551 warnings
552 .push(format!("Reference to undefined object: {} 0 R", obj_num));
553 }
554 }
555 }
556 }
557 }
558
559 Ok(warnings)
560 }
561
562 fn check_size(&self, pdf_bytes: &[u8]) -> Option<String> {
564 const MAX_REASONABLE_SIZE: usize = 100 * 1024 * 1024; const MIN_REASONABLE_SIZE: usize = 100; let size = pdf_bytes.len();
568
569 if size < MIN_REASONABLE_SIZE {
570 Some(format!(
571 "PDF file is very small ({} bytes), may be incomplete",
572 size
573 ))
574 } else if size > MAX_REASONABLE_SIZE {
575 Some(format!(
576 "PDF file is very large ({} MB), consider optimization",
577 size / (1024 * 1024)
578 ))
579 } else {
580 None
581 }
582 }
583}
584
585impl Default for PdfValidator {
586 fn default() -> Self {
587 Self::new()
588 }
589}
590
/// Returns the index of the first `\n` byte at or after `start`, or `None`
/// when there is none (including when `start` is past the end of `data`).
fn find_line_end(data: &[u8], start: usize) -> Option<usize> {
    data.iter()
        .skip(start)
        .position(|&byte| byte == b'\n')
        .map(|relative| relative + start)
}
600
/// Returns `data` without its trailing run of space, tab, `\n` and `\r`
/// bytes. The result is empty when `data` holds only such bytes.
fn trim_end_whitespace(data: &[u8]) -> &[u8] {
    let keep = data
        .iter()
        .rposition(|&byte| !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
        .map_or(0, |last| last + 1);

    &data[..keep]
}
611
#[cfg(test)]
mod tests {
    use super::*;

    /// The two constructors differ only in the `strict` flag.
    #[test]
    fn test_validator_creation() {
        let lenient = PdfValidator::new();
        assert!(!lenient.strict);

        let strict = PdfValidator::new_strict();
        assert!(strict.strict);
    }

    /// `is_ok` / `has_errors` for each variant.
    #[test]
    fn test_validation_result() {
        let ok = ValidationResult::Valid;
        assert!(ok.is_ok());
        assert!(!ok.has_errors());

        let warn = ValidationResult::Warning(vec!["test warning".to_string()]);
        assert!(warn.is_ok());
        assert!(!warn.has_errors());

        let err = ValidationResult::Error(vec!["test error".to_string()]);
        assert!(!err.is_ok());
        assert!(err.has_errors());
    }

    /// Empty input is rejected.
    #[test]
    fn test_empty_pdf() {
        let outcome = PdfValidator::new().validate_pdf(b"");
        assert!(outcome.has_errors());
    }

    /// A bare header is rejected.
    #[test]
    fn test_minimal_pdf() {
        let outcome = PdfValidator::new().validate_pdf(b"%PDF-1.4");
        assert!(outcome.has_errors());
    }

    /// Data that does not start with `%PDF-` is rejected.
    #[test]
    fn test_invalid_header() {
        let bad_header = b"INVALID HEADER\n%%EOF\n";
        let outcome = PdfValidator::new().validate_pdf(bad_header);
        assert!(outcome.has_errors());
    }

    /// Data without the `%%EOF` marker is rejected.
    #[test]
    fn test_missing_eof() {
        let no_eof = b"%PDF-1.4\n1 0 obj\n<< /Type /Catalog >>\nendobj\n";
        let outcome = PdfValidator::new().validate_pdf(no_eof);
        assert!(outcome.has_errors());
    }

    /// A small single-page document passes lenient validation.
    #[test]
    fn test_basic_valid_pdf() {
        let document = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog /Pages 2 0 R >>\n\
            endobj\n\
            2 0 obj\n\
            << /Type /Pages /Kids [3 0 R] /Count 1 >>\n\
            endobj\n\
            3 0 obj\n\
            << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>\n\
            endobj\n\
            4 0 obj\n\
            << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\n\
            endobj\n\
            5 0 obj\n\
            << /Length 44 >>\n\
            stream\n\
            BT\n/F1 12 Tf\n100 700 Td\n(Hello World) Tj\nET\n\
            endstream\n\
            endobj\n\
            xref\n\
            0 6\n\
            0000000000 65535 f \n\
            0000000009 00000 n \n\
            0000000058 00000 n \n\
            0000000115 00000 n \n\
            0000000261 00000 n \n\
            0000000339 00000 n \n\
            trailer\n\
            << /Size 6 /Root 1 0 R >>\n\
            startxref\n\
            404\n\
            %%EOF\n";

        let outcome = PdfValidator::new().validate_pdf(document);

        // Dump the issues before asserting so a failure is easy to diagnose.
        if !outcome.is_ok() {
            eprintln!("Validation result: {:?}", outcome);
            for message in outcome.issues() {
                eprintln!(" - {}", message);
            }
        }

        assert!(outcome.is_ok());
    }

    /// `find_line_end` returns the index of the first newline, if any.
    #[test]
    fn test_find_line_end() {
        let with_newline = b"Hello\nWorld";
        assert_eq!(find_line_end(with_newline, 0), Some(5));

        let without_newline = b"Hello";
        assert_eq!(find_line_end(without_newline, 0), None);
    }

    /// `trim_end_whitespace` strips trailing whitespace bytes only.
    #[test]
    fn test_trim_end_whitespace() {
        let padded = b"Hello \n\r\t";
        assert_eq!(trim_end_whitespace(padded), b"Hello");

        let bare = b"Hello";
        assert_eq!(trim_end_whitespace(bare), b"Hello");
    }

    /// Header acceptance and rejection cases.
    #[test]
    fn test_check_header() {
        let checker = PdfValidator::new();

        // Accepted versions.
        assert!(checker.check_header(b"%PDF-1.4\n").is_ok());
        assert!(checker.check_header(b"%PDF-1.7\n").is_ok());
        assert!(checker.check_header(b"%PDF-2.0\n").is_ok());

        // Missing `%` prefix and missing version.
        assert!(checker.check_header(b"PDF-1.4\n").is_err());
        assert!(checker.check_header(b"%PDF-\n").is_err());
    }

    /// Size warnings: below the minimum warns, a normal size does not.
    #[test]
    fn test_check_size() {
        let checker = PdfValidator::new();

        let undersized = vec![0u8; 50];
        assert!(checker.check_size(&undersized).is_some());

        let typical = vec![0u8; 1024];
        assert!(checker.check_size(&typical).is_none());
    }
}
772
#[cfg(test)]
mod tests_extended {
    use super::*;

    // --- ValidationResult accessors -------------------------------------

    #[test]
    fn test_valid_is_ok() {
        assert!(ValidationResult::Valid.is_ok());
    }

    #[test]
    fn test_valid_has_no_errors() {
        assert!(!ValidationResult::Valid.has_errors());
    }

    #[test]
    fn test_valid_issues_empty() {
        assert!(ValidationResult::Valid.issues().is_empty());
    }

    #[test]
    fn test_warning_is_ok() {
        let w = ValidationResult::Warning(vec!["w1".to_string()]);
        assert!(w.is_ok());
    }

    #[test]
    fn test_warning_has_no_errors() {
        let w = ValidationResult::Warning(vec!["w1".to_string()]);
        assert!(!w.has_errors());
    }

    #[test]
    fn test_warning_issues_returns_messages() {
        let w = ValidationResult::Warning(vec!["a".to_string(), "b".to_string()]);
        let issues = w.issues();
        assert_eq!(issues.len(), 2);
        assert_eq!(issues[0], "a");
        assert_eq!(issues[1], "b");
    }

    #[test]
    fn test_error_not_ok() {
        let e = ValidationResult::Error(vec!["bad".to_string()]);
        assert!(!e.is_ok());
    }

    #[test]
    fn test_error_has_errors() {
        let e = ValidationResult::Error(vec!["bad".to_string()]);
        assert!(e.has_errors());
    }

    #[test]
    fn test_error_issues_returns_messages() {
        let e = ValidationResult::Error(vec!["e1".to_string(), "e2".to_string()]);
        let issues = e.issues();
        assert_eq!(issues.len(), 2);
        assert_eq!(issues[0], "e1");
    }

    // --- Constructors ----------------------------------------------------

    #[test]
    fn test_default_is_non_strict() {
        let v = PdfValidator::default();
        assert!(!v.strict);
    }

    #[test]
    fn test_new_strict_is_strict() {
        let v = PdfValidator::new_strict();
        assert!(v.strict);
    }

    // --- Header checks ---------------------------------------------------

    /// Any `%PDF-1.x` header is accepted by `check_header`, including 1.0.
    #[test]
    fn test_pdf_1_0_header_accepted() {
        let v = PdfValidator::new();
        assert!(v.check_header(b"%PDF-1.0\n").is_ok());
    }

    #[test]
    fn test_pdf_1_7_header_valid() {
        let v = PdfValidator::new();
        assert!(v.check_header(b"%PDF-1.7\n").is_ok());
    }

    #[test]
    fn test_pdf_2_0_header_valid() {
        let v = PdfValidator::new();
        assert!(v.check_header(b"%PDF-2.0\n").is_ok());
    }

    #[test]
    fn test_missing_percent_header_invalid() {
        let v = PdfValidator::new();
        assert!(v.check_header(b"PDF-1.4\n").is_err());
    }

    #[test]
    fn test_garbage_header_invalid() {
        let v = PdfValidator::new();
        assert!(v.check_header(b"garbage\n").is_err());
    }

    // --- EOF marker checks -----------------------------------------------

    #[test]
    fn test_check_eof_present() {
        let v = PdfValidator::new();
        let data = b"...content...%%EOF";
        assert!(v.check_eof(data).is_ok());
    }

    #[test]
    fn test_check_eof_missing() {
        let v = PdfValidator::new();
        let data = b"...content...no-eof-marker";
        assert!(v.check_eof(data).is_err());
    }

    #[test]
    fn test_check_eof_with_trailing_whitespace() {
        let v = PdfValidator::new();
        let data = b"%%EOF\n\r\n";
        assert!(v.check_eof(data).is_ok());
    }

    // --- Size checks -----------------------------------------------------

    #[test]
    fn test_tiny_pdf_triggers_size_warning() {
        let v = PdfValidator::new();
        let tiny = vec![0u8; 10];
        assert!(v.check_size(&tiny).is_some());
    }

    #[test]
    fn test_reasonable_size_no_warning() {
        let v = PdfValidator::new();
        let normal = vec![0u8; 4096];
        assert!(v.check_size(&normal).is_none());
    }

    // --- Whole-document validation ---------------------------------------

    /// Builds a four-object, single-page document whose xref entries and
    /// `startxref` value are computed from the actual object lengths.
    fn minimal_valid_pdf() -> Vec<u8> {
        let obj1: &[u8] = b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n";
        let obj2: &[u8] = b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n";
        let obj3: &[u8] = b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 4 0 R >> >> >>\nendobj\n";
        let obj4: &[u8] =
            b"4 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\nendobj\n";

        let header: &[u8] = b"%PDF-1.4\n";

        // Each object starts where the previous one ends.
        let o1_off = header.len();
        let o2_off = o1_off + obj1.len();
        let o3_off = o2_off + obj2.len();
        let o4_off = o3_off + obj3.len();
        let xref_off = o4_off + obj4.len();

        let xref = format!(
            "xref\n 0 5\n {o0:010} 65535 f \n {o1:010} 00000 n \n {o2:010} 00000 n \n {o3:010} 00000 n \n {o4:010} 00000 n \n trailer\n << /Size 5 /Root 1 0 R >>\n startxref\n {xref_off}\n %%EOF\n",
            o0 = 0,
            o1 = o1_off,
            o2 = o2_off,
            o3 = o3_off,
            o4 = o4_off,
            xref_off = xref_off,
        );

        let mut pdf = Vec::new();
        pdf.extend_from_slice(header);
        pdf.extend_from_slice(obj1);
        pdf.extend_from_slice(obj2);
        pdf.extend_from_slice(obj3);
        pdf.extend_from_slice(obj4);
        pdf.extend_from_slice(xref.as_bytes());
        pdf
    }

    #[test]
    fn test_valid_pdf_passes_validation() {
        let v = PdfValidator::new();
        let result = v.validate_pdf(&minimal_valid_pdf());
        assert!(result.is_ok(), "validation failed: {:?}", result);
    }

    #[test]
    fn test_invalid_pdf_too_short() {
        let v = PdfValidator::new();
        let result = v.validate_pdf(b"%PDF-1");
        assert!(result.has_errors());
    }

    #[test]
    fn test_invalid_pdf_no_header() {
        let v = PdfValidator::new();
        let result = v.validate_pdf(b"JUNK JUNK JUNK JUNK JUNK\n%%EOF\n");
        assert!(result.has_errors());
    }

    #[test]
    fn test_invalid_pdf_no_eof() {
        let v = PdfValidator::new();
        let result = v.validate_pdf(b"%PDF-1.4\nsome content without eof marker");
        assert!(result.has_errors());
    }

    // --- Strict vs. lenient mode -----------------------------------------

    /// The minimal document has no `/Info` or `/Outlines`, so it produces
    /// warnings, which strict mode must report as errors.
    #[test]
    fn test_strict_mode_treats_warnings_as_errors() {
        let v_strict = PdfValidator::new_strict();
        let pdf = minimal_valid_pdf();
        let result = v_strict.validate_pdf(&pdf);
        assert!(
            result.has_errors(),
            "strict mode should have errors (warnings promoted): {:?}",
            result
        );
    }

    #[test]
    fn test_non_strict_warnings_not_errors() {
        let v = PdfValidator::new();
        let pdf = minimal_valid_pdf();
        let result = v.validate_pdf(&pdf);
        assert!(result.is_ok(), "non-strict should be ok: {:?}", result);
    }

    // --- trim_end_whitespace ---------------------------------------------

    #[test]
    fn test_trim_end_whitespace_empty() {
        assert_eq!(trim_end_whitespace(b""), b"");
    }

    #[test]
    fn test_trim_end_whitespace_only_spaces() {
        assert_eq!(trim_end_whitespace(b" "), b"");
    }

    #[test]
    fn test_trim_end_whitespace_preserves_content() {
        assert_eq!(trim_end_whitespace(b"abc\n\r"), b"abc");
    }

    // --- find_line_end ---------------------------------------------------

    #[test]
    fn test_find_line_end_at_start() {
        assert_eq!(find_line_end(b"\nrest", 0), Some(0));
    }

    #[test]
    fn test_find_line_end_mid_string() {
        assert_eq!(find_line_end(b"ab\ncd", 0), Some(2));
    }

    #[test]
    fn test_find_line_end_none_without_newline() {
        assert_eq!(find_line_end(b"abcdef", 0), None);
    }

    #[test]
    fn test_find_line_end_with_offset() {
        assert_eq!(find_line_end(b"a\nb\nc", 2), Some(3));
    }
}