1use zpdf_core::{Error, Result};
2
/// PDF version number as written in a file header (the `M.N` of `%PDF-M.N`).
///
/// Equality, ordering and hashing are derived. Ordering compares `major`
/// first and `minor` second (field declaration order), so
/// `PdfHeader { major: 1, minor: 7 } < PdfHeader { major: 2, minor: 0 }`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PdfHeader {
    /// Digit before the dot.
    pub major: u8,
    /// Digit after the dot.
    pub minor: u8,
}
8
9const DEFAULT_VERSION: PdfHeader = PdfHeader { major: 1, minor: 7 };
13
14pub fn parse_header(data: &[u8]) -> Result<PdfHeader> {
23 let marker = b"%PDF";
24 let pos = data
25 .windows(marker.len())
26 .position(|w| w == marker)
27 .ok_or(Error::NotAPdf)?;
28
29 let rest = &data[pos + marker.len()..];
34 let rest = rest.strip_prefix(b"-").unwrap_or(rest);
35 match parse_version(rest) {
36 Some(h) => Ok(h),
37 None => {
38 let shown = String::from_utf8_lossy(&rest[..rest.len().min(8)]);
39 tracing::warn!(
40 "malformed PDF header version {shown:?}; assuming PDF {}.{}",
41 DEFAULT_VERSION.major,
42 DEFAULT_VERSION.minor
43 );
44 Ok(DEFAULT_VERSION)
45 }
46 }
47}
48
49fn parse_version(rest: &[u8]) -> Option<PdfHeader> {
53 let major = rest.first()?.checked_sub(b'0').filter(|&v| v <= 9)?;
54 if rest.get(1).copied()? != b'.' {
55 return None;
56 }
57 let minor = rest.get(2)?.checked_sub(b'0').filter(|&v| v <= 9)?;
58 Some(PdfHeader { major, minor })
59}
60
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `bytes` and returns `(major, minor)`; panics if parsing fails.
    #[track_caller]
    fn version_of(bytes: &[u8]) -> (u8, u8) {
        let parsed = parse_header(bytes).unwrap();
        (parsed.major, parsed.minor)
    }

    #[test]
    fn valid_header() {
        assert_eq!(version_of(b"%PDF-1.7\n"), (1, 7));
    }

    #[test]
    fn pdf_2_0() {
        assert_eq!(version_of(b"%PDF-2.0\n"), (2, 0));
    }

    #[test]
    fn garbage_before_header() {
        // A UTF-8 byte-order mark precedes the marker.
        assert_eq!(version_of(b"\xef\xbb\xbf%PDF-1.4\n"), (1, 4));
    }

    #[test]
    fn not_a_pdf() {
        let outcome = parse_header(b"not a pdf");
        assert!(outcome.is_err());
    }

    #[test]
    fn marker_without_hyphen_defaults_version() {
        assert_eq!(version_of(b"%PDF/DA2 \x1d\n"), (1, 7));
    }

    #[test]
    fn malformed_version_defaults() {
        let cases: [&[u8]; 7] = [
            b"%PDF-a.4\n",
            b"%PDF-1.)",
            b"%PDF-0000000",
            b"%PDF-/Si3/De",
            b"%PDF-1e66666",
            b"%PDF-{<~00~",
            b"%PDF-\n2 0 obj",
        ];

        for &bytes in &cases {
            let parsed = parse_header(bytes).expect("marker present => header parses");
            assert_eq!((parsed.major, parsed.minor), (1, 7), "input {bytes:?}");
        }
    }

    #[test]
    fn truncated_version_defaults() {
        assert_eq!(version_of(b"%PDF-"), (1, 7));
    }

    #[test]
    fn no_marker_at_all_is_err() {
        let outcome = parse_header(b"1 0 obj<</Type/Catalog>>endobj");
        assert!(outcome.is_err());
    }
}