1use rpdfium_core::ParsingMode;
9use rpdfium_core::error::PdfError;
10
/// A PDF version number of the form `major.minor`, e.g. `1.7` or `2.0`.
///
/// The derived ordering compares `major` first and `minor` second, so
/// versions sort chronologically (`1.4 < 1.7 < 2.0`) and can be compared
/// directly when gating version-dependent features. `Hash` is derived so
/// that a version can be used as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PdfVersion {
    /// Major version component (the digit before the dot).
    pub major: u8,
    /// Minor version component (the digit after the dot).
    pub minor: u8,
}
17
18impl PdfVersion {
19 pub fn new(major: u8, minor: u8) -> Self {
20 Self { major, minor }
21 }
22}
23
24impl std::fmt::Display for PdfVersion {
25 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26 write!(f, "{}.{}", self.major, self.minor)
27 }
28}
29
/// Number of leading bytes of the input (1 KiB) that are scanned when
/// searching for the `%PDF-` marker in lenient mode.
const MAX_HEADER_SEARCH: usize = 1 << 10;
33
34pub fn parse_header(source: &[u8], mode: ParsingMode) -> Result<(PdfVersion, u64), PdfError> {
42 if source.len() < 8 {
43 return Err(PdfError::InvalidHeader);
44 }
45
46 let header_pos = find_header(source, mode)?;
47 let remaining = &source[header_pos..];
48
49 if remaining.len() < 8 {
51 return Err(PdfError::InvalidHeader);
52 }
53
54 let major = match remaining[5] {
56 b @ b'0'..=b'9' => b - b'0',
57 _ => return Err(PdfError::InvalidHeader),
58 };
59
60 if remaining[6] != b'.' {
62 return Err(PdfError::InvalidHeader);
63 }
64
65 let minor = match remaining[7] {
67 b @ b'0'..=b'9' => b - b'0',
68 _ => return Err(PdfError::InvalidHeader),
69 };
70
71 let mut pos = header_pos + 8;
73
74 while pos < source.len() && source[pos] != b'\r' && source[pos] != b'\n' {
76 pos += 1;
77 }
78
79 if pos < source.len() && source[pos] == b'\r' {
81 pos += 1;
82 }
83 if pos < source.len() && source[pos] == b'\n' {
84 pos += 1;
85 }
86
87 Ok((PdfVersion::new(major, minor), pos as u64))
88}
89
90fn find_header(source: &[u8], mode: ParsingMode) -> Result<usize, PdfError> {
92 let marker = b"%PDF-";
93 let search_limit = source.len().min(MAX_HEADER_SEARCH);
94
95 match mode {
96 ParsingMode::Strict => {
97 if source.starts_with(marker) {
98 Ok(0)
99 } else {
100 Err(PdfError::InvalidHeader)
101 }
102 }
103 ParsingMode::Lenient => {
104 for i in 0..search_limit.saturating_sub(marker.len()) {
106 if source[i..].starts_with(marker) {
107 if i > 0 {
108 tracing::warn!(
109 offset = i,
110 "PDF header not at byte 0; found garbage before %PDF-"
111 );
112 }
113 return Ok(i);
114 }
115 }
116 Err(PdfError::InvalidHeader)
117 }
118 }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `bytes` in strict mode and panics if the header is rejected.
    fn strict_ok(bytes: &[u8]) -> (PdfVersion, u64) {
        parse_header(bytes, ParsingMode::Strict).unwrap()
    }

    /// Reports whether strict-mode parsing of `bytes` fails.
    fn strict_fails(bytes: &[u8]) -> bool {
        parse_header(bytes, ParsingMode::Strict).is_err()
    }

    // --- well-formed headers -------------------------------------------------

    #[test]
    fn test_parse_valid_header_1_7() {
        let (version, offset) = strict_ok(b"%PDF-1.7\n");
        assert_eq!(version, PdfVersion::new(1, 7));
        assert_eq!(offset, 9);
    }

    #[test]
    fn test_parse_valid_header_2_0() {
        // CRLF counts as a single line terminator.
        let (version, offset) = strict_ok(b"%PDF-2.0\r\n");
        assert_eq!(version, PdfVersion::new(2, 0));
        assert_eq!(offset, 10);
    }

    #[test]
    fn test_parse_header_1_4() {
        // The binary-marker comment on the second line is not part of the header.
        let (version, offset) = strict_ok(b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n");
        assert_eq!(version, PdfVersion::new(1, 4));
        assert_eq!(offset, 9);
    }

    #[test]
    fn test_parse_header_with_cr() {
        let (version, offset) = strict_ok(b"%PDF-1.5\r");
        assert_eq!(version, PdfVersion::new(1, 5));
        assert_eq!(offset, 9);
    }

    #[test]
    fn test_parse_header_no_newline() {
        let (version, _offset) = strict_ok(b"%PDF-1.6 rest of file");
        assert_eq!(version, PdfVersion::new(1, 6));
    }

    // --- strict vs. lenient marker placement ---------------------------------

    #[test]
    fn test_strict_rejects_garbage_before_header() {
        assert!(strict_fails(b"garbage%PDF-1.7\n"));
    }

    #[test]
    fn test_lenient_accepts_garbage_before_header() {
        let parsed = parse_header(b"\0\0\0%PDF-1.7\n", ParsingMode::Lenient);
        let (version, _offset) = parsed.unwrap();
        assert_eq!(version, PdfVersion::new(1, 7));
    }

    // --- malformed input -----------------------------------------------------

    #[test]
    fn test_too_short() {
        assert!(strict_fails(b"%PDF-1"));
    }

    #[test]
    fn test_invalid_version_char() {
        assert!(strict_fails(b"%PDF-X.Y\n"));
    }

    #[test]
    fn test_empty_source() {
        assert!(parse_header(b"", ParsingMode::Lenient).is_err());
    }

    // --- formatting ----------------------------------------------------------

    #[test]
    fn test_version_display() {
        let version = PdfVersion::new(1, 7);
        assert_eq!(version.to_string(), "1.7");
    }
}