1use super::{ParseError, ParseResult};
6use std::io::{BufRead, BufReader, Read};
7
/// A PDF specification version as written in the file header (`major.minor`).
///
/// Versions order first by `major`, then by `minor`, so `1.4 < 1.7 < 2.0`.
/// The type is a plain pair of bytes and is therefore `Copy`, hashable and
/// totally ordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PdfVersion {
    /// Major version number (the part before the dot).
    pub major: u8,
    /// Minor version number (the part after the dot).
    pub minor: u8,
}

impl PdfVersion {
    /// Creates a version from its major and minor components.
    ///
    /// No validation is performed here; use [`PdfVersion::is_supported`] to
    /// check whether the version is one this parser accepts.
    #[must_use]
    pub const fn new(major: u8, minor: u8) -> Self {
        Self { major, minor }
    }

    /// Returns `true` for versions 1.0 through 1.7 and for 2.0.
    #[must_use]
    pub fn is_supported(&self) -> bool {
        matches!((self.major, self.minor), (1, 0..=7) | (2, 0))
    }
}

impl std::fmt::Display for PdfVersion {
    /// Formats the version as `major.minor`, e.g. `1.7`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}.{}", self.major, self.minor)
    }
}
33
34#[derive(Debug, Clone)]
36pub struct PdfHeader {
37 pub version: PdfVersion,
38 pub has_binary_marker: bool,
39}
40
41impl PdfHeader {
42 pub fn parse<R: Read>(reader: R) -> ParseResult<Self> {
44 let mut buf_reader = BufReader::new(reader);
45 let mut header = Self::parse_version_line(&mut buf_reader)?;
46
47 header.has_binary_marker = Self::check_binary_marker(&mut buf_reader)?;
49
50 Ok(header)
51 }
52
53 fn parse_version_line<R: BufRead>(reader: &mut R) -> ParseResult<Self> {
55 let mut line_bytes = Vec::new();
57 let mut consecutive_nulls = 0;
58
59 loop {
60 let mut byte = [0u8; 1];
61 match reader.read_exact(&mut byte) {
62 Ok(_) => {
63 if byte[0] == 0 {
65 consecutive_nulls += 1;
66 if consecutive_nulls > 10 {
67 return Err(ParseError::InvalidHeader);
68 }
69 } else {
70 consecutive_nulls = 0;
71 }
72
73 if byte[0] == b'\n' || byte[0] == b'\r' {
74 if byte[0] == b'\r' {
76 let mut next_byte = [0u8; 1];
78 if reader.read_exact(&mut next_byte).is_ok() && next_byte[0] != b'\n' {
79 line_bytes.push(byte[0]);
81 }
82 }
83 break;
84 }
85 line_bytes.push(byte[0]);
86 if line_bytes.len() > 200 {
88 return Err(ParseError::InvalidHeader);
89 }
90 }
91 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
92 if line_bytes.is_empty() {
93 return Err(ParseError::InvalidHeader);
94 }
95 break;
96 }
97 Err(e) => return Err(e.into()),
98 }
99 }
100
101 let line = String::from_utf8_lossy(&line_bytes).into_owned();
103
104 let (pdf_start, pdf_prefix_len) = if let Some(pos) = line.find("%PDF-") {
106 (pos, 5) } else {
108 let lower_line = line.to_lowercase();
110 if let Some(pos) = lower_line.find("%pdf-") {
111 (pos, 5) } else {
113 return Err(ParseError::InvalidHeader);
114 }
115 };
116
117 let pdf_line = &line[pdf_start..];
119 if pdf_line.len() < 7 {
120 return Err(ParseError::InvalidHeader);
122 }
123
124 let version_part = &pdf_line[pdf_prefix_len..]; let mut version_chars = String::new();
129 for ch in version_part.chars() {
130 if ch.is_ascii_digit() || ch == '.' {
131 version_chars.push(ch);
132 } else if ch.is_whitespace() && !version_chars.is_empty() {
133 continue;
135 } else if !version_chars.is_empty() {
136 break;
138 }
139 }
141
142 let version_str = version_chars.trim();
143
144 let (major, minor) = if version_str.contains('.') {
146 let parts: Vec<&str> = version_str.split('.').collect();
148 if parts.len() != 2 {
149 return Err(ParseError::InvalidHeader);
150 }
151
152 let major = parts[0]
153 .trim()
154 .parse::<u8>()
155 .map_err(|_| ParseError::InvalidHeader)?;
156 let minor = parts[1]
157 .trim()
158 .parse::<u8>()
159 .map_err(|_| ParseError::InvalidHeader)?;
160
161 (major, minor)
162 } else {
163 let clean_version = version_str
165 .chars()
166 .filter(|c| c.is_ascii_digit())
167 .collect::<String>();
168
169 if clean_version.len() >= 2 {
170 let major_str = &clean_version[0..1];
171 let minor_str = &clean_version[1..2];
172
173 let major = major_str
174 .parse::<u8>()
175 .map_err(|_| ParseError::InvalidHeader)?;
176 let minor = minor_str
177 .parse::<u8>()
178 .map_err(|_| ParseError::InvalidHeader)?;
179
180 (major, minor)
181 } else {
182 return Err(ParseError::InvalidHeader);
183 }
184 };
185
186 let version = PdfVersion::new(major, minor);
187
188 if !version.is_supported() {
189 return Err(ParseError::UnsupportedVersion(version.to_string()));
190 }
191
192 Ok(PdfHeader {
193 version,
194 has_binary_marker: false,
195 })
196 }
197
198 fn check_binary_marker<R: BufRead>(reader: &mut R) -> ParseResult<bool> {
200 let mut buffer = Vec::new();
201
202 loop {
204 let mut byte = [0u8; 1];
205 match reader.read_exact(&mut byte) {
206 Ok(_) => {
207 buffer.push(byte[0]);
208 if byte[0] == b'\n' || byte[0] == b'\r' {
209 break;
210 }
211 if buffer.len() > 1024 {
213 break;
214 }
215 }
216 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
217 break;
218 }
219 Err(e) => return Err(e.into()),
220 }
221 }
222
223 if buffer.is_empty() {
224 return Ok(false);
225 }
226
227 if buffer.first() == Some(&b'%') {
229 let binary_count = buffer
230 .iter()
231 .skip(1) .filter(|&&b| b >= 128)
233 .count();
234
235 Ok(binary_count >= 4)
236 } else {
237 Ok(false)
239 }
240 }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs the header parser over an in-memory byte slice.
    fn parse_bytes(input: &[u8]) -> ParseResult<PdfHeader> {
        PdfHeader::parse(Cursor::new(input))
    }

    /// Parses `input` (panicking on failure), checks the version and hands
    /// the header back for further assertions.
    fn expect_version(input: &[u8], major: u8, minor: u8) -> PdfHeader {
        let header = parse_bytes(input).unwrap();
        assert_eq!(header.version.major, major);
        assert_eq!(header.version.minor, minor);
        header
    }

    /// Asserts that parsing `input` fails with `ParseError::InvalidHeader`.
    fn expect_invalid(input: &[u8]) {
        let result = parse_bytes(input);
        assert!(matches!(result, Err(ParseError::InvalidHeader)));
    }

    /// Parses `input` (panicking on failure) and returns the marker flag.
    fn binary_marker_of(input: &[u8]) -> bool {
        parse_bytes(input).unwrap().has_binary_marker
    }

    #[test]
    fn test_parse_pdf_header_basic() {
        let header = expect_version(b"%PDF-1.7\n", 1, 7);
        assert!(!header.has_binary_marker);
    }

    #[test]
    fn test_parse_pdf_header_with_binary_marker() {
        let header = expect_version(b"%PDF-1.4\n%\xE2\xE3\xCF\xD3\n", 1, 4);
        assert!(header.has_binary_marker);
    }

    #[test]
    fn test_parse_pdf_20() {
        expect_version(b"%PDF-2.0\n", 2, 0);
    }

    #[test]
    fn test_invalid_header() {
        expect_invalid(b"Not a PDF\n");
    }

    #[test]
    fn test_unsupported_version() {
        let result = parse_bytes(b"%PDF-3.0\n");
        assert!(matches!(result, Err(ParseError::UnsupportedVersion(_))));
    }

    #[test]
    fn test_pdf_version_new() {
        let version = PdfVersion::new(1, 5);
        assert_eq!((version.major, version.minor), (1, 5));
    }

    #[test]
    fn test_pdf_version_display() {
        let version = PdfVersion::new(1, 7);
        assert_eq!(version.to_string(), "1.7");
        assert_eq!(format!("{version}"), "1.7");
    }

    #[test]
    fn test_pdf_version_is_supported() {
        let accepted = [(1, 0), (1, 1), (1, 4), (1, 7), (2, 0)];
        for &(major, minor) in accepted.iter() {
            assert!(PdfVersion::new(major, minor).is_supported());
        }

        let rejected = [(0, 9), (1, 8), (2, 1), (3, 0)];
        for &(major, minor) in rejected.iter() {
            assert!(!PdfVersion::new(major, minor).is_supported());
        }
    }

    #[test]
    fn test_pdf_version_equality() {
        let first = PdfVersion::new(1, 5);
        let same = PdfVersion::new(1, 5);
        let other = PdfVersion::new(1, 6);

        assert_eq!(first, same);
        assert_ne!(first, other);
    }

    #[test]
    fn test_header_with_crlf() {
        expect_version(b"%PDF-1.6\r\n", 1, 6);
    }

    #[test]
    fn test_header_with_cr_only() {
        expect_version(b"%PDF-1.3\r", 1, 3);
    }

    #[test]
    fn test_header_with_extra_whitespace() {
        expect_version(b"%PDF-1.5 \n", 1, 5);
    }

    #[test]
    fn test_header_no_newline() {
        expect_version(b"%PDF-1.2", 1, 2);
    }

    #[test]
    fn test_malformed_version_single_digit() {
        expect_invalid(b"%PDF-1\n");
    }

    #[test]
    fn test_malformed_version_too_many_parts() {
        expect_invalid(b"%PDF-1.4.2\n");
    }

    #[test]
    fn test_malformed_version_non_numeric() {
        expect_invalid(b"%PDF-1.x\n");
    }

    #[test]
    fn test_empty_input() {
        expect_invalid(b"");
    }

    #[test]
    fn test_header_too_long() {
        // 208 bytes without a terminator: past the line-length cap.
        let long_header = format!("%PDF-1.0{}", "x".repeat(200));
        expect_invalid(long_header.as_bytes());
    }

    #[test]
    fn test_binary_marker_insufficient_bytes() {
        assert!(!binary_marker_of(b"%PDF-1.4\n%\xE2\xE3\n"));
    }

    #[test]
    fn test_binary_marker_exact_threshold() {
        assert!(binary_marker_of(b"%PDF-1.4\n%\xE2\xE3\xCF\xD3\n"));
    }

    #[test]
    fn test_binary_marker_more_than_threshold() {
        assert!(binary_marker_of(b"%PDF-1.4\n%\xE2\xE3\xCF\xD3\x80\x81\n"));
    }

    #[test]
    fn test_binary_marker_no_comment() {
        assert!(!binary_marker_of(b"%PDF-1.4\n1 0 obj\n"));
    }

    #[test]
    fn test_binary_marker_ascii_only() {
        assert!(!binary_marker_of(b"%PDF-1.4\n%This is a comment\n"));
    }

    #[test]
    fn test_binary_marker_mixed_content() {
        assert!(binary_marker_of(
            b"%PDF-1.4\n%Some text \xE2\xE3\xCF\xD3 more text\n"
        ));
    }

    #[test]
    fn test_binary_marker_very_long_line() {
        // Comment line far longer than the scan limit.
        let mut bytes = b"%PDF-1.4\n%".to_vec();
        bytes.extend_from_slice(&vec![0x80u8; 2000]);
        bytes.push(b'\n');

        assert!(binary_marker_of(&bytes));
    }

    #[test]
    fn test_version_all_supported_ranges() {
        let supported_versions = vec![
            (1, 0),
            (1, 1),
            (1, 2),
            (1, 3),
            (1, 4),
            (1, 5),
            (1, 6),
            (1, 7),
            (2, 0),
        ];

        for (major, minor) in supported_versions {
            let text = format!("%PDF-{major}.{minor}\n");
            let header = expect_version(text.as_bytes(), major, minor);
            assert!(header.version.is_supported());
        }
    }

    #[test]
    fn test_clone_and_debug() {
        let version = PdfVersion::new(1, 4);
        let version_copy = version.clone();

        assert_eq!(version, version_copy);
        assert_eq!(format!("{version:?}"), "PdfVersion { major: 1, minor: 4 }");

        let header = PdfHeader {
            version: version.clone(),
            has_binary_marker: true,
        };
        let header_copy = header.clone();

        assert_eq!(header.version, header_copy.version);
        assert_eq!(header.has_binary_marker, header_copy.has_binary_marker);
    }

    #[test]
    fn test_header_with_leading_garbage() {
        expect_version(b"junk%PDF-1.4\n", 1, 4);
    }

    #[test]
    fn test_header_case_insensitive() {
        expect_version(b"%pdf-1.5\n", 1, 5);
    }

    #[test]
    fn test_header_version_without_dot() {
        expect_version(b"%PDF-14\n", 1, 4);
    }

    #[test]
    fn test_header_longer_line_limit() {
        // 158 bytes before the newline: long, but still under the cap.
        let mut bytes = b"%PDF-1.7".to_vec();
        bytes.extend_from_slice(&vec![b' '; 150]);
        bytes.push(b'\n');

        expect_version(&bytes, 1, 7);
    }

    #[test]
    fn test_header_with_multiple_spaces() {
        expect_version(b"%PDF- 1 . 7 \n", 1, 7);
    }

    #[test]
    fn test_header_null_byte_protection() {
        expect_version(b"\0\0%PDF-1.6\n", 1, 6);
    }

    #[test]
    fn test_header_too_many_nulls() {
        let mut bytes = vec![0u8; 15];
        bytes.extend_from_slice(b"%PDF-1.4\n");

        expect_invalid(&bytes);
    }

    #[test]
    fn test_header_minimal_length() {
        expect_version(b"%PDF-1.0", 1, 0);
    }

    #[test]
    fn test_header_too_short() {
        expect_invalid(b"%PDF-1");
    }

    #[test]
    fn test_header_version_extraction_edge_cases() {
        let test_cases = vec![("prefix%PDF-1.7\n", (1, 7))];

        for (input, (major, minor)) in test_cases {
            expect_version(input.as_bytes(), major, minor);
        }
    }

    #[test]
    fn test_header_with_extra_text() {
        expect_version(b"%PDF-1.4 extra text\n", 1, 4);
    }
}