use zpdf_core::{Error, Result};
/// PDF version as read from a file's `%PDF-M.m` header line.
///
/// Field order is significant: the derived ordering compares `major` first and
/// `minor` second, so `1.4 < 1.7 < 2.0` holds as expected for versions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct PdfHeader {
    /// Major version digit (the `1` in `%PDF-1.7`).
    pub major: u8,
    /// Minor version digit (the `7` in `%PDF-1.7`).
    pub minor: u8,
}
/// Version reported when the `%PDF` marker is found but the version digits
/// after it cannot be parsed.
const DEFAULT_VERSION: PdfHeader = PdfHeader {
    major: 1,
    minor: 7,
};
/// Locates the `%PDF` marker in `data` and reads the version that follows it.
///
/// The marker may appear at any offset, so leading junk before it is skipped.
/// A single `-` directly after the marker is consumed if present, and the
/// remaining bytes are handed to `parse_version`.
///
/// When the marker is present but the version is malformed or truncated, a
/// warning is logged (showing at most the first 8 bytes of the offending
/// text) and `DEFAULT_VERSION` is returned instead of an error.
///
/// # Errors
///
/// Returns `Error::NotAPdf` when `data` does not contain `%PDF` at all.
pub fn parse_header(data: &[u8]) -> Result<PdfHeader> {
    const MARKER: &[u8] = b"%PDF";
    // Upper bound on how many bytes of a bad version are echoed in the log.
    const PREVIEW_LEN: usize = 8;

    let start = data
        .windows(MARKER.len())
        .position(|window| window == MARKER)
        .ok_or(Error::NotAPdf)?;
    let after_marker = &data[start + MARKER.len()..];

    // The hyphen is optional: drop exactly one if it is there.
    let rest = match after_marker {
        [b'-', tail @ ..] => tail,
        other => other,
    };

    if let Some(header) = parse_version(rest) {
        return Ok(header);
    }

    let shown = String::from_utf8_lossy(&rest[..rest.len().min(PREVIEW_LEN)]);
    tracing::warn!(
        "malformed PDF header version {shown:?}; assuming PDF {}.{}",
        DEFAULT_VERSION.major,
        DEFAULT_VERSION.minor
    );
    Ok(DEFAULT_VERSION)
}
/// Reads a `D.D` version (ASCII digit, `.`, ASCII digit) from the start of `rest`.
///
/// Only the first three bytes are examined; anything after them is ignored.
/// Returns `None` if `rest` is shorter than three bytes or does not match the
/// digit-dot-digit shape.
fn parse_version(rest: &[u8]) -> Option<PdfHeader> {
    match rest {
        [major @ b'0'..=b'9', b'.', minor @ b'0'..=b'9', ..] => Some(PdfHeader {
            // Both bytes are known ASCII digits here, so the subtraction cannot underflow.
            major: *major - b'0',
            minor: *minor - b'0',
        }),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and returns its `(major, minor)` pair, panicking if
    /// `parse_header` returns an error.
    fn version_of(input: &[u8]) -> (u8, u8) {
        let header = parse_header(input).unwrap();
        (header.major, header.minor)
    }

    #[test]
    fn valid_header() {
        assert_eq!(version_of(b"%PDF-1.7\n"), (1, 7));
    }

    #[test]
    fn pdf_2_0() {
        assert_eq!(version_of(b"%PDF-2.0\n"), (2, 0));
    }

    #[test]
    fn garbage_before_header() {
        // A UTF-8 byte-order mark precedes the marker.
        assert_eq!(version_of(b"\xef\xbb\xbf%PDF-1.4\n"), (1, 4));
    }

    #[test]
    fn not_a_pdf() {
        assert!(parse_header(b"not a pdf").is_err());
    }

    #[test]
    fn marker_without_hyphen_defaults_version() {
        assert_eq!(version_of(b"%PDF/DA2 \x1d\n"), (1, 7));
    }

    #[test]
    fn malformed_version_defaults() {
        // Each input contains the marker but no well-formed `D.D` version.
        let cases: [&[u8]; 7] = [
            b"%PDF-a.4\n",
            b"%PDF-1.)",
            b"%PDF-0000000",
            b"%PDF-/Si3/De",
            b"%PDF-1e66666",
            b"%PDF-{<~00~",
            b"%PDF-\n2 0 obj",
        ];
        for bytes in cases {
            let h = parse_header(bytes).expect("marker present => header parses");
            assert_eq!((h.major, h.minor), (1, 7), "input {bytes:?}");
        }
    }

    #[test]
    fn truncated_version_defaults() {
        assert_eq!(version_of(b"%PDF-"), (1, 7));
    }

    #[test]
    fn no_marker_at_all_is_err() {
        assert!(parse_header(b"1 0 obj<</Type/Catalog>>endobj").is_err());
    }
}