use crate::error::{Error, Result};
use std::fs::File;
use std::io::{BufReader, Read};
use std::path::Path;
/// Format details read from the header of a PDF document.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PdfFormat {
    /// Version text taken from the header, e.g. `"1.7"`.
    pub version: String,
    /// Linearization flag.
    ///
    /// NOTE(review): `detect_format_from_bytes` always stores `false` here;
    /// nothing in this file inspects the document for linearization.
    pub linearized: bool,
}
impl std::fmt::Display for PdfFormat {
    /// Renders the format as `PDF <version>`, e.g. `PDF 1.7`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { version, .. } = self;
        f.write_str("PDF ")?;
        f.write_str(version)
    }
}
/// Prefix that `detect_format_from_bytes` requires at the very start of the input.
const PDF_MAGIC: &[u8] = b"%PDF-";
/// Number of bytes in `PDF_MAGIC`; derived so the two cannot drift apart.
const PDF_MAGIC_LEN: usize = PDF_MAGIC.len();
/// Number of bytes read as the version field right after the magic (`d.d`).
const VERSION_LEN: usize = 3;
/// Detects the PDF format of the file at `path` from its leading bytes.
///
/// At most 16 bytes are read from the start of the file and passed to
/// [`detect_format_from_bytes`]. A file shorter than that is not treated as an
/// I/O failure: whatever bytes exist are still examined, so the verdict for a
/// short file comes from the header check rather than from the read.
///
/// # Errors
///
/// Propagates any I/O error raised while opening or reading the file
/// (converted through `?`), and otherwise returns whatever
/// [`detect_format_from_bytes`] reports for the bytes that were read.
pub fn detect_format_from_path<P: AsRef<Path>>(path: P) -> Result<PdfFormat> {
    /// Upper bound on how many leading bytes of the file are inspected.
    const HEADER_LEN: u64 = 16;

    let file = File::open(path)?;
    let mut header = Vec::new();
    // `take` + `read_to_end` stops cleanly at end-of-file, whereas `read_exact`
    // fails with `UnexpectedEof` whenever the file is shorter than the buffer.
    BufReader::new(file)
        .take(HEADER_LEN)
        .read_to_end(&mut header)?;
    detect_format_from_bytes(&header)
}
pub fn detect_format_from_bytes(data: &[u8]) -> Result<PdfFormat> {
if data.len() < PDF_MAGIC_LEN + VERSION_LEN {
return Err(Error::UnknownFormat);
}
if !data.starts_with(PDF_MAGIC) {
return Err(Error::UnknownFormat);
}
let version_bytes = &data[PDF_MAGIC_LEN..PDF_MAGIC_LEN + VERSION_LEN];
let version = String::from_utf8_lossy(version_bytes).to_string();
if !is_valid_version(&version) {
return Err(Error::UnsupportedVersion(version));
}
Ok(PdfFormat {
version,
linearized: false, })
}
/// Returns `true` only when `version` is exactly `<digit>.<digit>` with ASCII
/// digits, e.g. `"1.7"` or `"2.0"`.
fn is_valid_version(version: &str) -> bool {
    // A match needs exactly three bytes, all ASCII, so working on bytes gives
    // the same verdict as working on characters.
    matches!(
        version.as_bytes(),
        [major, b'.', minor] if major.is_ascii_digit() && minor.is_ascii_digit()
    )
}
/// Reports whether [`detect_format_from_path`] succeeds for `path`.
///
/// Every error from the detection is collapsed into `false`; the cause is not
/// distinguished.
pub fn is_pdf<P: AsRef<Path>>(path: P) -> bool {
    let outcome = detect_format_from_path(path);
    outcome.is_ok()
}
/// Reports whether [`detect_format_from_bytes`] succeeds for `data`.
///
/// Every error from the detection is collapsed into `false`.
pub fn is_pdf_bytes(data: &[u8]) -> bool {
    let outcome = detect_format_from_bytes(data);
    outcome.is_ok()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A 1.7 header followed by a binary-marker comment line is accepted.
    #[test]
    fn test_detect_valid_pdf() {
        let data = b"%PDF-1.7\n%\xe2\xe3\xcf\xd3";
        let format = detect_format_from_bytes(data).unwrap();
        assert_eq!(format.version, "1.7");
    }

    /// Major version 2 is accepted as well.
    #[test]
    fn test_detect_pdf_2_0() {
        let data = b"%PDF-2.0\n%\xe2\xe3\xcf\xd3";
        let format = detect_format_from_bytes(data).unwrap();
        assert_eq!(format.version, "2.0");
    }

    /// Input that does not start with the magic is an unknown format.
    #[test]
    fn test_detect_invalid_format() {
        let data = b"<!DOCTYPE html>";
        let result = detect_format_from_bytes(data);
        assert!(matches!(result, Err(Error::UnknownFormat)));
    }

    /// Input shorter than the magic itself is an unknown format.
    #[test]
    fn test_detect_too_short() {
        let data = b"%PDF";
        let result = detect_format_from_bytes(data);
        assert!(matches!(result, Err(Error::UnknownFormat)));
    }

    /// A complete magic with a cut-off version field is still an unknown
    /// format, not an unsupported version.
    #[test]
    fn test_detect_truncated_version() {
        let result = detect_format_from_bytes(b"%PDF-1.");
        assert!(matches!(result, Err(Error::UnknownFormat)));
    }

    /// A well-placed but malformed version field is reported with its text.
    #[test]
    fn test_detect_unsupported_version() {
        match detect_format_from_bytes(b"%PDF-x.y\n") {
            Err(Error::UnsupportedVersion(version)) => assert_eq!(version, "x.y"),
            other => panic!("expected UnsupportedVersion, got {:?}", other),
        }
    }

    /// `Display` renders the prefix and version; detection never reports
    /// linearization.
    #[test]
    fn test_display_and_linearized_flag() {
        let format = detect_format_from_bytes(b"%PDF-1.4\n").unwrap();
        assert_eq!(format.to_string(), "PDF 1.4");
        assert!(!format.linearized);
    }

    #[test]
    fn test_is_pdf_bytes() {
        assert!(is_pdf_bytes(b"%PDF-1.4\n"));
        assert!(!is_pdf_bytes(b"Not a PDF"));
    }

    #[test]
    fn test_version_validation() {
        assert!(is_valid_version("1.0"));
        assert!(is_valid_version("1.7"));
        assert!(is_valid_version("2.0"));
        assert!(!is_valid_version("10.0"));
        assert!(!is_valid_version("abc"));
    }
}