use oxidize_pdf::parser::PdfReader;
use proptest::prelude::*;
use std::io::Cursor;
/// Strategy yielding text shaped like the different spellings of a PDF string.
///
/// Four alternatives are mixed:
/// * bare alphanumeric text with spaces (0 to 100 characters),
/// * a parenthesised literal such as `(abc)`,
/// * an angle-bracketed run of hex digits such as `<0AfF>`,
/// * a literal of the form `(Hello <word> World)`.
// No caller is visible in this part of the file, hence the lint override.
#[allow(dead_code)]
fn pdf_string_strategy() -> impl Strategy<Value = String> {
    let bare_text = "[a-zA-Z0-9 ]{0,100}";
    let literal = "[a-zA-Z0-9]{0,50}".prop_map(|body| format!("({})", body));
    let hex = "[0-9A-Fa-f]{0,100}".prop_map(|digits| format!("<{}>", digits));
    let greeting = "[a-zA-Z0-9]{0,20}".prop_map(|word| format!("(Hello {} World)", word));

    prop_oneof![bare_text, literal, hex, greeting]
}
/// Strategy yielding PDF name tokens.
///
/// Each value is a `/` followed by one ASCII letter and up to 50 further
/// characters drawn from letters, digits, `.`, `_` and `-`.
// The lint override keeps the helper warning-free even when nothing that is
// compiled calls it.
#[allow(dead_code)]
fn pdf_name_strategy() -> impl Strategy<Value = String> {
    let name_body = "[a-zA-Z][a-zA-Z0-9._-]{0,50}";
    name_body.prop_map(|body| {
        let mut name = String::with_capacity(body.len() + 1);
        name.push('/');
        name.push_str(&body);
        name
    })
}
/// Strategy yielding the bytes of a small, structurally consistent PDF file.
///
/// The generated file consists of the `%PDF-1.7` header, a binary marker
/// comment, between 1 and 10 identical page-like dictionary objects, a classic
/// cross-reference table, a trailer and the `startxref` / `%%EOF` footer.
///
/// The drawn object count (1..=100) is clamped to 10. The same clamped count
/// is used for the objects written, the xref subsection header and the
/// trailer `/Size`, and every xref row carries the real byte offset of the
/// object it describes.
// The lint override keeps the helper warning-free even when nothing that is
// compiled calls it.
#[allow(dead_code)]
fn simple_pdf_strategy() -> impl Strategy<Value = Vec<u8>> {
    (any::<u32>().prop_map(|n| n % 100 + 1), any::<bool>()).prop_map(
        // The boolean is drawn but deliberately unused; it is kept so the
        // strategy's input shape stays the same.
        |(requested_objects, _compressed)| {
            let object_count = requested_objects.min(10);

            let mut pdf = Vec::new();
            pdf.extend_from_slice(b"%PDF-1.7\n");
            pdf.extend_from_slice(b"%\xE2\xE3\xCF\xD3\n");

            // Remember where each object starts so the xref table can point at it.
            let mut object_offsets = Vec::new();
            for i in 1..=object_count {
                object_offsets.push(pdf.len());
                pdf.extend_from_slice(format!("{i} 0 obj\n").as_bytes());
                pdf.extend_from_slice(b"<<\n");
                pdf.extend_from_slice(b"/Type /Page\n");
                pdf.extend_from_slice(b"/Parent 2 0 R\n");
                pdf.extend_from_slice(b">>\n");
                pdf.extend_from_slice(b"endobj\n");
            }

            let xref_pos = pdf.len();
            pdf.extend_from_slice(b"xref\n");
            // One extra entry for the mandatory free object 0.
            pdf.extend_from_slice(format!("0 {}\n", object_count + 1).as_bytes());
            pdf.extend_from_slice(b"0000000000 65535 f \n");
            for offset in &object_offsets {
                pdf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
            }

            pdf.extend_from_slice(b"trailer\n");
            pdf.extend_from_slice(b"<<\n");
            pdf.extend_from_slice(format!("/Size {}\n", object_count + 1).as_bytes());
            pdf.extend_from_slice(b"/Root 1 0 R\n");
            pdf.extend_from_slice(b">>\n");
            pdf.extend_from_slice(b"startxref\n");
            pdf.extend_from_slice(format!("{xref_pos}\n").as_bytes());
            pdf.extend_from_slice(b"%%EOF\n");
            pdf
        },
    )
}
/// An empty byte stream must be rejected, and the error text must mention
/// one of "PDF", "empty" or "header".
#[test]
fn test_parser_handles_empty_input() {
    let reader = PdfReader::new(Cursor::new(Vec::<u8>::new()));

    // Render the error once instead of once per keyword.
    let message = match reader {
        Ok(_) => panic!("Parser should fail on empty input"),
        Err(err) => err.to_string(),
    };

    let mentions_cause = ["PDF", "empty", "header"]
        .iter()
        .any(|&keyword| message.contains(keyword));
    assert!(mentions_cause);
}
// Property tests for the parser and for the textual building blocks of PDF
// syntax. Every function carries `#[test]`: without it `proptest!` only
// emits a plain function that the test harness never runs.
proptest! {
    // A 32-byte arbitrary header followed by `%%EOF` may only be accepted
    // when it starts with the `%PDF` magic; rejecting it is always fine.
    #[test]
    fn test_parser_handles_invalid_header(header in any::<[u8; 32]>()) {
        let mut data = header.to_vec();
        data.extend_from_slice(b"\n%%EOF\n");

        if PdfReader::new(Cursor::new(data)).is_ok() {
            prop_assert!(header.starts_with(b"%PDF"));
        }
    }

    // Cutting a generated PDF short must either fail with a recognisable
    // error message or, if parsing succeeds, the input must not really have
    // been truncated.
    #[test]
    fn test_parser_handles_truncated_files(pdf in simple_pdf_strategy(), truncate_at in 0..100usize) {
        // Clamp so a cut point past the end keeps the whole file.
        let truncated = &pdf[..truncate_at.min(pdf.len())];

        match PdfReader::new(Cursor::new(truncated.to_vec())) {
            Ok(_) => {
                prop_assert!(truncate_at >= pdf.len() || truncated.ends_with(b"%%EOF\n"));
            }
            Err(e) => {
                let error_str = e.to_string();
                let recognised = ["EOF", "truncated", "unexpected", "xref", "header", "PDF"]
                    .iter()
                    .any(|&marker| error_str.contains(marker));
                // Include the text so a failing case shows what the parser said.
                prop_assert!(recognised, "unrecognised error message: {}", error_str);
            }
        }
    }

    // Checks the shape of the literal and hexadecimal spellings built from
    // the same content. This exercises the encoding helpers in the test
    // itself, not the parser.
    #[test]
    fn test_string_parsing_preserves_content(content in "[a-zA-Z0-9 !@#$%^&*()]{0,100}") {
        let escaped = content.replace('(', "\\(").replace(')', "\\)");
        let literal = format!("({escaped})");
        let hex: String = content.bytes().map(|b| format!("{b:02X}")).collect();
        let hex_string = format!("<{hex}>");

        prop_assert!(literal.starts_with('(') && literal.ends_with(')'));
        prop_assert!(hex_string.starts_with('<') && hex_string.ends_with('>'));
        // Two hex digits per input byte.
        prop_assert_eq!(hex.len(), content.len() * 2);
    }

    // Every i64 must round-trip through its decimal text, and its `N.0`
    // spelling must be a parseable float.
    #[test]
    fn test_number_parsing_ranges(n in any::<i64>()) {
        let int_str = n.to_string();
        let float_str = format!("{n}.0");

        prop_assert_eq!(int_str.parse::<i64>(), Ok(n));
        prop_assert!(float_str.parse::<f64>().is_ok());
    }

    // Generated names start with `/` and contain no space, newline,
    // carriage return or tab after it.
    #[test]
    fn test_name_validity(name in pdf_name_strategy()) {
        prop_assert!(name.starts_with('/'));

        let name_part = &name[1..];
        let has_whitespace = name_part
            .chars()
            .any(|c| matches!(c, ' ' | '\n' | '\r' | '\t'));
        prop_assert!(!has_whitespace);
    }

    // Deduplicating a non-empty key list yields a non-empty set that is
    // never larger than the list. Duplicates are allowed in the input.
    #[test]
    fn test_dictionary_key_uniqueness(
        keys in prop::collection::vec(pdf_name_strategy(), 1..20)
    ) {
        let unique_keys: std::collections::HashSet<_> = keys.iter().collect();

        prop_assert!(!unique_keys.is_empty());
        prop_assert!(unique_keys.len() <= keys.len());
    }

    // An indirect reference `num gen R` splits into exactly three tokens
    // that parse back to the original numbers.
    // The parameter is called `generation` because `gen` is a reserved
    // keyword in the Rust 2024 edition.
    #[test]
    fn test_object_reference_validity(
        num in 1u32..=999999u32,
        generation in 0u16..=u16::MAX
    ) {
        let ref_str = format!("{num} {generation} R");
        let parts: Vec<&str> = ref_str.split_whitespace().collect();

        prop_assert_eq!(parts.len(), 3);
        // Compare against `Ok(..)` so a parse failure is reported as a
        // property failure instead of a panic.
        prop_assert_eq!(parts[0].parse::<u32>(), Ok(num));
        prop_assert_eq!(parts[1].parse::<u16>(), Ok(generation));
        prop_assert_eq!(parts[2], "R");
    }

    // Placeholder: both a matching and a mismatching declared length are
    // accepted, so nothing is asserted yet.
    // TODO: feed a stream object with `declared_length` to the parser and
    // assert on the outcome.
    #[test]
    fn test_stream_length_consistency(
        data in prop::collection::vec(any::<u8>(), 0..1000),
        declared_length in 0..2000usize
    ) {
        let _lengths_agree = declared_length == data.len();
    }
}
#[cfg(test)]
mod regression_tests {
    use super::*;

    /// Joins the fixture fragments into one in-memory, seekable byte stream.
    fn fixture(parts: &[&[u8]]) -> Cursor<Vec<u8>> {
        Cursor::new(parts.concat())
    }

    /// A comment line holding non-UTF-8 bytes must not make the parser panic.
    #[test]
    fn test_parser_handles_binary_in_comment() {
        let input = fixture(&[
            b"%PDF-1.7\n",
            b"%\xFF\xFE\xFD\xFC\n",
            b"1 0 obj\n<< >>\nendobj\n",
            b"xref\n0 2\n",
            b"0000000000 65535 f \n",
            b"0000000015 00000 n \n",
            b"trailer\n<< /Size 2 /Root 1 0 R >>\n",
            b"startxref\n36\n%%EOF\n",
        ]);

        // Success and failure are both accepted; the test only fails if
        // construction panics.
        let _outcome = PdfReader::new(input);
    }

    /// A file made only of a CRLF-terminated header and `%%EOF` is rejected.
    #[test]
    fn test_parser_handles_cr_lf_endings() {
        let input = fixture(&[b"%PDF-1.7\r\n", b"%%EOF\r\n"]);

        let outcome = PdfReader::new(input);
        assert!(outcome.is_err());
    }

    /// A file with a second xref section and trailer appended after the
    /// first `%%EOF` must not make the parser panic.
    #[test]
    fn test_parser_handles_incremental_updates() {
        // NOTE(review): the hand-written offsets (15, 36, 150, 171) do not
        // match the byte positions in this fixture; for example the first
        // object starts at byte 9 and the first `xref` at byte 30.
        // Presumably this is intentional or harmless because nothing is
        // asserted about the result; confirm before tightening this test.
        let input = fixture(&[
            b"%PDF-1.7\n",
            b"1 0 obj\n<< >>\nendobj\n",
            b"xref\n0 2\n",
            b"0000000000 65535 f \n",
            b"0000000015 00000 n \n",
            b"trailer\n<< /Size 2 /Root 1 0 R >>\n",
            b"startxref\n36\n%%EOF\n",
            b"2 0 obj\n<< >>\nendobj\n",
            b"xref\n2 1\n",
            b"0000000150 00000 n \n",
            b"trailer\n<< /Size 3 /Root 1 0 R /Prev 36 >>\n",
            b"startxref\n171\n%%EOF\n",
        ]);

        // Success and failure are both accepted; the test only fails if
        // construction panics.
        let _outcome = PdfReader::new(input);
    }
}