use oxidize_pdf::parser::{PdfDocument, PdfReader};
use std::time::{Duration, Instant};
/// Checks that `page_count()` and `extract_text()` both return quickly on
/// `Pages-tree-refs.pdf`.
///
/// Judging by the test name and the assertion messages, the fixture contains a
/// page tree with circular references. Note that each elapsed-time check runs
/// only after the call has returned, so it detects slow completion rather than
/// a call that never returns.
#[test]
fn test_circular_page_tree_no_hang() {
    let fixture = "tests/fixtures/Pages-tree-refs.pdf";
    let document =
        PdfDocument::new(PdfReader::open(fixture).expect("Failed to open Pages-tree-refs.pdf"));
    let time_limit = Duration::from_secs(2);

    // Phase 1: counting pages must finish within the limit.
    let timer = Instant::now();
    let page_total = document.page_count();
    let count_time = timer.elapsed();
    assert!(
        count_time < time_limit,
        "page_count() took {:?} — cycle detection likely failed",
        count_time
    );

    // An error from `page_count()` is tolerated; only a successful count is bounded.
    if let Ok(total) = page_total {
        assert!(
            total <= 10,
            "Circular page tree should yield few pages, got {}",
            total
        );
    }

    // Phase 2: text extraction must also finish within the limit. Its result
    // (success or failure) is intentionally not inspected.
    let timer = Instant::now();
    let _extracted = document.extract_text();
    let extract_time = timer.elapsed();
    assert!(
        extract_time < time_limit,
        "extract_text() took {:?} on circular page tree — likely hung",
        extract_time
    );
}
/// Checks that `poppler-67295-0.pdf` is handled quickly and reports at most
/// 10 pages.
///
/// Per the assertion messages, the fixture declares an absurdly large `/Count`
/// (presumably around ten billion, going by the test name) while actually
/// holding a single page.
#[test]
fn test_absurd_count_10b_capped() {
    let fixture = "tests/fixtures/poppler-67295-0.pdf";
    let document =
        PdfDocument::new(PdfReader::open(fixture).expect("Failed to open poppler-67295-0.pdf"));
    let time_limit = Duration::from_secs(2);

    // Counting must succeed, be fast, and not echo the bogus declared count.
    let timer = Instant::now();
    let page_total = document
        .page_count()
        .expect("page_count() should succeed");
    let count_time = timer.elapsed();
    assert!(
        count_time < time_limit,
        "page_count() took {:?} — /Count cap likely failed",
        count_time
    );
    assert!(
        page_total <= 10,
        "Expected ≤10 actual pages, got {} (absurd /Count not capped)",
        page_total
    );

    // Extraction must be fast too; it may fail, but a success must be small.
    let timer = Instant::now();
    let extraction = document.extract_text();
    let extract_time = timer.elapsed();
    assert!(
        extract_time < time_limit,
        "extract_text() took {:?} — looping over absurd count",
        extract_time
    );
    if let Ok(ref page_texts) = extraction {
        assert!(
            page_texts.len() <= 10,
            "extract_text() returned {} pages for a 1-page PDF",
            page_texts.len()
        );
    }
}
/// Checks that `poppler-85140-0.pdf` is handled quickly and reports at most
/// 10 pages.
///
/// Per the assertion messages, the fixture declares an absurdly large `/Count`
/// (presumably around 214 million, going by the test name) while actually
/// holding a single page.
#[test]
fn test_absurd_count_214m_capped() {
    let fixture = "tests/fixtures/poppler-85140-0.pdf";
    let opened = PdfReader::open(fixture).expect("Failed to open poppler-85140-0.pdf");
    let document = PdfDocument::new(opened);
    let budget = Duration::from_secs(2);

    // Step 1: the page count must come back promptly and be small.
    let stopwatch = Instant::now();
    let reported = document
        .page_count()
        .expect("page_count() should succeed");
    let took = stopwatch.elapsed();
    assert!(
        took < budget,
        "page_count() took {:?} — /Count cap likely failed",
        took
    );
    assert!(
        reported <= 10,
        "Expected ≤10 actual pages, got {} (absurd /Count not capped)",
        reported
    );

    // Step 2: text extraction must come back promptly as well.
    let stopwatch = Instant::now();
    let outcome = document.extract_text();
    let took = stopwatch.elapsed();
    assert!(
        took < budget,
        "extract_text() took {:?} — looping over absurd count",
        took
    );

    // A failed extraction is tolerated; a successful one must stay small.
    if let Ok(texts) = outcome.as_ref() {
        assert!(
            texts.len() <= 10,
            "extract_text() returned {} pages for a 1-page PDF",
            texts.len()
        );
    }
}
/// Checks that `hang_4mb_10kpages.pdf` can be counted within 5 seconds and
/// text-extracted within 30 seconds.
///
/// The fixture is expected to hold on the order of ten thousand pages (per
/// the test name and messages); the test only requires at least 100. Each
/// elapsed-time check runs after the call has returned, so it detects slow
/// completion rather than a call that never returns.
#[test]
fn test_10k_pages_extract_text_under_30s() {
    let path = "tests/fixtures/hang_4mb_10kpages.pdf";
    let reader = PdfReader::open(path).expect("Failed to open hang_4mb_10kpages.pdf");
    let doc = PdfDocument::new(reader);

    let start = Instant::now();
    let count = doc.page_count().expect("page_count() should succeed");
    let elapsed = start.elapsed();
    assert!(
        elapsed < Duration::from_secs(5),
        "page_count() took {:?} for 10K pages",
        elapsed
    );
    assert!(
        count >= 100,
        "Expected many pages (10K), got only {}",
        count
    );

    let start = Instant::now();
    let result = doc.extract_text();
    let elapsed = start.elapsed();
    assert!(
        elapsed < Duration::from_secs(30),
        "extract_text() on 10K pages took {:?} — flat index likely not working",
        elapsed
    );

    match result {
        Ok(pages) => {
            // Compare as `usize`: widening the page count is lossless on
            // 32/64-bit targets, whereas narrowing `len()` to `u32` would
            // silently wrap and could make a bogus length compare equal.
            assert_eq!(
                pages.len(),
                count as usize,
                "extract_text() should return one entry per page"
            );
        }
        // An extraction error is tolerated here; only the timing and, on
        // success, the entry count are enforced.
        Err(e) => {
            eprintln!("extract_text() returned error (acceptable): {}", e);
        }
    }
}
/// Sanity-checks page access on an ordinary document, `Cold_Email_Hacks.pdf`.
///
/// Verifies that the first and last pages are reachable by index, that the
/// first page has positive dimensions, that the index one past the end is
/// rejected, and that `extract_text()` yields one entry per page.
#[test]
fn test_flat_index_normal_pdf() {
    let path = "tests/fixtures/Cold_Email_Hacks.pdf";
    let reader = PdfReader::open(path).expect("Failed to open Cold_Email_Hacks.pdf");
    let doc = PdfDocument::new(reader);

    let count = doc.page_count().expect("page_count() should succeed");
    // Also guards the `count - 1` below against underflow.
    assert!(count > 0, "PDF should have at least 1 page");

    // `expect` (rather than `assert!(is_ok())` + `unwrap()`) makes a failure
    // report the underlying error alongside the message.
    let page = doc.get_page(0).expect("First page should be accessible");
    if let Err(err) = doc.get_page(count - 1) {
        panic!("Last page should be accessible: {:?}", err);
    }

    assert!(page.width() > 0.0, "Page width should be positive");
    assert!(page.height() > 0.0, "Page height should be positive");

    // Page indices are zero-based, so `count` itself is one past the end.
    let oob = doc.get_page(count);
    assert!(oob.is_err(), "Out-of-bounds page should return error");

    let text_pages = doc.extract_text().expect("extract_text() should succeed");
    // Compare as `usize`: widening the page count is lossless on 32/64-bit
    // targets, whereas narrowing `len()` to `u32` would silently wrap.
    assert_eq!(
        text_pages.len(),
        count as usize,
        "extract_text() should return one entry per page"
    );
}