use std::path::Path;
use pdf_extract::extract_text;
/// Smoke test for `pdf_extract`: extracts the text of
/// `data/1706.03762v7.pdf` and prints a few diagnostics to stdout.
///
/// Printed, in order:
/// 1. the number of characters extracted,
/// 2. whether the literal `"Abstract"` occurs, plus a context window around
///    its first occurrence (about 100 bytes before and 600 bytes after the
///    start of the match),
/// 3. whether the word `"dominant"` occurs,
/// 4. the first 2000 characters of the extracted text.
///
/// # Errors
///
/// Returns whatever error `extract_text` reports, propagated via `?`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let path = Path::new("data/1706.03762v7.pdf");
    println!("🔍 Testing pdf-extract on Attention paper...\n");

    let text = extract_text(path)?;
    // `str::len` counts bytes; count scalar values so the number matches the
    // "characters" label even when the text contains non-ASCII.
    println!("📊 Total characters extracted: {}", text.chars().count());

    // A single `find` answers both "is it present?" and "where?".
    match text.find("Abstract") {
        Some(pos) => {
            println!("✅ Found 'Abstract'");
            // The window is computed in bytes, so each edge may land inside a
            // multi-byte character; slicing there would panic. Snap both
            // edges back to the nearest character boundary first.
            let start = snap_to_char_boundary(&text, pos.saturating_sub(100));
            let end = snap_to_char_boundary(&text, pos.saturating_add(600));
            println!("\n📄 Context around Abstract:\n{}", &text[start..end]);
        }
        None => println!("❌ 'Abstract' NOT FOUND"),
    }

    if text.contains("dominant") {
        println!("\n✅ Found 'dominant' (first word of Abstract text)");
    } else {
        println!("\n❌ 'dominant' NOT FOUND");
    }

    println!("\n📝 First 2000 characters:\n{}", char_prefix(&text, 2000));
    Ok(())
}

/// Returns the largest character-boundary offset in `s` that is `<= idx`,
/// clamping `idx` to `s.len()` first.
fn snap_to_char_boundary(s: &str, idx: usize) -> usize {
    let mut idx = idx.min(s.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !s.is_char_boundary(idx) {
        idx -= 1;
    }
    idx
}

/// Returns the prefix of `s` holding at most `max_chars` characters.
fn char_prefix(s: &str, max_chars: usize) -> &str {
    match s.char_indices().nth(max_chars) {
        // `cut` is the byte offset where character number `max_chars` starts,
        // which is by construction a valid slice boundary.
        Some((cut, _)) => &s[..cut],
        None => s,
    }
}