crw-pdf 0.2.2

Fast PDF inspection, classification, and text extraction — vendored from firecrawl/pdf-inspector
Documentation

Smart PDF detection and text extraction using lopdf

Quick start

// Quick-start tour of three entry points: `process_pdf`, `detect_pdf`, and
// `process_pdf_with_options`. Every call is `.unwrap()`ed to keep the example
// short, so a failed call on "document.pdf" panics here; real callers should
// handle the failure instead of unwrapping.
//
// Full processing (detect + extract + markdown) with defaults
let result = crw_pdf::process_pdf("document.pdf").unwrap();
println!("type: {:?}, pages: {}", result.pdf_type, result.page_count);
// `markdown` is optional, so it is printed only when a value is present.
if let Some(md) = &result.markdown {
    println!("{md}");
}

// Fast metadata-only detection (no text extraction)
let info = crw_pdf::detect_pdf("document.pdf").unwrap();
println!("type: {:?}, pages: {}", info.pdf_type, info.page_count);

// Custom options via builder
use crw_pdf::{PdfOptions, ProcessMode};
// This binding shadows the `result` from the first call. The mode is chosen
// through the builder-style `mode` setter on `PdfOptions`.
// NOTE(review): what `ProcessMode::Analyze` includes or skips is not shown
// here — confirm against the `ProcessMode` documentation.
let result = crw_pdf::process_pdf_with_options(
    "document.pdf",
    PdfOptions::new().mode(ProcessMode::Analyze),
).unwrap();