pdf_oxide 0.3.59

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/// Maximum number of bytes of extracted text echoed in the preview line.
const PREVIEW_BYTES: usize = 300;

/// Returns the longest prefix of `text` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// Slicing a `str` at an arbitrary byte offset panics when the offset falls
/// inside a multi-byte character, so the cut point is moved back until it
/// lands on a boundary.
fn utf8_prefix(text: &str, max_bytes: usize) -> &str {
    let mut end = text.len().min(max_bytes);
    // Offset 0 is always a boundary, so this loop terminates.
    while !text.is_char_boundary(end) {
        end -= 1;
    }
    &text[..end]
}

/// Opens the PDF named by the first command-line argument and prints
/// diagnostics for index 0 (the value passed to both extraction calls): the
/// byte length of the extracted text, a short preview of it, the number of
/// extracted chars, and how many of those are U+FFFD.
///
/// Exits with status 2 when no path is supplied, and with status 1 when the
/// document cannot be opened or extraction fails.
fn main() {
    let path = match std::env::args().nth(1) {
        Some(p) => p,
        None => {
            eprintln!("usage: <program> <pdf-path>");
            std::process::exit(2);
        }
    };

    let doc = pdf_oxide::document::PdfDocument::open(&path).unwrap_or_else(|e| {
        eprintln!("failed to open {:?}: {:?}", path, e);
        std::process::exit(1);
    });

    let text = doc.extract_text(0).unwrap_or_else(|e| {
        eprintln!("extract_text failed: {:?}", e);
        std::process::exit(1);
    });
    println!("extract_text len={}", text.len());
    if !text.is_empty() {
        // Byte-offset slicing is only safe on a char boundary; extracted PDF
        // text is frequently non-ASCII.
        let preview = utf8_prefix(&text, PREVIEW_BYTES);
        println!("first {}: {:?}", preview.len(), preview);
    }

    let chars = doc.extract_chars(0).unwrap_or_else(|e| {
        eprintln!("extract_chars failed: {:?}", e);
        std::process::exit(1);
    });
    println!("chars total={}", chars.len());
    let fffd = chars.iter().filter(|c| c.char == '\u{FFFD}').count();
    println!("FFFD in chars: {}", fffd);
}