pdfsink-rs 0.2.5

A fast, pure-Rust PDF extraction library and CLI, roughly 10–50x faster than pdfplumber at extracting text, words, tables, layout, images, and metadata from PDFs. By Clark Labs Inc.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
use pdfsink_rs::{PdfDocument, TableSettings};

/// Opens the sample PDF fixture and prints the text, word count, and table
/// count reported for the page obtained via `page(1)`.
///
/// # Errors
///
/// Propagates any error returned while opening the document, fetching the
/// page, or extracting tables.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let document = PdfDocument::open("tests/fixtures/simple_text.pdf")?;
    let target_page = document.page(1)?;

    // Text and word statistics are printed before table extraction is
    // attempted, so they still appear if that later step returns an error.
    let text = target_page.extract_text();
    println!("text:\n{}", text);

    let word_count = target_page.extract_words().len();
    println!("word count: {}", word_count);

    // Table extraction uses the library's default settings.
    let tables = target_page.extract_tables(TableSettings::default())?;
    let table_count = tables.len();
    println!("table count: {}", table_count);

    Ok(())
}