# pdfvec 0.1.1
#
# High-performance PDF text extraction library for vectorization pipelines
# Documentation
# Crate metadata. Key order: name, version, then the remaining keys
# alphabetically, with the description last.
[package]
name = "pdfvec"
version = "0.1.1"
categories = ["text-processing", "parsing"]
edition = "2024"
keywords = ["pdf", "text-extraction", "vectorization", "nlp"]
# SPDX expression: either license may be chosen.
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/copyleftdev/pdfvec"
description = "High-performance PDF text extraction library for vectorization pipelines"

# Library target, declared explicitly. Both values match what Cargo would
# infer on its own (crate name from the package, root at src/lib.rs).
[lib]
name = "pdfvec"
path = "src/lib.rs"

# Binary target. It shares the name `pdfvec` with the library target; its
# entry point lives under src/bin/.
[[bin]]
name = "pdfvec"
path = "src/bin/pdfvec.rs"

# Runtime dependencies, sorted alphabetically.
# NOTE(review): clap, indicatif and walkdir look like they serve only the
# `pdfvec` binary; if so, consider gating them behind an optional feature so
# library consumers do not have to build them. Confirm against src/lib.rs.
[dependencies]
# Default features are disabled; only `std` is turned on.
chrono = { version = "0.4", default-features = false, features = ["std"] }
# `derive` enables clap's derive macros.
clap = { version = "4.5", features = ["derive"] }
indicatif = "0.17"
memmap2 = "0.9"
pdf = "0.9"
rayon = "1.10"
thiserror = "2.0"
unicode-segmentation = "1.12"
walkdir = "2.5"

# Dependencies used only by tests, examples and benchmarks; they are not
# pulled in by crates that depend on pdfvec.
[dev-dependencies]
# `html_reports` turns on Criterion's HTML report output.
criterion = { version = "0.5", features = ["html_reports"] }
# NOTE(review): presumably a comparison baseline for the benchmarks - confirm.
pdf-extract = "0.7"

# Benchmark target `extraction`. No `path` is given, so Cargo's default
# location for a bench of this name applies.
[[bench]]
name = "extraction"
# Disable the built-in libtest harness so the benchmark supplies its own
# `main` (presumably Criterion's, given the dev-dependency above).
harness = false

# Release profile: trades longer compile times for optimization opportunities.
[profile.release]
# A single codegen unit lets the optimizer see the whole crate at once.
codegen-units = 1
# "fat" is the explicit spelling of `lto = true`: LTO across all crates in
# the dependency graph.
lto = "fat"
# Panics terminate the process immediately instead of unwinding.
# NOTE(review): confirm nothing relies on catching panics (e.g. per-file
# recovery during batch extraction) - with "abort" that is not possible.
panic = "abort"