# pdfvec 0.1.1
#
# High-performance PDF text extraction library for vectorization pipelines
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
name = "pdfvec"
version = "0.1.1"
# Build-script detection and target auto-discovery are all switched off;
# the crate's targets are declared explicitly in this manifest instead.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
categories = ["text-processing", "parsing"]
edition = "2024"
keywords = ["pdf", "text-extraction", "vectorization", "nlp"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/copyleftdev/pdfvec"
description = "High-performance PDF text extraction library for vectorization pipelines"

# Library target, rooted at src/lib.rs.
[lib]
name = "pdfvec"
path = "src/lib.rs"

# Binary target; it shares the name "pdfvec" with the library target.
[[bin]]
name = "pdfvec"
path = "src/bin/pdfvec.rs"

# Integration test target.
[[test]]
name = "integration"
path = "tests/integration.rs"

# Benchmark target. The default libtest harness is disabled —
# NOTE(review): presumably because the `criterion` dev-dependency supplies
# its own entry point; confirm against benches/extraction.rs.
[[bench]]
name = "extraction"
path = "benches/extraction.rs"
harness = false

[dependencies]
# chrono's default feature set is turned off; only `std` is enabled.
chrono = { version = "0.4", default-features = false, features = ["std"] }
clap = { version = "4.5", features = ["derive"] }
indicatif = "0.17"
memmap2 = "0.9"
pdf = "0.9"
rayon = "1.10"
thiserror = "2.0"
unicode-segmentation = "1.12"
walkdir = "2.5"

# Dependencies used only by tests, examples, and benchmarks.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
pdf-extract = "0.7"

# Release builds trade compile time for optimization quality.
[profile.release]
# Compile each crate as a single code-generation unit.
codegen-units = 1
# Enable link-time optimization.
lto = true
# Abort the process on panic instead of unwinding the stack.
panic = "abort"