# Package metadata for the pdf_oxide crate.
# Target auto-discovery is switched off (every `auto*` key is false) and no
# build script is run (`build = false`), so only the targets declared
# explicitly in this manifest are built.
[package]
name = "pdf_oxide"
version = "0.3.32"
authors = ["Yury Fedoseev <yfedoseev@gmail.com>"]
edition = "2021"
license = "MIT OR Apache-2.0"
readme = "README.md"
homepage = "https://oxide.fyi"
documentation = "https://pdf.oxide.fyi/docs/getting-started/rust"
repository = "https://github.com/yfedoseev/pdf_oxide"
keywords = [
    "pdf",
    "text-extraction",
    "pdf-parser",
    "pdf-to-markdown",
    "document-parser",
]
categories = ["parsing", "text-processing"]
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
# Files shipped in the published package; everything else is left out.
include = [
    "/src/**/*",
    "/benches/**/*",
    "/Cargo.toml",
    "/LICENSE-MIT",
    "/LICENSE-APACHE",
    "/README.md",
    "/include/**/*",
]
description = "The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs."
# docs.rs build configuration: the features to enable when rendering the API
# docs, plus `--cfg docsrs` passed to rustdoc.
[package.metadata.docs.rs]
features = ["python", "logging", "debug-span-merging", "parallel"]
rustdoc-args = ["--cfg", "docsrs"]

# maturin configuration: module name and the directory holding the Python
# sources.
[package.metadata.maturin]
name = "pdf_oxide"
python-source = "python"
# Cargo features. Nothing is enabled by default.
# A `dep:NAME` entry turns on the optional dependency NAME; a bare entry
# enables another feature from this table.
[features]
default = []

barcodes = ["dep:qrcode", "dep:barcoders"]
debug-span-merging = []
gpu = ["dep:ort", "ml"]
logging = []
ml = [
    "dep:tract-onnx",
    "dep:ndarray",
    "dep:linfa",
    "dep:linfa-clustering",
    "dep:tokenizers",
]
ocr = ["dep:ort", "dep:imageproc", "dep:ndarray"]
office = ["dep:calamine", "dep:zip"]
parallel = ["dep:rayon"]
# `pyo3` and `pyo3-log` are referenced by bare name (no `dep:` prefix), i.e.
# through the implicit features Cargo creates for those optional dependencies.
python = [
    "pyo3",
    "pyo3-log",
    "rendering",
    "parallel",
    "logging",
    "signatures",
    "barcodes",
    "office",
]
rendering = ["dep:tiny-skia", "dep:fontdb", "dep:rustybuzz"]
signatures = [
    "dep:x509-parser",
    "dep:cms",
    "dep:rsa",
    "dep:der",
    "dep:spki",
    "dep:pkcs1",
    "dep:pkcs8",
    "dep:signature",
    "dep:sha1",
]
table-ml = ["ml", "dep:pdfium-render"]
# Enables both getrandom entries (`getrandom` and the renamed `getrandom_02`).
wasm = [
    "dep:wasm-bindgen",
    "dep:web-sys",
    "dep:js-sys",
    "dep:console_error_panic_hook",
    "dep:console_log",
    "dep:getrandom",
    "dep:getrandom_02",
    "dep:serde-wasm-bindgen",
    "logging",
    "signatures",
    "barcodes",
    "office",
]
wasm-ml = ["wasm", "ml"]
# Library target. Three artifact kinds are produced from the same sources:
# `rlib` for Rust consumers, plus `cdylib` and `staticlib` (dynamic and static
# libraries for linking from other languages).
[lib]
name = "pdf_oxide"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib", "staticlib"]
# Binary targets. Each one is listed explicitly because `autobins = false`
# in [package] disables discovery of src/bin/*.rs.
[[bin]]
name = "analyze_gaps"
path = "src/bin/analyze_gaps.rs"

[[bin]]
name = "analyze_pdf_features"
path = "src/bin/analyze_pdf_features.rs"

[[bin]]
name = "benchmark_all_pdfs"
path = "src/bin/benchmark_all_pdfs.rs"

[[bin]]
name = "debug_extraction"
path = "src/bin/debug_extraction.rs"

[[bin]]
name = "export_to_html"
path = "src/bin/export_to_html.rs"

[[bin]]
name = "export_to_markdown"
path = "src/bin/export_to_markdown.rs"

[[bin]]
name = "export_to_text"
path = "src/bin/export_to_text.rs"

[[bin]]
name = "extract_with_pipeline"
path = "src/bin/extract_with_pipeline.rs"

[[bin]]
name = "extract_with_published_crate"
path = "src/bin/extract_with_published_crate.rs"

[[bin]]
name = "validate_content"
path = "src/bin/validate_content.rs"

[[bin]]
name = "validate_dataset"
path = "src/bin/validate_dataset.rs"

[[bin]]
name = "validate_structured"
path = "src/bin/validate_structured.rs"
# Benchmark targets, listed explicitly because `autobenches = false` in
# [package]. `harness = false` turns off the built-in libtest harness, so each
# bench file supplies its own entry point.
[[bench]]
name = "cjk_benchmarks"
path = "benches/cjk_benchmarks.rs"
harness = false

[[bench]]
name = "complex_script_benchmarks"
path = "benches/complex_script_benchmarks.rs"
harness = false

[[bench]]
name = "full_pipeline_benchmarks"
path = "benches/full_pipeline_benchmarks.rs"
harness = false

[[bench]]
name = "ligature_benchmarks"
path = "benches/ligature_benchmarks.rs"
harness = false

[[bench]]
name = "pdf_extraction_performance"
path = "benches/pdf_extraction_performance.rs"
harness = false

[[bench]]
name = "rtl_benchmarks"
path = "benches/rtl_benchmarks.rs"
harness = false

[[bench]]
name = "script_detection_benchmarks"
path = "benches/script_detection_benchmarks.rs"
harness = false

[[bench]]
name = "word_boundary_benchmarks"
path = "benches/word_boundary_benchmarks.rs"
harness = false
# Runtime dependencies, in alphabetical order.
# Entries marked `optional = true` are only compiled when a feature in
# [features] enables them.
[dependencies]
aes = "0.8"
barcoders = { version = "2.0", features = ["image"], optional = true }
base64 = "0.22"
bitflags = "2"
brotli = "8"
byteorder = "1.5"
bytes = "1.5"
calamine = { version = "0.34", optional = true }
cbc = "0.1"
chrono = "0.4"
cms = { version = "0.2", optional = true }
console_error_panic_hook = { version = "0.1", optional = true }
console_log = { version = "1", features = ["color"], optional = true }
der = { version = "0.8", optional = true }
encoding_rs = "0.8"
env_logger = "0.11"
fax = "0.2"
flate2 = { version = "1.1", default-features = false, features = ["zlib-rs"] }
fontdb = { version = "0.23", optional = true }
getrandom = { version = "0.4", features = ["wasm_js"], optional = true }
# Second copy of the `getrandom` crate (0.2 line), renamed so it can be
# declared next to the 0.4 entry above.
getrandom_02 = { package = "getrandom", version = "0.2", features = ["js"], optional = true }
image = { version = "0.24", default-features = false, features = ["png", "jpeg", "tiff"] }
imageproc = { version = "0.26", optional = true }
indexmap = "2.2"
jpeg-decoder = "0.3"
js-sys = { version = "0.3", optional = true }
lazy_static = "1.4"
# NOTE(review): linfa is on 0.7 while linfa-clustering is on 0.8. If
# linfa-clustering 0.8 requires linfa 0.8, two linfa versions end up in the
# tree. Confirm the `ml` feature still builds as intended.
linfa = { version = "0.7", optional = true }
linfa-clustering = { version = "0.8", optional = true }
log = "0.4"
lzw = "0.10.0"
md-5 = "0.10"
memchr = "2"
ndarray = { version = "0.17", features = ["std"], optional = true }
nom = "8.0"
ort = { version = "2.0.0-rc.11", default-features = false, features = ["ndarray"], optional = true }
pdfium-render = { version = "0.9", optional = true }
phf = { version = "0.13", features = ["macros"] }
pkcs1 = { version = "0.7", optional = true }
pkcs8 = { version = "0.10", optional = true }
pyo3 = { version = "0.28", features = ["abi3-py38", "generate-import-lib"], optional = true }
pyo3-log = { version = "0.13", optional = true }
qrcode = { version = "0.14", optional = true }
quick-xml = "0.39"
rayon = { version = "1.12", optional = true }
regex = "1.12"
rsa = { version = "0.9", optional = true }
rustybuzz = { version = "0.20", optional = true }
serde = { version = "1.0", features = ["derive"] }
serde-wasm-bindgen = { version = "0.6", optional = true }
serde_json = "1.0"
sha1 = { version = "0.10", optional = true }
sha2 = "0.10"
signature = { version = "2.2", optional = true }
smallvec = "1.13"
spki = { version = "0.7", optional = true }
stringprep = "0.1"
thiserror = "2.0"
tiff = "0.11"
tiny-skia = { version = "0.12", optional = true }
tokenizers = { version = "0.22", default-features = false, features = ["onig"], optional = true }
tract-onnx = { version = "0.22", optional = true }
ttf-parser = "0.25"
uuid = { version = "1.0", features = ["v4", "js"] }
wasm-bindgen = { version = "0.2", optional = true }
web-sys = { version = "0.3", features = ["console"], optional = true }
weezl = "0.1"
x509-parser = { version = "0.18", optional = true }
zip = { version = "8.5", default-features = false, features = ["deflate"], optional = true }
# Crates needed only when building tests, examples and benchmarks.
# `regex` is intentionally not repeated here: it is already a regular,
# non-optional dependency with the same version requirement, so test and bench
# targets can use it without a separate dev-dependency entry.
[dev-dependencies]
crc32fast = "1.3"
criterion = "0.8"
proptest = "1.10"
tempfile = "3.27"
wasm-bindgen-test = "0.3"
# Clippy lints set to "allow" for the whole package, so they are not reported
# for any target built from this manifest.
[lints.clippy]
collapsible_match = "allow"
doc_overindented_list_items = "allow"
enum_variant_names = "allow"
explicit_counter_loop = "allow"
manual_checked_ops = "allow"
manual_find = "allow"
match_like_matches_macro = "allow"
needless_range_loop = "allow"
redundant_guards = "allow"
regex_creation_in_loops = "allow"
should_implement_trait = "allow"
too_many_arguments = "allow"
type_complexity = "allow"
wrong_self_convention = "allow"
# Benchmark builds: maximum optimization with link-time optimization.
[profile.bench]
lto = true
opt-level = 3

# Release builds: maximum optimization, LTO, a single codegen unit, and
# symbols stripped from the output.
[profile.release]
codegen-units = 1
lto = true
opt-level = 3
strip = true

# Size-oriented variant of `release`: optimizes for binary size
# (`opt-level = "z"`) and aborts on panic instead of unwinding. The settings
# shared with `release` are spelled out explicitly rather than left to
# `inherits`.
[profile.release-small]
inherits = "release"
codegen-units = 1
lto = true
opt-level = "z"
panic = "abort"
strip = true