[package]
name = "deformat"
version = "0.4.2"
categories = ["text-processing", "parser-implementations"]
edition = "2021"
# Allow-list of files that go into the published crate. Patterns use
# gitignore syntax; the leading `/` anchors each one at the package root.
include = [
    "/src/**/*.rs",
    "/tests/**/*.rs",
    "/examples/**/*.rs",
    "/benches/**/*.rs",
    "/Cargo.toml",
    "/LICENSE-*",
    "/README.md",
]
keywords = ["text-extraction", "html", "pdf", "document", "nlp"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/arclabs561/deformat"
# Minimum supported Rust toolchain version for this crate.
rust-version = "1.80.0"
description = "Extract plain text from HTML, PDF, and other document formats"

[dependencies]
# Optional crates are only built when a feature in `[features]` enables them
# through a `dep:` entry; `memchr` is the sole unconditional dependency.
dom_smoothie = { version = "0.15", optional = true }
html2text = { version = "0.16", optional = true }
memchr = "2"
pdf-extract = { version = "0.10", optional = true }

[features]
# Nothing optional is enabled unless the dependent crate asks for it.
default = []
# Each feature below turns on exactly one optional dependency. The `dep:`
# prefix stops Cargo from also creating an implicit feature named after the
# dependency itself.
html2text = ["dep:html2text"]
pdf = ["dep:pdf-extract"]
readability = ["dep:dom_smoothie"]

[dev-dependencies]
# Only used when building tests, examples, and benchmarks; never passed on to
# crates that depend on this one.
criterion = { version = "0.5", features = ["html_reports"] }
proptest = "1"

# Benchmark target; with no explicit `path`, Cargo looks for `benches/strip.rs`.
[[bench]]
name = "strip"
# Disable the built-in libtest harness so the benchmark supplies its own
# `main` (presumably via the criterion dev-dependency — confirm in the bench source).
harness = false