# deformat 0.4.2
#
# Extract plain text from HTML, PDF, and other document formats
# Documentation
# Crate metadata. Key order follows the Cargo convention: `name` and `version`
# first, the remaining keys alphabetically, and `description` last.
[package]
name = "deformat"
version = "0.4.2"
categories = ["text-processing", "parser-implementations"]
edition = "2021"
# Allow-list of files shipped in the published package; anything not matched
# here is left out of the crate archive.
include = [
    "/Cargo.toml",
    "/LICENSE-*",
    "/README.md",
    "/benches/**/*.rs",
    "/examples/**/*.rs",
    "/src/**/*.rs",
    "/tests/**/*.rs",
]
keywords = ["text-extraction", "html", "pdf", "document", "nlp"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/arclabs561/deformat"
# Minimum supported Rust toolchain version.
rust-version = "1.80.0"
description = "Extract plain text from HTML, PDF, and other document formats"

# Runtime dependencies, sorted alphabetically. Only `memchr` is unconditional;
# each `optional = true` entry is compiled in only when the matching entry in
# [features] is enabled.
[dependencies]
# Optional: readability-style article extraction
dom_smoothie = { version = "0.15", optional = true }
# Optional: DOM-based HTML-to-text conversion
html2text = { version = "0.16", optional = true }
memchr = "2"
# Optional: PDF text extraction
pdf-extract = { version = "0.10", optional = true }

# Cargo features, sorted alphabetically. Nothing is enabled by default; each
# named feature activates exactly one optional dependency via `dep:` syntax.
[features]
default = []
# Enables the optional `html2text` dependency.
html2text = ["dep:html2text"]
# Enables the optional `pdf-extract` dependency.
pdf = ["dep:pdf-extract"]
# Enables the optional `dom_smoothie` dependency.
readability = ["dep:dom_smoothie"]

# Dependencies needed only when building tests, examples, and benchmarks;
# they are not part of the crate's normal dependency graph.
[dev-dependencies]
# Requested with its `html_reports` feature turned on.
criterion = { version = "0.5", features = ["html_reports"] }
proptest = "1"

# Benchmark target named "strip". `harness = false` turns off Cargo's built-in
# libtest harness, so the bench source must supply its own entry point
# (presumably via criterion from [dev-dependencies] - confirm).
[[bench]]
name = "strip"
harness = false