# Package metadata for the `deformat` crate.
[package]
name = "deformat"
version = "0.3.1"
# Target auto-discovery is turned off for every target kind; the library,
# example, and test targets are declared explicitly further down this file.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
categories = ["text-processing", "parser-implementations"]
edition = "2021"
# Allow-list of paths that go into the published package.
include = [
    "/src/**/*.rs",
    "/tests/**/*.rs",
    "/examples/**/*.rs",
    "/Cargo.toml",
    "/LICENSE-*",
    "/README.md",
]
keywords = ["text-extraction", "html", "pdf", "document", "nlp"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/arclabs561/deformat"
# Minimum supported Rust toolchain.
rust-version = "1.80.0"
description = "Extract plain text from HTML, PDF, and other document formats"
# Opt-in functionality. The default feature set is empty, so none of the
# optional dependencies are pulled in unless a feature is requested.
[features]
default = []
# Each feature below only switches on the optional dependency named after `dep:`.
html2text = ["dep:html2text"]
pdf = ["dep:pdf-extract"]
readability = ["dep:dom_smoothie"]
# Library target, declared explicitly because `autolib = false` in `[package]`.
[lib]
name = "deformat"
path = "src/lib.rs"
# Example target, declared explicitly because `autoexamples = false` in `[package]`.
[[example]]
name = "strip"
path = "examples/strip.rs"
# Test targets, declared explicitly because `autotests = false` in `[package]`.
[[test]]
name = "integration"
path = "tests/integration.rs"
# This target shares its name with the `proptest` dev-dependency.
[[test]]
name = "proptest"
path = "tests/proptest.rs"
# Runtime dependencies. Entries marked `optional = true` are only compiled in
# when the matching `dep:` feature in `[features]` is enabled; the rest are
# always required.
[dependencies]
dom_smoothie = { version = "0.15", optional = true }
html2text = { version = "0.16", optional = true }
memchr = "2"
once_cell = "1"
pdf-extract = { version = "0.10", optional = true }
regex = "1"
# Dependencies needed only when building tests, examples, and benchmarks.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
proptest = "1"