# libreadability 0.2.0
#
# Rust port of go-readability — extract readable content from HTML
# Documentation
# Package metadata consumed by Cargo.
[package]
name = "libreadability"
version = "0.2.0"
edition = "2021"
description = "Rust port of go-readability — extract readable content from HTML"
license = "MIT"
# Minimum supported Rust toolchain version for this crate.
rust-version = "1.83"
readme = "README.md"
repository = "https://github.com/nchapman/readability-rs"
# crates.io accepts at most five keywords; this list is already at that limit.
keywords = ["readability", "html", "extraction", "article", "scraping"]
categories = ["parser-implementations", "text-processing", "web-programming"]
# Paths left out of the published package.
# NOTE(review): "benches/" is excluded here while a [[bench]] target is declared
# later in this manifest — confirm packaging/publishing handles that as intended.
exclude = ["test-pages/", "benches/", "CLAUDE.md", "PLAN.md"]

# Library target. The name is set explicitly; it is the same string as the
# package name.
[lib]
name = "libreadability"

# Runtime dependencies, grouped by the role each plays in the crate.
[dependencies]
# HTML parsing + CSS selectors (html5ever-based, spec-compliant)
# NOTE(review): ego-tree is listed alongside scraper; presumably its version
# must match the one scraper itself depends on — verify when bumping scraper.
scraper = "0.25"
ego-tree = "0.10"

# HTML/DOM internals (QualName, LocalName, StrTendril — used in DOM mutation)
# NOTE(review): presumably these also need to track the versions scraper uses
# so the shared types unify — verify when bumping scraper.
html5ever = "0.36"
markup5ever = "0.36"

# Regex (RE2-compatible, linear time — same engine family as go-readability's re2go patterns)
regex = "1"

# URL parsing/resolution (used in fixRelativeURIs)
url = "2"

# JSON-LD metadata parsing
serde = { version = "1", features = ["derive"] }
serde_json = "1"

# Date parsing (port of go-readability's araddon/dateparse dependency)
dateparser = "0.2"
# NOTE(review): the "serde" feature is enabled on chrono — presumably so date
# values can be (de)serialized; confirm it is still needed.
chrono = { version = "0.4", features = ["serde"] }

# Error types
thiserror = "2"

# Logging (optional — zero-cost when disabled)
# Only pulled in when the `tracing` feature of this crate is enabled.
tracing = { version = "0.1", optional = true }

# Cargo features. The default set is empty, so nothing optional is enabled
# unless a consumer asks for it.
[features]
default = []
# Enables the optional `tracing` dependency (`dep:` refers to the dependency
# of that name rather than to another feature).
tracing = ["dep:tracing"]

# Dependencies used only when building tests, examples, and benchmarks;
# they are not pulled in by crates that depend on this one.
[dev-dependencies]
pretty_assertions = "1"
# Built with the "html_reports" feature.
# NOTE(review): presumably used by the [[bench]] target in this manifest — confirm.
criterion = { version = "0.8", features = ["html_reports"] }

# Benchmark target named "extraction".
[[bench]]
name = "extraction"
# Turn off the built-in libtest harness so the benchmark provides its own
# entry point (presumably Criterion's, given the criterion dev-dependency —
# confirm).
harness = false