# Package metadata. `name` and `version` come first, the remaining keys are
# alphabetical, and `description` is kept last.
[package]
name = "rust_scraper"
version = "1.0.0"
authors = ["GazaDev"]
categories = ["command-line-utilities", "web-programming"]
documentation = "https://docs.rs/rust_scraper"
edition = "2021"
homepage = "https://github.com/XaviCode1000/rust-scraper"
keywords = ["scraper", "web", "crawler", "rag", "tui"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/XaviCode1000/rust-scraper"
# Oldest toolchain this crate declares support for.
rust-version = "1.80"
description = "Production-ready web scraper with Clean Architecture, TUI selector, and sitemap support"

# Opt-in functionality; nothing optional is enabled by default.
[features]
default = []
# `documents` and `images` both enable the same optional `mimetype-detector` crate.
documents = ["dep:mimetype-detector"]
images = ["dep:mimetype-detector"]
# Enables the optional `zvec-sys` crate.
zvec = ["dep:zvec-sys"]
# Umbrella feature that switches on every optional feature listed above.
full = [
    "images",
    "documents",
    "zvec",
]

# Runtime dependencies, sorted alphabetically by crate name.
[dependencies]
anyhow = "1"
async-compression = { version = "0.4.41", features = ["tokio", "gzip"] }
bytes = "1"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4", features = ["derive"] }
crossterm = "0.28"
dashmap = "6"
dirs = "5"
flate2 = "1"
futures = "0.3"
governor = "0.6"
htmd = "0.5"
html-to-markdown-rs = "2.3"
legible = "0.4"
md5 = "0.7"
# Optional: only built when the `images` or `documents` feature is enabled.
mimetype-detector = { version = "0.3", optional = true }
num_cpus = "1"
quick-xml = "0.37"
rand = "0.8"
ratatui = "0.29"
regex = "1"
# NOTE(review): reqwest's default features are left enabled here, so its
# default TLS backend is presumably built in addition to rustls — confirm
# whether `default-features = false` was intended.
reqwest = { version = "0.12", features = ["rustls-tls-native-roots", "gzip", "brotli", "stream", "json"] }
reqwest-middleware = "0.4"
reqwest-retry = "0.7"
retry-policies = "0.4"
scraper = "0.22"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
sha2 = "0.10"
syntect = "5"
thiserror = "2"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7.18", features = ["io"] }
tracing = "0.1"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
url = { version = "2", features = ["serde"] }
uuid = { version = "1", features = ["v4", "serde"] }
walkdir = "2"
# Optional: only built when the `zvec` feature is enabled.
zvec-sys = { version = "0.3", optional = true }

# Crates needed only by tests, examples, and benchmarks.
# `walkdir` is intentionally not repeated here: it is already listed under
# [dependencies] with the same version requirement, and regular dependencies
# are available to test targets as well.
[dev-dependencies]
mockall = "0.12"
tempfile = "3"
tokio-test = "0.4"

# Release profile: maximum optimization at the cost of longer builds.
[profile.release]
# A single codegen unit combined with fat LTO allows whole-program optimization.
codegen-units = 1
lto = "fat"
opt-level = 3
# Panics abort the process instead of unwinding the stack.
panic = "abort"
# Remove symbols from the final binary.
strip = true

# Benchmark profile: release-level optimization, but with debug info kept
# (e.g. so profilers can resolve symbols and source lines).
[profile.bench]
inherits = "release"
debug = true
# `strip = true` is inherited from [profile.release] and would strip the
# debug info requested above, so stripping is disabled explicitly here.
strip = false