# Package metadata for the `rust_scraper` crate (one library target and one
# binary target, both declared explicitly in this manifest).
[package]
name = "rust_scraper"
version = "1.0.0"
authors = ["GazaDev"]
# Target auto-discovery is switched off; the targets Cargo builds are the ones
# listed in the `[lib]`, `[[bin]]` and `[[test]]` tables of this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = "build.rs"
categories = ["command-line-utilities", "web-programming"]
documentation = "https://docs.rs/rust_scraper"
edition = "2021"
homepage = "https://github.com/XaviCode1000/rust-scraper"
keywords = ["scraper", "web", "crawler", "rag", "tui"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/XaviCode1000/rust-scraper"
# Minimum Rust toolchain version the crate declares support for.
rust-version = "1.80"
description = "Production-ready web scraper with Clean Architecture, TUI selector, and sitemap support"
# Opt-in functionality. Nothing is enabled by default.
[features]
default = []
# `documents` and `images` each enable the optional `mimetype-detector` crate.
documents = ["dep:mimetype-detector"]
images = ["dep:mimetype-detector"]
# Enables the optional `zvec-sys` crate.
zvec = ["dep:zvec-sys"]
# Umbrella feature that turns on all three optional features.
full = ["images", "documents", "zvec"]
# Library target. Declared explicitly because `autolib = false` in `[package]`.
[lib]
name = "rust_scraper"
path = "src/lib.rs"
# Binary target. It uses the same name as the library target; declared
# explicitly because `autobins = false` in `[package]`.
[[bin]]
name = "rust_scraper"
path = "src/main.rs"
# Test targets under `tests/`. Listed by hand because `autotests = false` in
# `[package]`; a new test file is not built until it gets its own entry here.
[[test]]
name = "crawler_integration"
path = "tests/crawler_integration.rs"
[[test]]
name = "integration_test"
path = "tests/integration_test.rs"
# Runtime dependencies, sorted alphabetically. A bare version such as "1" is a
# caret requirement, i.e. any semver-compatible release is accepted.
[dependencies]
anyhow = { version = "1" }
async-compression = { version = "0.4.41", features = ["tokio", "gzip"] }
bytes = { version = "1" }
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4", features = ["derive"] }
crossterm = { version = "0.28" }
dashmap = { version = "6" }
dirs = { version = "5" }
flate2 = { version = "1" }
futures = { version = "0.3" }
governor = { version = "0.6" }
htmd = { version = "0.5" }
html-to-markdown-rs = { version = "2.3" }
legible = { version = "0.4" }
md5 = { version = "0.7" }
# Optional: only built when the `documents` or `images` feature is enabled.
mimetype-detector = { version = "0.3", optional = true }
num_cpus = { version = "1" }
quick-xml = { version = "0.37" }
rand = { version = "0.8" }
ratatui = { version = "0.29" }
regex = { version = "1" }
# NOTE(review): `default-features` is not disabled here, so reqwest's default
# feature set is compiled in addition to the features listed below. Confirm
# that is intended alongside `rustls-tls-native-roots`.
reqwest = { version = "0.12", features = [
    "rustls-tls-native-roots",
    "gzip",
    "brotli",
    "stream",
    "json",
] }
reqwest-middleware = { version = "0.4" }
reqwest-retry = { version = "0.7" }
retry-policies = { version = "0.4" }
scraper = { version = "0.22" }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
serde_yaml = { version = "0.9" }
sha2 = { version = "0.10" }
syntect = { version = "5" }
thiserror = { version = "2" }
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7.18", features = ["io"] }
tracing = { version = "0.1" }
tracing-appender = { version = "0.2" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
url = { version = "2", features = ["serde"] }
uuid = { version = "1", features = ["v4", "serde"] }
walkdir = { version = "2" }
# Optional: only built when the `zvec` feature is enabled.
zvec-sys = { version = "0.3", optional = true }
# Dependencies used only when building tests, examples and benchmarks.
# `walkdir` is intentionally not repeated here: it is already a regular
# dependency with the same version requirement ("2"), and regular dependencies
# are available to test targets too.
[dev-dependencies]
mockall = { version = "0.12" }
tempfile = { version = "3" }
tokio-test = { version = "0.4" }
# Benchmark builds: release settings plus full debug info.
[profile.bench]
inherits = "release"
debug = 2
# `[profile.release]` sets `strip = true`, which this profile would otherwise
# inherit and which would strip the debug info requested by `debug = 2`.
strip = false
# Settings for `--release` builds.
[profile.release]
# A single codegen unit per crate.
codegen-units = 1
# "Fat" link-time optimization across the whole dependency graph.
lto = "fat"
opt-level = 3
# Panics abort the process instead of unwinding.
panic = "abort"
# Strip symbols and debug info from the final binary.
strip = true