# Command-line binary target. It is declared explicitly because
# `autobins = false` in [package] turns off automatic binary discovery.
[[bin]]
name = "docrawl"
path = "src/bin/docrawl/main.rs"
# Dependencies, sorted alphabetically by crate name.
# A bare string is a version requirement (Cargo's default caret semantics);
# an inline table is used only where extra crate features are enabled.
[dependencies]
async-trait = "0.1"
bytes = "1"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4", features = ["derive"] }
futures = "0.3.31"
governor = "0.6"
html2md = "0.2"
indicatif = "0.18"
lol_html = "2.6"
pathdiff = "0.2"
regex = "1"
reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "cookies", "stream"] }
reqwest-middleware = "0.4"
reqwest-retry = "0.7"
robotstxt = "0.3"
scraper = "0.24"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sitemap = "0.4"
sled = "0.34"
soup = "0.5"
tokio = { version = "1", features = ["full"] }
tracing = "0.1"
tracing-subscriber = "0.3"
url = "2.5"
uuid = { version = "1", features = ["v4"] }
xxhash-rust = { version = "0.8", features = ["xxh3"] }
# Example target. It is listed explicitly because `autoexamples = false`
# in [package] turns off automatic discovery of files under examples/.
[[example]]
name = "programmatic"
path = "examples/programmatic.rs"
# Library target. It is declared explicitly because `autolib = false`
# in [package] turns off automatic library discovery. It shares the
# name "docrawl" with the binary target.
[lib]
name = "docrawl"
path = "src/lib.rs"
# Crate identity and registry metadata.
[package]
name = "docrawl"
version = "0.1.1"
edition = "2021"
license = "MIT"
readme = "README.md"
repository = "https://github.com/neur0map/docrawl"
homepage = "https://github.com/neur0map/docrawl"
documentation = "https://docs.rs/docrawl"
keywords = [
    "crawler",
    "docs",
    "markdown",
    "scraping",
    "http",
]
categories = [
    "command-line-utilities",
    "web-programming::http-client",
    "parsing",
    "text-processing",
]
# Glob patterns for files left out of the packaged crate.
exclude = [
    ".git*",
    ".github",
    "target",
    "out",
    "out_*",
    "*.log",
]
# No build script, and no automatic target discovery: only the targets
# declared explicitly in this manifest are built.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Docs-focused crawler library and CLI: crawl documentation sites, extract main content, convert to Markdown, mirror paths, and save with frontmatter."