# Crate metadata for spider-lib.
[package]
name = "spider-lib"
version = "0.3.2"
authors = ["mzyui <mzyui@proton.me>"]
edition = "2024"
homepage = "https://github.com/mzyui/spider-lib"
license = "MIT"
readme = "README.md"
repository = "https://github.com/mzyui/spider-lib"
keywords = ["web-scraping", "crawler", "scraper", "async", "rust"]
# NOTE(review): "development-tools::debugging" looks unrelated to a scraping
# framework; confirm that this category is intended.
categories = ["web-programming", "development-tools::debugging", "parsing"]
# No build script is used, and target auto-discovery is switched off for every
# target kind, so only targets declared explicitly in this manifest are built.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "A Rust-based web scraping framework inspired by Scrapy (Python)."
# Opt-in functionality; nothing is enabled by default.
[features]
default = []
# Every feature below except `pipeline-json` switches on the optional
# dependencies named in its list.
checkpoint = ["bincode"]
middleware-http-cache = ["bincode", "dirs"]
middleware-robots-txt = ["robotstxt"]
pipeline-csv = ["csv"]
# Enables no additional crates.
pipeline-json = []
pipeline-sqlite = ["rusqlite"]
# The library target, declared explicitly.
[lib]
path = "src/lib.rs"
name = "spider_lib"
# Example target `quotes`; Cargo skips it unless every feature listed in
# `required-features` is enabled.
[[example]]
path = "examples/quotes_scraper.rs"
name = "quotes"
required-features = [
    "checkpoint",
    "middleware-http-cache",
    "middleware-robots-txt",
    "pipeline-csv",
]
# Runtime dependencies, sorted by crate name. Entries marked `optional = true`
# are not built unless a feature enables them.
[dependencies]
anyhow = "1.0"
async-trait = "0.1"
bincode = { version = "1.3", optional = true }
bytes = { version = "1.0", features = ["serde"] }
csv = { version = "1.1", optional = true }
dashmap = { version = "5.4", features = ["serde"] }
dirs = { version = "5.0", optional = true }
futures-util = "0.3"
governor = "0.3"
hex = "0.4"
http = "1.4.0"
kanal = "0.1.1"
linkify = "0.10.0"
moka = { version = "0.12", features = ["future", "sync"] }
num_cpus = "1.16"
psl = "2.1.183"
rand = "0.8"
# Default features are disabled; only "json" and "rustls-tls" are requested.
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
rmp-serde = "1.3.1"
robotstxt = { version = "0.3.0", optional = true }
rusqlite = { version = "0.31", optional = true }
scraper = "0.19"
seahash = "4.1.0"
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
serde_with = "3.8.3"
sha2 = "0.10"
spider-macro = "0.1.0"
thiserror = "1.0"
tokio = { version = "1.0", features = ["full"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
url = { version = "2.0", features = ["serde"] }
[dev-dependencies]