# Crate identity and registry metadata.
# Key order: name, version, then alphabetical, with the description last.
[package]
name = "spider"
version = "2.37.135"
authors = ["j-mendez <jeff@spider.cloud>"]
categories = ["web-programming", "command-line-utilities"]
documentation = "https://docs.rs/spider"
edition = "2021"
keywords = ["crawler", "spider", "scraper"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/spider-rs/spider"
description = "A web crawler and scraper, building blocks for data curation workloads."

# Maintenance status advertised for this package.
[badges]
maintenance.status = "as-is"

# Dependencies declared for every target. Entries marked `optional = true`
# are only compiled when an entry in `[features]` turns them on.
# Version strings are plain SemVer requirements (implicit caret).
[dependencies]
url = "2"
tokio-stream = "0.1"
hashbrown = "0.15"
log = "0.4"
percent-encoding = "2"
regex = "1"
ua_generator = { version = "0.5", optional = true }
string_concat = "0.0.1"
lazy_static = "1"
ahash = { version = "0.8", default-features = false, features = ["std"] }
smallvec = "1"
num_cpus = "1"
bytes = { version = "1", features = ["serde"] }
serde = { version = "1", optional = true, features = ["derive"] }
flexbuffers = { version = "2", optional = true }
itertools = { version = "0.14", optional = true }
case_insensitive_string = { version = "0.2", features = ["compact", "serde"] }
sitemap = { version = "0.4", optional = true }
chrono = { version = "0.4", optional = true }
cron = { version = "0.15", optional = true }
async-trait = { version = "0.1", optional = true }
strum = { version = "0.26", features = ["derive"] }
async_job = { version = "0.1", optional = true }
reqwest-middleware = { version = "0.4", optional = true, default-features = false }
http-cache-reqwest = { version = "0.15", optional = true, default-features = false }
const_format = { version = "0.2", optional = true }
async-openai = { version = "0.29", optional = true }
tiktoken-rs = { version = "0.7", optional = true }
lol_html = "2"
# A bare "0" requirement accepts every 0.x release, and for 0.x crates each
# minor bump may be breaking, so the requirement names a concrete minor line.
# NOTE(review): 0.18 is assumed to be the line already resolved in Cargo.lock
# (and the one reqwest 0.12 uses) - confirm before merging.
cookie = { version = "0.18", optional = true }
serde_json = { version = "1", optional = true }
quick-xml = { version = "0.38", features = ["serde", "serialize", "async-tokio"] }
moka = { version = "0.12", features = ["future"], optional = true }
# Also declared in both target-specific tables further down; the wasm32 one
# adds the `js` feature.
fastrand = { version = "2", optional = true }
http-cache-semantics = { version = "2", optional = true }
http-cache = { version = "0.20", optional = true, default-features = false }
http = { version = "1", optional = true }
phf = "0.11"
auto_encoder = "0.1"
base64 = { version = "0.22", optional = true }
string-interner = { version = "0.19", default-features = false, features = [
    "std",
    "inline-more",
    "backends",
], optional = true }
httpdate = { version = "1", optional = true }
rand = { version = "0.9", optional = true }
serde_regex = { version = "1", optional = true }
statrs = { version = "0.18", optional = true }
aho-corasick = "1"
tracing = { version = "0.1", default-features = false, features = ["std"], optional = true }
sysinfo = { version = "0.35", default-features = false, features = ["system"], optional = true }
sqlx = { version = "0.8", features = ["runtime-tokio", "sqlite"], optional = true }
h2 = "0.4"
tower = { version = "0.5", features = ["limit"] }
pin-project-lite = "0.2"
sonic-rs = { version = "0.5", optional = true }
wreq = { version = "5", optional = true, features = [
    "json",
    "stream",
    "socks",
    "gzip",
    "brotli",
    "zstd",
    "deflate",
    "cookies",
] }
wreq-util = { version = "2", optional = true, features = ["emulation-serde"] }

# Optional dependency: switched on by the `chrome` feature, and by any feature
# entry of the form `spider_chrome/<feature>`.
[dependencies.spider_chrome]
version = "2"
default-features = false
features = ["bytes", "stream"]
optional = true

# Optional dependency: switched on by the `firewall` feature via
# `dep:spider_firewall`.
[dependencies.spider_firewall]
version = "2"
optional = true
# Always-on dependency (no `optional` key). Its default features are disabled
# and only the three listed below are requested.
[dependencies.spider_fingerprint]
version = "2"
default-features = false
features = ["serde", "headers", "dynamic-versions"]
# Linux-only dependencies. `tokio-uring` is optional and is switched on by the
# `io_uring` feature; `libc` is always built on Linux.
[target.'cfg(target_os = "linux")'.dependencies]
libc = "0.2"
tokio-uring = { version = "0.4", optional = true }

# Dependencies for every target except wasm32: tokio with the
# `rt-multi-thread` runtime feature and reqwest including `zstd`.
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
fastrand = { version = "2", optional = true }
reqwest = { version = "0.12", features = [
    "brotli",
    "gzip",
    "deflate",
    "zstd",
    "stream",
    "http2",
] }
tokio = { version = "1", default-features = false, features = [
    "macros",
    "time",
    "rt-multi-thread",
] }

# wasm32-only variants of the same three crates: tokio requests `rt` rather
# than `rt-multi-thread`, fastrand adds `js`, and reqwest omits `zstd`.
[target.'cfg(target_arch = "wasm32")'.dependencies]
fastrand = { version = "2", optional = true, features = ["js"] }
reqwest = { version = "0.12", features = [
    "brotli",
    "gzip",
    "deflate",
    "stream",
    "http2",
] }
tokio = { version = "1", default-features = false, features = [
    "macros",
    "time",
    "rt",
] }

# Feature flags. In the lists below, `dep:<crate>` switches on an optional
# dependency, `<crate>/<feature>` forwards a feature to a dependency, and a
# bare name refers to another feature in this table. Features with an empty
# list enable no dependencies.
[features]
default = ["basic", "io_uring"]
# `basic` is the union of the two bundles below.
__basic = [
    "sync",
    "cookies",
    "ua_generator",
    "encoding",
    "string_interner_buffer_backend",
    "balance",
    "real_browser",
]
basic_tls = ["reqwest_native_tls_native_roots", "disk_native_tls"]
basic = ["__basic", "basic_tls"]
# sqlx-backed features.
disk = ["dep:sqlx"]
disk_native_tls = ["disk", "sqlx/runtime-tokio-native-tls"]
disk_aws = ["disk", "sqlx/tls-rustls-aws-lc-rs"]
adblock = ["chrome", "spider_chrome/adblock"]
balance = ["dep:sysinfo"]
regex = []
glob = ["dep:itertools"]
ua_generator = ["dep:ua_generator"]
decentralized = ["serde", "flexbuffers"]
control = []
time = []
io_uring = ["dep:tokio-uring"]
sync = ["tokio/sync"]
flexbuffers = ["dep:flexbuffers"]
serde = [
    "dep:serde",
    "hashbrown/serde",
    # NOTE(review): written without `?`, so this entry also switches on the
    # optional `string-interner` dependency - confirm that is intended.
    "string-interner/serde",
    "dep:serde_regex",
    "smallvec/serde",
]
fs = ["tokio/fs"]
full_resources = []
socks = ["reqwest/socks"]
reqwest_json = ["reqwest/json"]
sitemap = ["dep:sitemap"]
# HTTP cache features; `cache` and `cache_mem` differ only in the
# http-cache-reqwest manager they select.
cache_request = ["dep:reqwest-middleware", "dep:http-cache-reqwest"]
cache = ["cache_request", "http-cache-reqwest/manager-cacache"]
cache_mem = ["cache_request", "http-cache-reqwest/manager-moka"]
cache_openai = ["dep:moka"]
cache_chrome_hybrid = [
    "cache_request",
    "cache",
    "chrome",
    "dep:http-cache-semantics",
    "dep:http-cache",
    "dep:http",
]
cache_chrome_hybrid_mem = [
    "cache_request",
    "cache_mem",
    "chrome",
    "dep:http-cache-semantics",
    "dep:http-cache",
    "dep:http",
]
# Chrome features; every `chrome_*` flag implies `chrome`.
chrome = ["dep:spider_chrome", "dep:base64"]
chrome_headed = ["chrome"]
chrome_cpu = ["chrome"]
chrome_stealth = ["chrome"]
chrome_screenshot = ["chrome"]
chrome_store_page = ["chrome", "serde"]
chrome_intercept = ["chrome"]
chrome_headless_new = ["chrome"]
chrome_simd = ["chrome", "spider_chrome/simd", "simd"]
chrome_tls_connection = ["chrome", "spider_chrome/chrome_tls_connection"]
cookies = ["reqwest/cookies", "dep:cookie"]
cron = ["dep:async_job", "dep:chrono", "dep:cron", "dep:async-trait"]
smart = ["chrome", "dep:rand", "chrome_intercept"]
encoding = []
headers = ["dep:httpdate"]
remote_addr = []
real_browser = ["dep:statrs", "dep:rand", "dep:fastrand"]
openai = [
    "chrome",
    "serde",
    "chrome_intercept",
    "dep:async-openai",
    "dep:tiktoken-rs",
    "dep:serde_json",
]
openai_slim_fit = []
decentralized_headers = ["dep:const_format", "dep:itertools"]
spoof = ["dep:fastrand"]
# Pass-through switches for reqwest features.
reqwest_rustls_tls = ["reqwest/rustls-tls"]
reqwest_native_tls = ["reqwest/native-tls"]
reqwest_native_tls_alpn = ["reqwest/native-tls-alpn"]
reqwest_native_tls_vendored = ["reqwest/native-tls-vendored"]
# NOTE(review): the three `*_roots` features below are named `native_tls` but
# forward to reqwest's `rustls-tls-*` features - confirm the naming is
# intentional before relying on it.
reqwest_native_tls_manual_roots = ["reqwest/rustls-tls-manual-roots"]
reqwest_native_tls_webpki_roots = ["reqwest/rustls-tls-webpki-roots"]
reqwest_native_tls_native_roots = ["reqwest/rustls-tls-native-roots"]
reqwest_hickory_dns = ["reqwest/hickory-dns"]
reqwest_multipart = ["reqwest/multipart"]
tokio_io_std = ["tokio/io-std"]
tracing = ["tokio/tracing", "dep:tracing"]
# All three backend flags enable the same optional dependency.
string_interner_buffer_backend = ["dep:string-interner"]
string_interner_string_backend = ["dep:string-interner"]
string_interner_bucket_backend = ["dep:string-interner"]
page_error_status_details = []
simd = ["dep:sonic-rs"]
# NOTE(review): `spider_chrome/firewall` is written without `?`, so it also
# switches on the optional `spider_chrome` dependency even when `chrome` is
# off - confirm that is intended.
firewall = ["dep:spider_firewall", "spider_chrome/firewall"]
wreq = ["dep:wreq", "dep:wreq-util"]
# Named `rquest_*` but forwards to the `wreq` crate.
rquest_hickory_dns = ["wreq/hickory-dns"]
cowboy = []
inline-more = []

# Extra cargo arguments listed under the docs.rs metadata table.
[package.metadata.docs.rs]
cargo-args = [
    "-Zunstable-options",
    "-Zrustdoc-scrape-examples",
]