# Crate manifest for `spider`.
[package]
name = "spider"
version = "2.40.2"
edition = "2021"
authors = ["j-mendez <jeff@spider.cloud>"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/spider-rs/spider"
documentation = "https://docs.rs/spider"
keywords = ["crawler", "spider", "scraper"]
categories = ["web-programming", "command-line-utilities"]
# No build script is used and target auto-discovery is switched off, so the
# only targets are the ones declared explicitly in this manifest.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "A web crawler and scraper, building blocks for data curation workloads."
# docs.rs build settings: extra arguments handed to cargo when the
# documentation is built there.
[package.metadata.docs.rs]
cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
# Maintenance-status badge for the crate.
[badges]
maintenance = { status = "as-is" }
# Cargo feature flags. An entry of the form `dep:<name>` pulls in an optional
# dependency; `<crate>/<feature>` turns on a feature of that dependency; a
# bare name refers to another feature in this table.
[features]
# Bundle pulled in by `basic`.
__basic = [
    "sync",
    "cookies",
    "ua_generator",
    "encoding",
    "string_interner_buffer_backend",
    "balance",
    "real_browser",
    "disk_native_tls",
    "time",
]
adblock = ["chrome", "chromey/adblock"]
balance = ["dep:sysinfo"]
# Part of the `default` feature set.
basic = ["__basic", "basic_tls"]
basic_tls = ["reqwest_native_tls_native_roots", "disk_native_tls"]
cache = ["cache_request", "http-global-cache/cache"]
# The two hybrid variants list the same `chrome` + HTTP caching crates and
# differ only in the cache feature they build on (`cache` vs `cache_mem`).
cache_chrome_hybrid = [
    "cache",
    "chrome",
    "dep:http-cache-semantics",
    "dep:http-cache",
    "dep:http",
]
cache_chrome_hybrid_mem = [
    "cache_mem",
    "chrome",
    "dep:http-cache-semantics",
    "dep:http-cache",
    "dep:http",
]
cache_gemini = ["dep:moka"]
cache_mem = ["cache_request", "http-global-cache/cache_mem"]
cache_openai = ["dep:moka"]
# Base of `cache` and `cache_mem`: brings in the request-caching crates.
cache_request = [
    "dep:http-global-cache",
    "dep:reqwest-middleware",
    "dep:http-cache-reqwest",
    "dep:http",
]
# Chrome-related features. Every `chrome_*` feature turns on `chrome`; the
# ones listing nothing else add no dependency or dependency feature of their
# own at the manifest level.
chrome = [
    "dep:chromey",
    "dep:base64",
    "dep:rand",
    "serde",
    "dep:fastrand",
    "dep:which",
    "dep:home",
    "cookies",
    "reqwest_json",
]
chrome_cpu = ["chrome"]
chrome_headed = ["chrome"]
chrome_headless_new = ["chrome"]
chrome_intercept = ["chrome"]
chrome_remote_cache = ["chrome", "chromey/_cache"]
chrome_remote_cache_disk = ["chrome_remote_cache", "chromey/cache"]
chrome_remote_cache_mem = ["chrome_remote_cache", "chromey/cache_mem"]
chrome_screenshot = ["chrome"]
chrome_serde_stacker = ["chrome", "chromey/serde_stacker"]
chrome_simd = ["chrome", "chromey/simd", "simd"]
chrome_stealth = ["chrome"]
chrome_store_page = ["chrome", "serde"]
chrome_tls_connection = ["chrome", "chromey/chrome_tls_connection"]
# Features with an empty list enable nothing in this manifest; presumably they
# only gate code through `cfg(feature = ...)` -- confirm in the sources.
cmd = ["tokio/process"]
control = []
cookies = ["reqwest/cookies", "dep:cookie"]
cowboy = []
cron = [
    "dep:async_job",
    "dep:chrono",
    "dep:cron",
    "dep:async-trait",
]
decentralized = ["serde", "flexbuffers"]
decentralized_headers = ["dep:const_format", "dep:itertools"]
# Enabled unless a consumer opts out of default features.
default = ["basic", "io_uring"]
disk = ["dep:sqlx"]
disk_aws = ["disk", "sqlx/tls-rustls-aws-lc-rs"]
disk_native_tls = ["disk", "sqlx/runtime-tokio-native-tls"]
encoding = []
extra_information = []
firewall = ["dep:spider_firewall", "chromey/firewall"]
flexbuffers = ["dep:flexbuffers"]
fs = ["tokio/fs"]
full_resources = []
gemini = [
    "chrome",
    "serde",
    "chrome_intercept",
    "dep:gemini-rust",
    "dep:serde_json",
]
glob = ["dep:itertools"]
headers = ["dep:httpdate"]
inline-more = []
# `tokio-uring` is declared only in the Linux target dependency table.
io_uring = ["dep:tokio-uring"]
openai = [
    "chrome",
    "serde",
    "chrome_intercept",
    "dep:async-openai",
    "dep:tiktoken-rs",
    "dep:serde_json",
]
openai_slim_fit = []
page_error_status_details = []
real_browser = ["dep:statrs", "dep:rand", "dep:fastrand"]
regex = []
remote_addr = []

# Pass-through switches for features of the `reqwest` dependency.
reqwest_hickory_dns = ["reqwest/hickory-dns"]
reqwest_json = ["reqwest/json"]
reqwest_multipart = ["reqwest/multipart"]
reqwest_native_tls = ["reqwest/native-tls"]
reqwest_native_tls_alpn = ["reqwest/native-tls-alpn"]
reqwest_native_tls_vendored = ["reqwest/native-tls-vendored"]
# NOTE: despite `native_tls` in their names, the following three features map
# to reqwest's `rustls-tls-*` features.
reqwest_native_tls_manual_roots = ["reqwest/rustls-tls-manual-roots"]
reqwest_native_tls_native_roots = ["reqwest/rustls-tls-native-roots"]
reqwest_native_tls_webpki_roots = ["reqwest/rustls-tls-webpki-roots"]
reqwest_rustls_tls = ["reqwest/rustls-tls"]

# Named `rquest_*`, but the feature it turns on belongs to the `wreq` crate.
rquest_hickory_dns = ["wreq/hickory-dns"]
serde = [
    "dep:serde",
    "dep:serde_json",
    "hashbrown/serde",
    "string-interner/serde",
    "dep:serde_regex",
    "smallvec/serde",
]
simd = ["dep:sonic-rs"]
sitemap = ["dep:sitemap"]
smart = ["chrome", "chrome_intercept"]
socks = ["reqwest/socks"]
spoof = ["dep:fastrand"]
# All three backend features pull in the same optional `string-interner` crate.
string_interner_bucket_backend = ["dep:string-interner"]
string_interner_buffer_backend = ["dep:string-interner"]
string_interner_string_backend = ["dep:string-interner"]
sync = ["tokio/sync"]
time = []
tokio_io_std = ["tokio/io-std"]
tracing = ["tokio/tracing", "dep:tracing"]
ua_generator = ["dep:ua_generator"]
wreq = ["dep:wreq", "dep:wreq-util"]
# Library target, declared explicitly.
[lib]
path = "src/lib.rs"
name = "spider"
# Only the `std` feature of ahash is enabled; its defaults are off.
[dependencies.ahash]
version = "0.8"
default-features = false
features = ["std"]

[dependencies.aho-corasick]
version = "1"

# Optional: pulled in by the `openai` feature.
[dependencies.async-openai]
version = "0.29"
optional = true

# Optional: pulled in by the `cron` feature.
[dependencies.async-trait]
version = "0.1"
optional = true

# Optional: pulled in by the `cron` feature.
[dependencies.async_job]
version = "0.1"
optional = true

[dependencies.auto_encoder]
version = "0.1"

# Optional: pulled in by the `chrome` feature.
[dependencies.base64]
version = "0.22"
optional = true

[dependencies.bytes]
version = "1"
features = ["serde"]

[dependencies.case_insensitive_string]
version = "0.2"
features = ["compact", "serde"]
# Optional: pulled in by the `chrome` feature. Default features are off; only
# `bytes` and `stream` are always requested, the rest is toggled through the
# `chromey/...` entries in the feature table.
[dependencies.chromey]
version = "2"
optional = true
default-features = false
features = ["bytes", "stream"]

# Optional: pulled in by the `cron` feature.
[dependencies.chrono]
version = "0.4"
optional = true

# Optional: pulled in by the `decentralized_headers` feature.
[dependencies.const_format]
version = "0.2"
optional = true
# Optional: pulled in by the `cookies` feature.
# Constrained to the 0.18 series: `cookie` is pre-1.0, so each 0.x minor
# release may break the API, and a bare "0" requirement would accept all of
# them. NOTE(review): 0.18 is assumed to be the series reqwest 0.12 uses --
# confirm before bumping either crate.
[dependencies.cookie]
version = "0.18"
optional = true
# Optional: pulled in by the `cron` feature.
[dependencies.cron]
version = "0.15"
optional = true

# Optional: pulled in by the `chrome`, `real_browser` and `spoof` features.
# The target-specific tables further down declare the same crate again.
[dependencies.fastrand]
version = "2"
optional = true

# Optional: pulled in by the `flexbuffers` feature.
[dependencies.flexbuffers]
version = "2"
optional = true

# Optional: pulled in by the `gemini` feature.
[dependencies.gemini-rust]
version = "1.6"
optional = true

[dependencies.h2]
version = "0.4"

# Default features are kept on explicitly.
[dependencies.hashbrown]
version = "0.15"
default-features = true
# Optional: pulled in by the `chrome` feature.
[dependencies.home]
version = "0.5"
optional = true

# Optional: pulled in by `cache_request` and both `cache_chrome_hybrid*` features.
[dependencies.http]
version = "1"
optional = true

# Optional: pulled in by both `cache_chrome_hybrid*` features.
[dependencies.http-cache]
version = "0.20"
optional = true
default-features = false

# Optional: pulled in by the `cache_request` feature.
[dependencies.http-cache-reqwest]
version = "0.15"
optional = true
default-features = false

# Optional: pulled in by both `cache_chrome_hybrid*` features.
[dependencies.http-cache-semantics]
version = "2"
optional = true

# Optional: pulled in by the `cache_request` feature; `cache` and `cache_mem`
# then choose one of its `cache` / `cache_mem` features.
[dependencies.http-global-cache]
version = "0.1.0"
optional = true
default-features = false
features = ["cache_request"]

# Optional: pulled in by the `headers` feature.
[dependencies.httpdate]
version = "1"
optional = true

# Optional: pulled in by the `decentralized_headers` and `glob` features.
[dependencies.itertools]
version = "0.14"
optional = true
[dependencies.lazy_static]
version = "1"

[dependencies.log]
version = "0.4"

[dependencies.lol_html]
version = "2"

# Optional: pulled in by the `cache_gemini` and `cache_openai` features.
[dependencies.moka]
version = "0.12"
optional = true
features = ["future"]

[dependencies.num_cpus]
version = "1"

[dependencies.percent-encoding]
version = "2"

[dependencies.phf]
version = "0.11"

[dependencies.pin-project-lite]
version = "0.2"

[dependencies.quick-xml]
version = "0.38"
features = ["serde", "serialize", "async-tokio"]

# Optional: pulled in by the `chrome` and `real_browser` features.
[dependencies.rand]
version = "0.9"
optional = true

# Always compiled; the `regex` entry in the feature table is a separate,
# empty feature.
[dependencies.regex]
version = "1"

# Optional: pulled in by the `cache_request` feature.
[dependencies.reqwest-middleware]
version = "0.4"
optional = true
default-features = false
# Optional: pulled in by the `serde` feature.
[dependencies.serde]
version = "1"
optional = true
features = ["derive"]

# Optional: pulled in by the `serde`, `gemini` and `openai` features.
[dependencies.serde_json]
version = "1"
optional = true

# Optional: pulled in by the `serde` feature.
[dependencies.serde_regex]
version = "1"
optional = true

# Optional: pulled in by the `sitemap` feature.
[dependencies.sitemap]
version = "0.4"
optional = true

[dependencies.smallvec]
version = "1"

# Optional: pulled in by the `simd` feature.
[dependencies.sonic-rs]
version = "0.5"
optional = true

# Always compiled, with default features off and an explicit feature list.
[dependencies.spider_fingerprint]
version = "2"
default-features = false
features = ["serde", "headers", "dynamic-versions"]
# Optional: pulled in by the `firewall` feature.
[dependencies.spider_firewall]
version = "2"
optional = true

# Optional: pulled in by the `disk` feature; `disk_aws` and `disk_native_tls`
# add further sqlx features on top of the two listed here.
[dependencies.sqlx]
version = "0.8"
optional = true
features = ["runtime-tokio", "sqlite"]

# Optional: pulled in by the `real_browser` feature.
[dependencies.statrs]
version = "0.18"
optional = true

# Optional: pulled in by any of the `string_interner_*_backend` features.
[dependencies.string-interner]
version = "0.19"
optional = true
default-features = false
features = ["std", "inline-more", "backends"]

[dependencies.string_concat]
version = "0.0.1"

[dependencies.strum]
version = "0.26"
features = ["derive"]

# Optional: pulled in by the `balance` feature.
[dependencies.sysinfo]
version = "0.35"
optional = true
default-features = false
features = ["system"]

# Optional: pulled in by the `openai` feature.
[dependencies.tiktoken-rs]
version = "0.7"
optional = true

[dependencies.tokio-stream]
version = "0.1"

[dependencies.tower]
version = "0.5"
features = ["limit"]

# Optional: pulled in by the `tracing` feature.
[dependencies.tracing]
version = "0.1"
optional = true
default-features = false
features = ["std"]

# Optional: pulled in by the `ua_generator` feature.
[dependencies.ua_generator]
version = "^0.5"
optional = true

[dependencies.url]
version = "2"

# Optional: pulled in by the `chrome` feature.
[dependencies.which]
version = "6.0"
optional = true
# Optional: pulled in by the `wreq` feature.
[dependencies.wreq]
version = "5"
optional = true
features = [
    "json",
    "stream",
    "socks",
    "gzip",
    "brotli",
    "zstd",
    "deflate",
    "cookies",
]

# Optional: pulled in by the `wreq` feature.
[dependencies.wreq-util]
version = "2"
optional = true
features = ["emulation-serde"]
# Dependencies for every target except wasm32.
[target.'cfg(not(target_arch = "wasm32"))'.dependencies.fastrand]
version = "2"
optional = true

[target.'cfg(not(target_arch = "wasm32"))'.dependencies.reqwest]
version = "0.12"
features = [
    "brotli",
    "gzip",
    "deflate",
    "zstd",
    "stream",
    "http2",
]

# Default features are off; the multi-threaded runtime is requested here.
[target.'cfg(not(target_arch = "wasm32"))'.dependencies.tokio]
version = "1"
default-features = false
features = ["macros", "time", "rt-multi-thread"]
# Dependencies for wasm32 only. Compared with the non-wasm32 tables: fastrand
# adds the `js` feature, reqwest omits `zstd`, and tokio requests `rt` instead
# of `rt-multi-thread`.
[target.'cfg(target_arch = "wasm32")'.dependencies.fastrand]
version = "2"
optional = true
features = ["js"]

[target.'cfg(target_arch = "wasm32")'.dependencies.reqwest]
version = "0.12"
features = [
    "brotli",
    "gzip",
    "deflate",
    "stream",
    "http2",
]

[target.'cfg(target_arch = "wasm32")'.dependencies.tokio]
version = "1"
default-features = false
features = ["macros", "time", "rt"]
# Linux-only dependencies.
[target.'cfg(target_os = "linux")'.dependencies.libc]
version = "0.2"

# Optional: pulled in by the `io_uring` feature.
[target.'cfg(target_os = "linux")'.dependencies.tokio-uring]
version = "0.4"
optional = true