# Package metadata for the `kumo` crate.
[package]
name = "kumo"
version = "0.3.8"
authors = ["wihlarkop"]
edition = "2024"
rust-version = "1.88"
license = "MIT"
readme = "README.md"
repository = "https://github.com/wihlarkop/kumo"
homepage = "https://github.com/wihlarkop/kumo"
documentation = "https://kumo.wihlarkop.com"
keywords = ["web", "crawling", "scraping", "spider", "async"]
categories = ["web-programming", "asynchronous"]
# Explicitly set to "2"; per the Cargo reference, edition 2024 would
# otherwise default to resolver "3".
resolver = "2"
# Build-script detection is disabled.
build = false
# Target auto-discovery is off for every target kind, so the lib, examples,
# tests and benches are each declared explicitly further down.
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
# Paths left out of the packaged crate.
exclude = [
    ".cargo/",
    ".github/",
    "benchmark/",
    "docs/",
    "lefthook.yml",
]
description = "An async web crawling framework for Rust - Scrapy for Rust"
# Opt-in capabilities. `default` is empty, so depending on kumo without
# naming a feature enables none of the optional crates referenced here.
[features]
default = []

# Each of these only pulls in the optional crates it names.
browser = ["dep:chromiumoxide", "dep:futures"]
derive = ["dep:kumo-derive"]
jsonpath = ["dep:jsonpath-rust"]
stealth = ["dep:rquest", "dep:rquest-util"]
xpath = ["dep:sxd-xpath", "dep:sxd-document", "dep:sxd_html"]

# `llm` brings in schemars and rig-core. The four provider flags enable
# nothing beyond `llm` itself.
llm = ["dep:schemars", "dep:rig-core"]
claude = ["llm"]
gemini = ["llm"]
ollama = ["llm"]
openai = ["llm"]

# All three database flags share the optional `sqlx` dependency and add the
# matching sqlx driver feature.
mysql = ["dep:sqlx", "sqlx/mysql"]
postgres = ["dep:sqlx", "sqlx/postgres"]
sqlite = ["dep:sqlx", "sqlx/sqlite"]

# `cloud` enables `object_store`; the provider flags add one object_store
# backend feature each on top of it.
cloud = ["dep:object_store"]
cloud-azure = ["cloud", "object_store/azure"]
cloud-gcs = ["cloud", "object_store/gcp"]
cloud-s3 = ["cloud", "object_store/aws"]

redis-frontier = ["dep:redis"]
# Enables no optional dependency; presumably only gates code through
# `cfg(feature = "persistence")` - confirm in the crate sources.
persistence = []

otel = [
    "dep:opentelemetry",
    "dep:opentelemetry_sdk",
    "dep:opentelemetry-otlp",
    "dep:tracing-opentelemetry",
]
# Library target, declared explicitly because `autolib = false` in [package].
[lib]
path = "src/lib.rs"
name = "kumo"
# Example targets, listed by hand because `autoexamples = false` in [package].
# An entry with `required-features` is only built when all of the named
# features are enabled.
[[example]]
name = "autothrottle"
path = "examples/autothrottle.rs"

[[example]]
name = "books"
path = "examples/books.rs"

[[example]]
name = "books_derive"
path = "examples/books_derive.rs"
required-features = ["derive"]

[[example]]
name = "browser"
path = "examples/browser.rs"
required-features = ["browser"]

[[example]]
name = "cloud"
path = "examples/cloud.rs"
required-features = ["cloud"]

[[example]]
name = "http_cache"
path = "examples/http_cache.rs"

[[example]]
name = "link_extractor"
path = "examples/link_extractor.rs"

[[example]]
name = "llm_extract"
path = "examples/llm_extract.rs"
required-features = ["claude"]

[[example]]
name = "llm_fallback"
path = "examples/llm_fallback.rs"
required-features = ["derive", "claude"]

[[example]]
name = "multi_spider"
path = "examples/multi_spider.rs"

[[example]]
name = "polite_crawling"
path = "examples/polite_crawling.rs"

[[example]]
name = "postgres"
path = "examples/postgres.rs"
required-features = ["postgres"]

[[example]]
name = "production_crawler"
path = "examples/production_crawler.rs"
required-features = ["persistence"]

[[example]]
name = "proxy_rotation"
path = "examples/proxy_rotation.rs"

[[example]]
name = "quotes"
path = "examples/quotes.rs"

[[example]]
name = "request_scheduling"
path = "examples/request_scheduling.rs"

[[example]]
name = "selectors"
path = "examples/selectors.rs"

[[example]]
name = "sqlite"
path = "examples/sqlite.rs"
required-features = ["sqlite"]

[[example]]
name = "stealth"
path = "examples/stealth.rs"
required-features = ["stealth"]

[[example]]
name = "xpath"
path = "examples/xpath.rs"
required-features = ["xpath"]
# Integration-test targets, listed by hand because `autotests = false` in
# [package].
#
# NOTE(review): only `derive_macro` declares `required-features`. Targets
# named after optional features (llm_extract, llm_prompt, redis_frontier,
# store_cloud, store_postgres) declare none - confirm those files gate
# themselves with `cfg(feature = ...)`, otherwise they are compiled even
# when the feature is off.
[[test]]
name = "crawl_stats"
path = "tests/crawl_stats.rs"

[[test]]
name = "derive_macro"
path = "tests/derive_macro.rs"
required-features = ["derive"]

[[test]]
name = "engine_basic"
path = "tests/engine_basic.rs"

[[test]]
name = "engine_budgets"
path = "tests/engine_budgets.rs"

[[test]]
name = "engine_retry"
path = "tests/engine_retry.rs"

[[test]]
name = "engine_stream"
path = "tests/engine_stream.rs"

[[test]]
name = "error"
path = "tests/error.rs"

[[test]]
name = "extract_link_extractor"
path = "tests/extract_link_extractor.rs"

[[test]]
name = "extract_response"
path = "tests/extract_response.rs"

[[test]]
name = "extract_selector"
path = "tests/extract_selector.rs"

[[test]]
name = "fetch_cache"
path = "tests/fetch_cache.rs"

[[test]]
name = "fetch_mock"
path = "tests/fetch_mock.rs"

[[test]]
name = "fingerprint_policy"
path = "tests/fingerprint_policy.rs"

[[test]]
name = "frontier_file"
path = "tests/frontier_file.rs"

[[test]]
name = "frontier_flush"
path = "tests/frontier_flush.rs"

[[test]]
name = "frontier_memory"
path = "tests/frontier_memory.rs"

[[test]]
name = "llm_extract"
path = "tests/llm_extract.rs"

[[test]]
name = "llm_prompt"
path = "tests/llm_prompt.rs"

[[test]]
name = "middleware_integration"
path = "tests/middleware_integration.rs"

[[test]]
name = "middleware_proxy"
path = "tests/middleware_proxy.rs"

[[test]]
name = "middleware_status_retry"
path = "tests/middleware_status_retry.rs"

[[test]]
name = "middleware_user_agent"
path = "tests/middleware_user_agent.rs"

[[test]]
name = "pipeline_dedup"
path = "tests/pipeline_dedup.rs"

[[test]]
name = "pipeline_filter"
path = "tests/pipeline_filter.rs"

[[test]]
name = "pipeline_integration"
path = "tests/pipeline_integration.rs"

[[test]]
name = "redis_frontier"
path = "tests/redis_frontier.rs"

[[test]]
name = "retry_policy"
path = "tests/retry_policy.rs"

[[test]]
name = "robots"
path = "tests/robots.rs"

[[test]]
name = "scheduler_politeness"
path = "tests/scheduler_politeness.rs"

[[test]]
name = "scheduler_retry"
path = "tests/scheduler_retry.rs"

[[test]]
name = "sitemap"
path = "tests/sitemap.rs"

[[test]]
name = "spider_output"
path = "tests/spider_output.rs"

[[test]]
name = "store_cloud"
path = "tests/store_cloud.rs"

[[test]]
name = "store_csv"
path = "tests/store_csv.rs"

[[test]]
name = "store_json"
path = "tests/store_json.rs"

[[test]]
name = "store_jsonl"
path = "tests/store_jsonl.rs"

[[test]]
name = "store_postgres"
path = "tests/store_postgres.rs"
# Benchmark targets, listed by hand because `autobenches = false` in
# [package]. `harness = false` turns off the built-in libtest harness, so
# each file must provide its own entry point (presumably via criterion from
# the dev-dependencies - confirm in benches/).
[[bench]]
name = "fingerprint"
harness = false
path = "benches/fingerprint.rs"

[[bench]]
name = "frontier"
harness = false
path = "benches/frontier.rs"
# Runtime dependencies, sorted by name. Entries with `optional = true` are
# only compiled when a `dep:` entry in [features] enables them.
#
# NOTE(review): tracing-opentelemetry is at 0.28 while the opentelemetry
# crates are at 0.27; keep them on a compatible pair when bumping.
[dependencies]
async-trait = "0.1.89"
bloomfilter = "3.0.1"
bytes = "1.11.1"
chromiumoxide = { version = "0.9.1", optional = true }
futures = { version = "0.3.32", default-features = false, features = ["std"], optional = true }
governor = "0.10.4"
httpdate = "1.0.3"
jsonpath-rust = { version = "1.0.4", optional = true }
kumo-derive = { version = "0.1.3", optional = true }
object_store = { version = "0.11", optional = true }
opentelemetry = { version = "0.27", optional = true }
opentelemetry-otlp = { version = "0.27", features = ["grpc-tonic"], optional = true }
opentelemetry_sdk = { version = "0.27", features = ["rt-tokio"], optional = true }
rand = "0.9"
redis = { version = "1.2.0", features = ["tokio-rustls-comp"], optional = true }
regex = "1.12.3"
reqwest = { version = "0.13.2", default-features = false, features = [
    "rustls",
    "http2",
    "gzip",
    "brotli",
    "deflate",
    "json",
    "cookies",
] }
rig-core = { version = "0.35.0", optional = true }
rquest = { version = "5.1.0", features = ["cookies"], optional = true }
rquest-util = { version = "2.2.1", optional = true }
schemars = { version = "1.2.1", features = ["derive"], optional = true }
scraper = "0.26.0"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149"
sqlx = { version = "0.8.6", features = ["runtime-tokio-rustls", "json"], optional = true }
sxd-document = { version = "0.3.2", optional = true }
sxd-xpath = { version = "0.4.2", optional = true }
sxd_html = { version = "0.1.2", optional = true }
texting_robots = "0.2.2"
thiserror = "2.0.18"
tokio = { version = "1.52.1", features = [
    "rt-multi-thread",
    "sync",
    "time",
    "signal",
    "macros",
    "net",
] }
tokio-stream = "0.1"
tracing = "0.1.44"
tracing-opentelemetry = { version = "0.28", optional = true }
tracing-subscriber = { version = "0.3.23", features = ["env-filter"] }
url = "2.5.8"
# Dependencies used only when building tests, examples and benchmarks.
[dev-dependencies]
criterion = { version = "0.7", features = ["async_tokio"] }
mockito = "1.7.2"
tempfile = "3.27.0"
trybuild = { version = "1.0.116", features = ["diff"] }