# Crate metadata. `name` and `version` lead, the remaining keys are
# alphabetical, and `description` comes last.
[package]
name = "halldyll-core"
version = "0.1.0"
authors = ["Geryan Roy <geryan.roy@icloud.com>"]
categories = ["web-programming", "parsing"]
documentation = "https://docs.rs/halldyll-core"
edition = "2021"
homepage = "https://github.com/Mr-soloDev/halldyll-Scrapper"
keywords = ["scraper", "crawler", "web", "ai", "async"]
license = "MIT"
# The README lives two directories above this manifest.
readme = "../../README.md"
repository = "https://github.com/Mr-soloDev/halldyll-Scrapper"
description = "Core scraping engine for Halldyll - high-performance async web scraper for AI agents"

# Runtime dependencies, sorted alphabetically. Version strings are caret
# requirements (e.g. "1.35" accepts any compatible 1.x release >= 1.35).
[dependencies]
brotli = "3.4"
bytes = "1.5"
chrono = { version = "0.4", features = ["serde"] }
ego-tree = "0.6"
encoding_rs = "0.8"
flate2 = "1.0"
futures = "0.3"
quick-xml = "0.31"
rand = "0.8"
regex = "1.10"
reqwest = { version = "0.11", features = ["gzip", "brotli", "deflate", "json", "stream", "cookies"] }
scraper = "0.18"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha2 = "0.10"
texting_robots = "0.2"
thiserror = "1.0"
# NOTE(review): "full" turns on every tokio feature; confirm whether this
# crate needs all of them or a narrower set would do.
tokio = { version = "1.35", features = ["full"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] }
url = { version = "2.5", features = ["serde"] }
uuid = { version = "1.6", features = ["v4"] }

# Dependencies compiled only for tests, examples, and benchmarks; sorted
# alphabetically.
[dev-dependencies]
tempfile = "3.9"
tokio-test = "0.4"
wiremock = "0.5"

# Cargo feature flags. Nothing is enabled by default, and both named
# features are empty lists: they enable no other features and no optional
# dependencies.
[features]
default = []
browser = []
warc = []