# halldyll-core 0.1.0

# Core scraping engine for Halldyll - high-performance async web scraper for AI agents
# Documentation
# Crate metadata. `name` and `version` come first, the remaining keys are
# alphabetical, and `description` is kept last.
[package]
name = "halldyll-core"
version = "0.1.0"
authors = ["Geryan Roy <geryan.roy@icloud.com>"]
categories = ["web-programming", "parsing"]
documentation = "https://docs.rs/halldyll-core"
edition = "2021"
homepage = "https://github.com/Mr-soloDev/halldyll-Scrapper"
keywords = ["scraper", "crawler", "web", "ai", "async"]
license = "MIT"
# The README path points two directories above this manifest.
readme = "../../README.md"
repository = "https://github.com/Mr-soloDev/halldyll-Scrapper"
description = "Core scraping engine for Halldyll - high-performance async web scraper for AI agents"



# Runtime dependencies, sorted alphabetically. The comment on each entry is the
# category the crate is used for.
[dependencies]
brotli = "3.4"  # Compression
bytes = "1.5"  # Bytes
chrono = { version = "0.4", features = ["serde"] }  # Time
ego-tree = "0.6"  # HTML parsing
encoding_rs = "0.8"  # Encoding
flate2 = "1.0"  # Compression
futures = "0.3"  # Futures
quick-xml = "0.31"  # XML parsing (for sitemaps)
rand = "0.8"  # Random
regex = "1.10"  # Regex
# HTTP
reqwest = { version = "0.11", features = ["gzip", "brotli", "deflate", "json", "stream", "cookies"] }
scraper = "0.18"  # HTML parsing
serde = { version = "1.0", features = ["derive"] }  # Serialization
serde_json = "1.0"  # Serialization
sha2 = "0.10"  # Hashing
texting_robots = "0.2"  # Robots.txt
thiserror = "1.0"  # Error handling
tokio = { version = "1.35", features = ["full"] }  # Async runtime
tracing = "0.1"  # Tracing
tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] }  # Tracing
url = { version = "2.5", features = ["serde"] }  # HTTP
uuid = { version = "1.6", features = ["v4"] }  # UUID



# Dependencies needed only for tests, examples and benchmarks; sorted alphabetically.
[dev-dependencies]
tempfile = "3.9"
tokio-test = "0.4"
wiremock = "0.5"



# Cargo feature flags. Nothing is enabled by default, and each flag below has an
# empty list, so enabling it turns on no optional dependency or other feature
# from this manifest.
# NOTE(review): presumably these gate `cfg(feature = "...")` code in the crate;
# confirm against the sources.
[features]
default = []
browser = []
warc = []