halldyll-core 0.1.0

Core scraping engine for Halldyll - high-performance async web scraper for AI agents
Documentation
1
2
3
4
5
6
7
8
9
10
11
//! Crawl layer: frontier, normalization and deduplication.
//!
//! This file declares the `canonical`, `dedup`, `frontier` and `normalize`
//! submodules and re-exports selected items from each of them, so those items
//! can be imported directly from this module instead of through the submodule path.

pub mod canonical;
pub mod dedup;
pub mod frontier;
pub mod normalize;

// Flattened re-exports, grouped by the submodule that defines each item.
pub use self::{
    canonical::CanonicalResolver,
    dedup::{ContentDedup, UrlDedup},
    frontier::{CrawlEntry, Frontier},
    normalize::UrlNormalizer,
};