Skip to main content

spider_agent_html/
lib.rs

1//! # Spider Agent HTML
2//!
3//! HTML processing utilities for `spider_agent` — cleaning, content analysis integration, and diffing.
4//!
5//! This crate provides the HTML cleaning functions extracted from `spider_agent`.
6//! It uses `lol_html` for fast, streaming HTML rewriting.
7//!
8//! ## Dependencies
9//!
10//! - `lol_html` — streaming HTML rewriter
11//! - `aho-corasick` — pattern matching (via `spider_agent_types`)
12//! - `spider_agent_types` — type definitions
13
14mod cleaning;
15
16pub use cleaning::{
17    clean_html, clean_html_base, clean_html_full, clean_html_raw, clean_html_slim,
18    clean_html_with_profile, clean_html_with_profile_and_intent, smart_clean_html,
19};