Skip to main content

servo_fetch/
lib.rs

1//! Web content extraction library powered by Servo and Readability.
2//!
3//! This crate provides utilities for extracting readable content from HTML:
4//!
5//! - [`extract`] — Convert HTML into Markdown or structured JSON using
6//!   Mozilla's Readability algorithm.
7//! - [`layout`] — CSS layout heuristics to detect and strip navbars,
8//!   sidebars, and footers before extraction.
9//! - [`sanitize`] — Strip ANSI escape sequences and control characters
10//!   from output strings.
11
// Lint level `forbid` turns any use of `unsafe` into a hard compile error;
// unlike `deny`, it cannot be relaxed by a later `allow` attribute.
12#![forbid(unsafe_code)]
13
// Public modules, as summarized in the crate-level docs at the top of this file.
// HTML to Markdown / structured JSON conversion using the Readability algorithm.
14pub mod extract;
// CSS layout heuristics that detect and strip navbars, sidebars, and footers.
15pub mod layout;
// Stripping of ANSI escape sequences and control characters from output strings.
16pub mod sanitize;