1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
//! # decruft
//!
//! Extract clean, readable content from web pages.
//!
//! Given a noisy HTML page (ads, navigation, sidebars, popups, cookie banners...),
//! decruft extracts the main content and metadata.
//!
//! ## Quick start
//!
//! ```
//! use decruft::{parse, DecruftOptions};
//!
//! let html = r#"<html>
//! <head><title>My Post - Blog</title></head>
//! <body>
//! <nav><a href="/">Home</a></nav>
//! <article><h1>My Post</h1><p>The content.</p></article>
//! <footer>Copyright 2025</footer>
//! </body>
//! </html>"#;
//!
//! let result = parse(html, &DecruftOptions::default());
//! assert!(result.content.contains("The content."));
//! assert!(!result.content.contains("Copyright"));
//! ```
//!
//! Or, even simpler, use [`parse_with_defaults`] to skip the options argument:
//!
//! ```
//! let html = "<html><body><article><p>Hello</p></article></body></html>";
//! let result = decruft::parse_with_defaults(html);
//! assert!(result.content.contains("Hello"));
//! ```
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub
pub use parse;
pub use strip_html_tags;
pub use ;
/// Parse HTML with default options.
///
/// Equivalent to `parse(html, &DecruftOptions::default())`.
///
/// # Examples
///
/// ```
/// let html = "<html><body><article><p>Hello</p></article></body></html>";
/// let result = decruft::parse_with_defaults(html);
/// assert!(result.content.contains("Hello"));
/// ```