pub struct HtmlCleaner { /* private fields */ }
HTML cleaning utility.
Provides methods for removing, stripping, and normalizing HTML elements.
§Example
use html_cleaning::{HtmlCleaner, CleaningOptions};
use dom_query::Document;
let options = CleaningOptions {
tags_to_remove: vec!["script".into(), "style".into()],
prune_empty: true,
..Default::default()
};
let cleaner = HtmlCleaner::with_options(options);
let doc = Document::from("<div><script>x</script><p>Hello</p></div>");
cleaner.clean(&doc);
assert!(doc.select("script").is_empty());
Implementations§
impl HtmlCleaner
pub fn with_options(options: CleaningOptions) -> Self
Create a cleaner with custom options.
pub fn options(&self) -> &CleaningOptions
Get a reference to the current options.
pub fn clean(&self, doc: &Document)
Apply all configured cleaning operations to the document.
Operations are applied in this order:
1. Remove tags (with children)
2. Strip tags (keep children)
3. Remove by CSS selector
4. Prune empty elements
5. Normalize whitespace
6. Clean attributes
remove_tags
Remove elements matching the given tags (including all of their children).
§Example
use html_cleaning::HtmlCleaner;
use dom_query::Document;
let cleaner = HtmlCleaner::new();
let doc = Document::from("<div><script>bad</script><p>good</p></div>");
cleaner.remove_tags(&doc, &["script"]);
assert!(doc.select("script").is_empty());
strip_tags
Strip tags but preserve their children.
The tag wrapper is removed but inner content (text and child elements) is moved to the parent.
§Example
use html_cleaning::HtmlCleaner;
use dom_query::Document;
let cleaner = HtmlCleaner::new();
let doc = Document::from("<div><span>text</span></div>");
cleaner.strip_tags(&doc, &["span"]);
assert!(doc.select("span").is_empty());
pub fn remove_by_selector(&self, doc: &Document, selector: &str)
Remove elements matching a CSS selector.
§Example
use html_cleaning::HtmlCleaner;
use dom_query::Document;
let cleaner = HtmlCleaner::new();
let doc = Document::from(r#"<div class="ad">Ad</div><p>Content</p>"#);
cleaner.remove_by_selector(&doc, ".ad");
assert!(doc.select(".ad").is_empty());
pub fn prune_empty(&self, doc: &Document)
Remove empty elements.
Elements are considered empty if they:
- Have no child elements
- Have no text content (or only whitespace)
Processes in reverse document order (children before parents).
pub fn normalize_text(&self, doc: &Document)
Normalize text nodes (trim, collapse whitespace).
Walks all text nodes and collapses each run of whitespace into a single space.
pub fn clean_attributes(&self, doc: &Document)
Remove or filter attributes from all elements.
If `strip_attributes` is true in options:
- Removes all attributes except those in `preserved_attributes`
Trait Implementations§
impl Clone for HtmlCleaner
fn clone(&self) -> HtmlCleaner
1.0.0 · fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more