Skip to main content

html_cleaning/
options.rs

1//! Configuration options for HTML cleaning.
2
/// Configuration for HTML cleaning operations.
///
/// Every field is public, so a value can be written as a struct literal
/// (typically finished with `..Default::default()`) or assembled step by step
/// through [`CleaningOptions::builder`].
///
/// The derived [`Default`] leaves every list empty and every flag `false`.
/// Values compare field by field via the derived `PartialEq`/`Eq`.
///
/// # Example
///
/// ```
/// use html_cleaning::CleaningOptions;
///
/// let options = CleaningOptions {
///     tags_to_remove: vec!["script".into(), "style".into()],
///     prune_empty: true,
///     ..Default::default()
/// };
///
/// // Fields that were not named keep their (empty / `false`) defaults.
/// assert!(options.empty_tags.is_empty());
/// assert!(!options.strip_attributes);
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CleaningOptions {
    /// HTML tags to remove completely (including children).
    ///
    /// Example: `["script", "style", "noscript"]`
    pub tags_to_remove: Vec<String>,

    /// HTML tags to strip (remove the tag itself, keep its children).
    ///
    /// Example: `["span", "font"]`
    pub tags_to_strip: Vec<String>,

    /// CSS selectors for elements to remove.
    ///
    /// Example: `[".advertisement", "#cookie-banner"]`
    pub selectors_to_remove: Vec<String>,

    /// Remove elements with no text content.
    pub prune_empty: bool,

    /// Tags considered "empty" for pruning.
    ///
    /// Typical set: `["div", "span", "p", "section", "article"]`
    ///
    /// The derived `Default` yields an *empty* list here, not the set above.
    /// NOTE(review): whether the cleaner falls back to that set when this
    /// list is empty is not visible from this file — confirm against the
    /// pruning code, or set the list explicitly.
    pub empty_tags: Vec<String>,

    /// Normalize whitespace in text nodes.
    pub normalize_whitespace: bool,

    /// Remove all attributes from elements.
    pub strip_attributes: bool,

    /// Attributes to preserve when `strip_attributes` is true.
    ///
    /// Example: `["href", "src", "alt"]`
    pub preserved_attributes: Vec<String>,
}
52
53impl CleaningOptions {
54    /// Create a new builder for `CleaningOptions`.
55    #[must_use]
56    pub fn builder() -> CleaningOptionsBuilder {
57        CleaningOptionsBuilder::default()
58    }
59}
60
61/// Builder for `CleaningOptions`.
62#[derive(Debug, Clone, Default)]
63pub struct CleaningOptionsBuilder {
64    options: CleaningOptions,
65}
66
67impl CleaningOptionsBuilder {
68    /// Add tags to remove (including children).
69    #[must_use]
70    pub fn remove_tags(mut self, tags: &[&str]) -> Self {
71        self.options
72            .tags_to_remove
73            .extend(tags.iter().map(|s| (*s).to_string()));
74        self
75    }
76
77    /// Add tags to strip (keep children).
78    #[must_use]
79    pub fn strip_tags(mut self, tags: &[&str]) -> Self {
80        self.options
81            .tags_to_strip
82            .extend(tags.iter().map(|s| (*s).to_string()));
83        self
84    }
85
86    /// Add CSS selectors to remove.
87    #[must_use]
88    pub fn remove_selectors(mut self, selectors: &[&str]) -> Self {
89        self.options
90            .selectors_to_remove
91            .extend(selectors.iter().map(|s| (*s).to_string()));
92        self
93    }
94
95    /// Enable empty element pruning.
96    #[must_use]
97    pub fn prune_empty(mut self, enabled: bool) -> Self {
98        self.options.prune_empty = enabled;
99        self
100    }
101
102    /// Set tags to consider as empty for pruning.
103    #[must_use]
104    pub fn empty_tags(mut self, tags: &[&str]) -> Self {
105        self.options.empty_tags = tags.iter().map(|s| (*s).to_string()).collect();
106        self
107    }
108
109    /// Enable whitespace normalization.
110    #[must_use]
111    pub fn normalize_whitespace(mut self, enabled: bool) -> Self {
112        self.options.normalize_whitespace = enabled;
113        self
114    }
115
116    /// Enable attribute stripping.
117    #[must_use]
118    pub fn strip_attributes(mut self, enabled: bool) -> Self {
119        self.options.strip_attributes = enabled;
120        self
121    }
122
123    /// Set preserved attributes (when `strip_attributes` is enabled).
124    #[must_use]
125    pub fn preserve_attributes(mut self, attrs: &[&str]) -> Self {
126        self.options.preserved_attributes = attrs.iter().map(|s| (*s).to_string()).collect();
127        self
128    }
129
130    /// Build the `CleaningOptions`.
131    #[must_use]
132    pub fn build(self) -> CleaningOptions {
133        self.options
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// The derived `Default` must yield empty lists and cleared flags.
    #[test]
    fn test_default_options() {
        let options = CleaningOptions::default();
        assert!(options.tags_to_remove.is_empty());
        assert!(options.tags_to_strip.is_empty());
        assert!(options.selectors_to_remove.is_empty());
        assert!(options.empty_tags.is_empty());
        assert!(options.preserved_attributes.is_empty());
        assert!(!options.prune_empty);
        assert!(!options.normalize_whitespace);
        assert!(!options.strip_attributes);
    }

    /// Basic round trip through the builder.
    #[test]
    fn test_builder() {
        let options = CleaningOptions::builder()
            .remove_tags(&["script", "style"])
            .prune_empty(true)
            .build();

        assert_eq!(options.tags_to_remove.len(), 2);
        assert!(options.prune_empty);
    }

    /// `remove_tags`, `strip_tags` and `remove_selectors` append across calls.
    #[test]
    fn test_builder_lists_accumulate() {
        let options = CleaningOptions::builder()
            .remove_tags(&["script"])
            .remove_tags(&["style"])
            .strip_tags(&["span"])
            .strip_tags(&["font"])
            .remove_selectors(&[".advertisement"])
            .remove_selectors(&["#cookie-banner"])
            .build();

        assert_eq!(options.tags_to_remove, vec!["script", "style"]);
        assert_eq!(options.tags_to_strip, vec!["span", "font"]);
        assert_eq!(
            options.selectors_to_remove,
            vec![".advertisement", "#cookie-banner"]
        );
    }

    /// `empty_tags` and `preserve_attributes` overwrite the previous list.
    #[test]
    fn test_builder_setters_replace() {
        let options = CleaningOptions::builder()
            .empty_tags(&["div", "span"])
            .empty_tags(&["p"])
            .preserve_attributes(&["href"])
            .preserve_attributes(&["src", "alt"])
            .build();

        assert_eq!(options.empty_tags, vec!["p"]);
        assert_eq!(options.preserved_attributes, vec!["src", "alt"]);
    }

    /// Flag setters store exactly the value passed, including `false`.
    #[test]
    fn test_builder_flags() {
        let options = CleaningOptions::builder()
            .prune_empty(true)
            .normalize_whitespace(true)
            .strip_attributes(true)
            .prune_empty(false)
            .build();

        assert!(!options.prune_empty);
        assert!(options.normalize_whitespace);
        assert!(options.strip_attributes);
    }
}