Skip to main content

html_filter/filter/
api.rs

1//! Public API for [`Filter`]
2
3use crate::Filter;
4use crate::filter::NodeTypeFilter;
5use crate::filter::element::{AttributeMatch, BlackWhiteList, ValueAssociateHash};
6
7/// Public API for [`Filter`] on node-type-filters (texts, doctypes, comments,
8/// etc.)
9impl Filter {
10    /// Short-hand to set the keep policy of comments, texts and doctypes at
11    /// once.
12    ///
13    /// - `true`: keep them
14    /// - `false`: remove them
15    ///
16    /// It is equivalent to:
17    ///
18    /// ```
19    /// use html_filter::*;
20    /// assert_eq!(Filter::new().doctype(true).text(true).comment(true), Filter::new().all(true));
21    /// assert_eq!(Filter::new().doctype(false).text(false).comment(false), Filter::new().all(false));
22    /// ```
23    #[must_use]
24    pub const fn all(self, all: bool) -> Self {
25        self.comment(all).doctype(all).text(all)
26    }
27
28    /// Removes the comments, and forces to keep doctypes and texts.
29    ///
30    /// See also [`Self::comment`] to allow comments without forcing others to
31    /// be kept.
32    ///
33    /// # Examples
34    ///
35    /// ```
36    /// use html_filter::*;
37    ///
38    /// let html = Html::parse("a <p> b <!-- c --></p> d").unwrap();
39    ///
40    /// assert_eq!(html.to_filtered(&Filter::new().tag_name("p").comment(false)), "<p> b </p>");
41    /// assert_eq!(html.filter(&Filter::new().tag_name("p").all_except_comment()), "a <p> b </p> d");
42    /// ```
43    #[must_use]
44    pub const fn all_except_comment(self) -> Self {
45        self.all(true).comment(false)
46    }
47
48    /// Removes the doctypes, and forces to keep comments and texts.
49    ///
50    /// See also [`Self::doctype`] to allow doctypes without forcing others to
51    /// be kept.
52    ///
53    /// # Examples
54    ///
55    /// ```
56    /// use html_filter::*;
57    ///
58    /// let html = Html::parse("<!doctype html> a <p> b </p> d").unwrap();
59    ///
60    /// assert_eq!(html.to_filtered(&Filter::new().tag_name("p").doctype(false)), "<p> b </p>");
61    /// assert_eq!(html.filter(&Filter::new().tag_name("p").all_except_doctype()), " a <p> b </p> d");
62    /// ```
63    #[must_use]
64    pub const fn all_except_doctype(self) -> Self {
65        self.all(true).doctype(false)
66    }
67
68    /// Removes the texts, and forces to keep doctypes and comments.
69    ///
70    /// See also [`Self::text`] to allow comments without forcing others to
71    /// be kept.
72    ///
73    /// # Examples
74    ///
75    /// ```
76    /// use html_filter::*;
77    ///
78    /// let html = Html::parse("<!doctype html> a <p> b <!-- c --></p> d <!-- e --> f").unwrap();
79    ///
80    /// assert_eq!(
81    ///     Filter::new().all_except_text(),
82    ///     Filter::new().text(false).comment(true).doctype(true)
83    /// );
84    ///
85    /// assert_eq!(html.to_filtered(&Filter::new().tag_name("p").text(false)), "<p><!-- c --></p>");
86    /// assert_eq!(
87    ///     html.filter(&Filter::new().tag_name("p").all_except_text()),
88    ///     "<!doctype html><p><!-- c --></p><!-- e -->"
89    /// );
90    /// ```
91    #[must_use]
92    pub const fn all_except_text(self) -> Self {
93        self.all(true).text(false)
94    }
95
96    /// Sets the filter for comments
97    ///
98    /// If `comment` is set to `true` (default), comments are kept.
99    /// If `comment` is set to `false`, comments are removed.
100    ///
101    /// See [`Filter`] for usage information.
102    #[must_use]
103    pub const fn comment(mut self, comment: bool) -> Self {
104        self.types.set_comment(comment);
105        self
106    }
107
108    /// Sets the filter for doctype tags
109    ///
110    /// If `doctype` is set to `true` (default), doctype tags are kept.
111    /// If `doctype` is set to `false`, doctype tags are removed.
112    ///
113    /// See [`Filter`] for usage information.
114    #[must_use]
115    pub const fn doctype(mut self, doctype: bool) -> Self {
116        self.types.set_doctype(doctype);
117        self
118    }
119
120    /// Keeps only the comments
121    ///
122    /// Doctypes and texts are removed, unless said otherwise by the user.
123    #[must_use]
124    pub const fn none_except_comment(self) -> Self {
125        self.all(false).comment(true)
126    }
127
128    /// Keeps only the doctypes
129    ///
130    /// Comments and texts are removed, unless said otherwise by the user.
131    #[must_use]
132    pub const fn none_except_doctype(self) -> Self {
133        self.all(false).doctype(true)
134    }
135
136    /// Keeps only the texts
137    ///
138    /// Comments and doctypes are removed, unless said otherwise by the user.
139    #[must_use]
140    pub const fn none_except_text(self) -> Self {
141        self.all(false).text(true)
142    }
143
144    /// Filters texts
145    ///
146    /// - If `text` is set to `true` (default), all texts are kept.
147    /// - If `text` is set to `false`, all texts are removed.
148    ///
149    /// See [`Filter`] for usage information.
150    #[must_use]
151    pub const fn text(mut self, text: bool) -> Self {
152        self.types.set_text(text);
153        self
154    }
155
156    /// Trims all texts
157    ///
158    /// This includes removal of text parts that contain only whitespaces, which
159    /// is very useful to remove new lines for example:
160    ///
161    /// # Examples
162    ///
163    /// ```
164    /// use html_filter::*;
165    ///
166    /// let html = Html::parse(
167    ///     "
168    /// <!doctype html>
169    /// <ul>
170    ///     <li>First</li>
171    ///     <li>Second></li>
172    /// </ul>
173    /// ",
174    /// )
175    /// .unwrap();
176    ///
177    /// // With trim
178    /// let filtered = html.to_filtered(&Filter::new().tag_name("ul").trim());
179    /// let (tag, child) = filtered.as_tag().unwrap();
180    /// assert_eq!(tag.as_name(), "ul");
181    ///
182    /// let vec = child.as_vec().unwrap();
183    /// assert!(matches!(vec[0], Html::Tag { .. })); // first li
184    /// assert!(matches!(vec[1], Html::Tag { .. })); // second li
185    /// assert_eq!(vec.len(), 2);
186    ///
187    /// // Without trim
188    /// let filtered = html.filter(&Filter::new().tag_name("ul"));
189    /// let (tag, child) = filtered.as_tag().unwrap();
190    /// assert_eq!(tag.as_name(), "ul");
191    ///
192    /// let vec = child.as_vec().unwrap();
193    /// assert_eq!(vec[0], Html::Text("\n    ".to_string()));
194    /// assert!(matches!(vec[1], Html::Tag { .. })); // first li
195    /// assert_eq!(vec[2], Html::Text("\n    ".to_string()));
196    /// assert!(matches!(vec[3], Html::Tag { .. })); // second li
197    /// assert_eq!(vec[4], Html::Text("\n".to_string()));
198    /// assert_eq!(vec.len(), 5);
199    /// ```
200    ///
201    /// See also [`Self::collapse`]
202    #[must_use]
203    pub const fn trim(mut self) -> Self {
204        self.types.trim();
205        self
206    }
207}
208
209/// Public API for [`Filter`] on tags and attributes
210impl Filter {
211    /// Specifies the name of an attribute in the wanted tags.
212    ///
213    /// This matches only tag attributes that don't have any value, such as
214    /// `enabled` in
215    ///
216    /// ```html
217    /// <button enabled type="submit" />
218    /// ```
219    ///
220    /// See [`Filter`] for usage information.
221    #[must_use]
222    pub fn attribute_name<N: Into<String>>(mut self, name: N) -> Self {
223        self.attrs.push(name.into(), AttributeMatch::NoValue, true);
224        self
225    }
226
227    /// Specifies the value of an attribute in the wanted tags.
228    ///
229    /// This matches only tag attributes that have the correct value for the
230    /// given name. To match only one value inside that values (e.g. class
231    /// names), cf. [`Filter::attribute_value_contains`].
232    ///
233    /// See [`Filter`] for usage information.
234    #[must_use]
235    pub fn attribute_value<N: Into<String>, V: Into<String>>(mut self, name: N, value: V) -> Self {
236        self.attrs.push(name.into(), AttributeMatch::Is(value.into()), true);
237        self
238    }
239
240    /// Specifies a possible value of an attribute in the wanted tags.
241    ///
242    /// This matches only tag attributes that have the given value as part of
243    /// the space-separated values inside the attribute value (cf. example
244    /// below). To match exact value, see [`Filter::attribute_value`].
245    ///
246    ///
247    /// # Examples
248    ///
249    /// ```
250    /// use html_filter::*;
251    ///
252    /// let html = Html::parse(r#"<div class="some_class other_class" />"#).unwrap();
253    /// let filter = Filter::new().attribute_value_contains("class", "some_class");
254    ///
255    /// if let Html::Tag { tag: Tag { name, .. }, .. } = html.filter(&filter) {
256    ///     assert_eq!(name, "div");
257    /// } else {
258    ///     unreachable!();
259    /// }
260    /// ```
261    #[must_use]
262    pub fn attribute_value_contains<N: Into<String>, V: Into<String>>(
263        mut self,
264        name: N,
265        value: V,
266    ) -> Self {
267        self.attrs.push(name.into(), AttributeMatch::Contains(value.into()), true);
268        self
269    }
270
271    /// Collapses successive text nodes.
272    ///
273    /// # Examples
274    ///
275    /// ```
276    /// use html_filter::*;
277    ///
278    /// let html =
279    ///     Html::parse("<div>before <!-- comment --> middle <strong>strong</strong> after</div>")
280    ///         .unwrap();
281    ///
282    /// // Without collapse
283    /// assert_eq!(
284    ///     Html::Vec(
285    ///         vec![
286    ///             Html::Text("before ".into()),
287    ///             Html::Comment(" comment ".into()),
288    ///             Html::Text(" middle ".into()),
289    ///             Html::Text("strong".into()),
290    ///             Html::Text(" after".into())
291    ///         ]
292    ///         .into()
293    ///     ),
294    ///     html.to_filtered(&Filter::new().no_tags().text(true))
295    /// );
296    ///
297    /// // With collapse
298    /// assert_eq!(
299    ///     Html::Vec(
300    ///         vec![
301    ///             Html::Text("before ".into()),
302    ///             Html::Comment(" comment ".into()),
303    ///             Html::Text(" middle strong after".into()),
304    ///         ]
305    ///         .into()
306    ///     ),
307    ///     html.to_filtered(&Filter::new().no_tags().text(true).collapse())
308    /// );
309    /// ```
310    #[must_use]
311    pub const fn collapse(mut self) -> Self {
312        self.types.set_collapse();
313        self
314    }
315
316    /// Specifies the depth of the desired nodes.
317    ///
318    /// The *depth* means at what depth the nodes must be kept according to the
319    /// filter. for this node. This allows you to search for a node, and
320    /// select the node, but also some of its ancestors, up to the chosen
321    /// depth. For instance, a depth of 0 means you only keep the tag, but a
322    /// depth of 1 means you keep the wanted tag, but it's parent and all
323    /// its children.
324    ///
325    /// # Examples
326    ///
327    /// For example, let's consider this HTML code:
328    ///
329    /// ```
330    /// use html_filter::*;
331    ///
332    /// let html = Html::parse(
333    ///     r#"
334    /// <main>
335    ///     <nav>
336    ///         <!-- Navigation menu -->
337    ///         <ul>
338    ///             <li href="first">First link</li>
339    ///             <li href="second">Second link</li>
340    ///             <li href="third">Third link</li>
341    ///         </ul>
342    ///     </nav>
343    /// </main>
344    /// "#,
345    /// )
346    /// .unwrap();
347    ///
348    /// assert_eq!(
349    ///     html.to_filtered(&Filter::new().attribute_value("href", "second").depth(0)),
350    ///     r#"<li href="second">Second link</li>"#
351    /// );
352    ///
353    /// assert_eq!(
354    ///     html.to_filtered(&Filter::new().attribute_value("href", "second").depth(1)),
355    ///     r#"<ul>
356    ///             <li href="first">First link</li>
357    ///             <li href="second">Second link</li>
358    ///             <li href="third">Third link</li>
359    ///         </ul>"#
360    /// );
361    ///
362    /// assert_eq!(
363    ///     html.to_filtered(&Filter::new().attribute_value("href", "second").depth(2)),
364    ///     r#"<nav>
365    ///         <!-- Navigation menu -->
366    ///         <ul>
367    ///             <li href="first">First link</li>
368    ///             <li href="second">Second link</li>
369    ///             <li href="third">Third link</li>
370    ///         </ul>
371    ///     </nav>"#
372    /// );
373    /// ```
374    #[must_use]
375    pub const fn depth(mut self, depth: usize) -> Self {
376        self.depth = depth;
377        self
378    }
379
380    /// Specifies the name of an attribute in the tags that must be dismissed.
381    ///
382    /// This matches only tag attributes that don't have any value, such as
383    /// `enabled` in
384    ///
385    /// ```html
386    /// <button enabled type="submit" />
387    /// ```
388    ///
389    /// See [`Filter`] for usage information.
390    #[must_use]
391    pub fn except_attribute_name<N: Into<String>>(mut self, name: N) -> Self {
392        self.attrs.push(name.into(), AttributeMatch::NoValue, false);
393        self
394    }
395
396    /// Specifies the value of an attribute in the tags that must be dismissed.
397    ///
398    /// This matches only tag attributes that have the correct value for the
399    /// given name. To filter out on a possible value inside the attribute name,
400    /// see [`Filter::except_attribute_value_contains`].
401    ///
402    /// See [`Filter`] for usage information.
403    #[must_use]
404    pub fn except_attribute_value<N, V>(mut self, name: N, value: V) -> Self
405    where
406        N: Into<String>,
407        V: Into<String>,
408    {
409        self.attrs.push(name.into(), AttributeMatch::Is(value.into()), false);
410        self
411    }
412
413    /// Specifies a possible value of an attribute that must be dismissed.
414    ///
415    /// This matches only tag attributes that have the given value as part of
416    /// the space-separated values inside the attribute value (cf. example
417    /// below). To match exact value, see [`Filter::except_attribute_value`].
418    ///
419    ///
420    /// # Examples
421    ///
422    /// ```
423    /// use html_filter::*;
424    ///
425    /// let html = Html::parse(r#"<div class="some_class other_class" />"#).unwrap();
426    /// let filter = Filter::new().except_attribute_value_contains("class", "some_class");
427    ///
428    /// assert_eq!(html.filter(&filter), Html::Empty);
429    /// ```
430    #[must_use]
431    pub fn except_attribute_value_contains<N: Into<String>, V: Into<String>>(
432        mut self,
433        name: N,
434        value: V,
435    ) -> Self {
436        self.attrs.push(name.into(), AttributeMatch::Contains(value.into()), false);
437        self
438    }
439
440    /// Specifies the tag name of the wanted tags.
441    ///
442    /// See [`Filter`] for usage information.
443    #[must_use]
444    #[expect(unused_must_use, reason = "filter does not yet support results")]
445    pub fn except_tag_name<N: Into<String>>(mut self, name: N) -> Self {
446        self.tags.push(name.into(), false);
447        self
448    }
449
450    /// Creates a default [`Filter`]
451    ///
452    /// By default, *comments* and *doctypes* are allowed, however no node is
453    /// wanted, so filtering on a default filter will return an empty
454    /// [`Html`](super::Html).
455    ///
456    /// # Examples
457    ///
458    /// ```
459    /// use html_filter::*;
460    ///
461    /// const _FILTER: Filter = Filter::new();
462    /// ```
463    #[must_use]
464    pub const fn new() -> Self {
465        Self {
466            attrs: ValueAssociateHash::new(),
467            depth: 0,
468            tags: BlackWhiteList::new(),
469            types: NodeTypeFilter::new(),
470        }
471    }
472
473    /// Disable all tags, except those explicitly whitelisted
474    ///
475    /// # Example
476    ///
477    /// ```
478    /// use html_filter::*;
479    /// let html = Html::parse("<!doctype html><div><!-- comment --></div>").unwrap();
480    /// assert_eq!(
481    ///     html.to_filtered(&Filter::new().no_tags()),
482    ///     Html::parse("<!doctype html><!-- comment -->").unwrap()
483    /// );
484    ///
485    /// let html = Html::parse("z<body>a<div>b<p>c</p>d</div>e</body>y").unwrap();
486    /// assert_eq!(
487    ///     html.to_filtered(&Filter::new().no_tags().tag_name("div").collapse()),
488    ///     Html::parse("<div>bd</div>").unwrap()
489    /// );
490    /// ```
491    #[must_use]
492    pub const fn no_tags(mut self) -> Self {
493        self.tags.set_default(false);
494        self
495    }
496
497    /// Specifies the tag name of the wanted tags.
498    ///
499    /// See [`Filter`] for usage information.
500    #[must_use]
501    #[expect(unused_must_use, reason = "filter does not yet support results")]
502    pub fn tag_name<N: Into<String>>(mut self, name: N) -> Self {
503        self.tags.push(name.into(), true);
504        self
505    }
506}