html_filter/filter/api.rs
1//! Public API for [`Filter`]
2
3use crate::Filter;
4use crate::filter::NodeTypeFilter;
5use crate::filter::element::{AttributeMatch, BlackWhiteList, ValueAssociateHash};
6
7/// Public API for [`Filter`] on node-type-filters (texts, doctypes, comments,
8/// etc.)
9impl Filter {
10 /// Short-hand to set the keep policy of comments, texts and doctypes at
11 /// once.
12 ///
13 /// - `true`: keep them
14 /// - `false`: remove them
15 ///
16 /// It is equivalent to:
17 ///
18 /// ```
19 /// use html_filter::*;
20 /// assert_eq!(Filter::new().doctype(true).text(true).comment(true), Filter::new().all(true));
21 /// assert_eq!(Filter::new().doctype(false).text(false).comment(false), Filter::new().all(false));
22 /// ```
23 #[must_use]
24 pub const fn all(self, all: bool) -> Self {
25 self.comment(all).doctype(all).text(all)
26 }
27
28 /// Removes the comments, and forces to keep doctypes and texts.
29 ///
30 /// See also [`Self::comment`] to allow comments without forcing others to
31 /// be kept.
32 ///
33 /// # Examples
34 ///
35 /// ```
36 /// use html_filter::*;
37 ///
38 /// let html = Html::parse("a <p> b <!-- c --></p> d").unwrap();
39 ///
40 /// assert_eq!(html.to_filtered(&Filter::new().tag_name("p").comment(false)), "<p> b </p>");
41 /// assert_eq!(html.filter(&Filter::new().tag_name("p").all_except_comment()), "a <p> b </p> d");
42 /// ```
43 #[must_use]
44 pub const fn all_except_comment(self) -> Self {
45 self.all(true).comment(false)
46 }
47
48 /// Removes the doctypes, and forces to keep comments and texts.
49 ///
50 /// See also [`Self::doctype`] to allow doctypes without forcing others to
51 /// be kept.
52 ///
53 /// # Examples
54 ///
55 /// ```
56 /// use html_filter::*;
57 ///
58 /// let html = Html::parse("<!doctype html> a <p> b </p> d").unwrap();
59 ///
60 /// assert_eq!(html.to_filtered(&Filter::new().tag_name("p").doctype(false)), "<p> b </p>");
61 /// assert_eq!(html.filter(&Filter::new().tag_name("p").all_except_doctype()), " a <p> b </p> d");
62 /// ```
63 #[must_use]
64 pub const fn all_except_doctype(self) -> Self {
65 self.all(true).doctype(false)
66 }
67
68 /// Removes the texts, and forces to keep doctypes and comments.
69 ///
70 /// See also [`Self::text`] to allow comments without forcing others to
71 /// be kept.
72 ///
73 /// # Examples
74 ///
75 /// ```
76 /// use html_filter::*;
77 ///
78 /// let html = Html::parse("<!doctype html> a <p> b <!-- c --></p> d <!-- e --> f").unwrap();
79 ///
80 /// assert_eq!(
81 /// Filter::new().all_except_text(),
82 /// Filter::new().text(false).comment(true).doctype(true)
83 /// );
84 ///
85 /// assert_eq!(html.to_filtered(&Filter::new().tag_name("p").text(false)), "<p><!-- c --></p>");
86 /// assert_eq!(
87 /// html.filter(&Filter::new().tag_name("p").all_except_text()),
88 /// "<!doctype html><p><!-- c --></p><!-- e -->"
89 /// );
90 /// ```
91 #[must_use]
92 pub const fn all_except_text(self) -> Self {
93 self.all(true).text(false)
94 }
95
96 /// Sets the filter for comments
97 ///
98 /// If `comment` is set to `true` (default), comments are kept.
99 /// If `comment` is set to `false`, comments are removed.
100 ///
101 /// See [`Filter`] for usage information.
102 #[must_use]
103 pub const fn comment(mut self, comment: bool) -> Self {
104 self.types.set_comment(comment);
105 self
106 }
107
108 /// Sets the filter for doctype tags
109 ///
110 /// If `doctype` is set to `true` (default), doctype tags are kept.
111 /// If `doctype` is set to `false`, doctype tags are removed.
112 ///
113 /// See [`Filter`] for usage information.
114 #[must_use]
115 pub const fn doctype(mut self, doctype: bool) -> Self {
116 self.types.set_doctype(doctype);
117 self
118 }
119
120 /// Keeps only the comments
121 ///
122 /// Doctypes and texts are removed, unless said otherwise by the user.
123 #[must_use]
124 pub const fn none_except_comment(self) -> Self {
125 self.all(false).comment(true)
126 }
127
128 /// Keeps only the doctypes
129 ///
130 /// Comments and texts are removed, unless said otherwise by the user.
131 #[must_use]
132 pub const fn none_except_doctype(self) -> Self {
133 self.all(false).doctype(true)
134 }
135
136 /// Keeps only the texts
137 ///
138 /// Comments and doctypes are removed, unless said otherwise by the user.
139 #[must_use]
140 pub const fn none_except_text(self) -> Self {
141 self.all(false).text(true)
142 }
143
144 /// Filters texts
145 ///
146 /// - If `text` is set to `true` (default), all texts are kept.
147 /// - If `text` is set to `false`, all texts are removed.
148 ///
149 /// See [`Filter`] for usage information.
150 #[must_use]
151 pub const fn text(mut self, text: bool) -> Self {
152 self.types.set_text(text);
153 self
154 }
155
156 /// Trims all texts
157 ///
158 /// This includes removal of text parts that contain only whitespaces, which
159 /// is very useful to remove new lines for example:
160 ///
161 /// # Examples
162 ///
163 /// ```
164 /// use html_filter::*;
165 ///
166 /// let html = Html::parse(
167 /// "
168 /// <!doctype html>
169 /// <ul>
170 /// <li>First</li>
171 /// <li>Second></li>
172 /// </ul>
173 /// ",
174 /// )
175 /// .unwrap();
176 ///
177 /// // With trim
178 /// let filtered = html.to_filtered(&Filter::new().tag_name("ul").trim());
179 /// let (tag, child) = filtered.as_tag().unwrap();
180 /// assert_eq!(tag.as_name(), "ul");
181 ///
182 /// let vec = child.as_vec().unwrap();
183 /// assert!(matches!(vec[0], Html::Tag { .. })); // first li
184 /// assert!(matches!(vec[1], Html::Tag { .. })); // second li
185 /// assert_eq!(vec.len(), 2);
186 ///
187 /// // Without trim
188 /// let filtered = html.filter(&Filter::new().tag_name("ul"));
189 /// let (tag, child) = filtered.as_tag().unwrap();
190 /// assert_eq!(tag.as_name(), "ul");
191 ///
192 /// let vec = child.as_vec().unwrap();
193 /// assert_eq!(vec[0], Html::Text("\n ".to_string()));
194 /// assert!(matches!(vec[1], Html::Tag { .. })); // first li
195 /// assert_eq!(vec[2], Html::Text("\n ".to_string()));
196 /// assert!(matches!(vec[3], Html::Tag { .. })); // second li
197 /// assert_eq!(vec[4], Html::Text("\n".to_string()));
198 /// assert_eq!(vec.len(), 5);
199 /// ```
200 ///
201 /// See also [`Self::collapse`]
202 #[must_use]
203 pub const fn trim(mut self) -> Self {
204 self.types.trim();
205 self
206 }
207}
208
209/// Public API for [`Filter`] on tags and attributes
210impl Filter {
211 /// Specifies the name of an attribute in the wanted tags.
212 ///
213 /// This matches only tag attributes that don't have any value, such as
214 /// `enabled` in
215 ///
216 /// ```html
217 /// <button enabled type="submit" />
218 /// ```
219 ///
220 /// See [`Filter`] for usage information.
221 #[must_use]
222 pub fn attribute_name<N: Into<String>>(mut self, name: N) -> Self {
223 self.attrs.push(name.into(), AttributeMatch::NoValue, true);
224 self
225 }
226
227 /// Specifies the value of an attribute in the wanted tags.
228 ///
229 /// This matches only tag attributes that have the correct value for the
230 /// given name. To match only one value inside that values (e.g. class
231 /// names), cf. [`Filter::attribute_value_contains`].
232 ///
233 /// See [`Filter`] for usage information.
234 #[must_use]
235 pub fn attribute_value<N: Into<String>, V: Into<String>>(mut self, name: N, value: V) -> Self {
236 self.attrs.push(name.into(), AttributeMatch::Is(value.into()), true);
237 self
238 }
239
240 /// Specifies a possible value of an attribute in the wanted tags.
241 ///
242 /// This matches only tag attributes that have the given value as part of
243 /// the space-separated values inside the attribute value (cf. example
244 /// below). To match exact value, see [`Filter::attribute_value`].
245 ///
246 ///
247 /// # Examples
248 ///
249 /// ```
250 /// use html_filter::*;
251 ///
252 /// let html = Html::parse(r#"<div class="some_class other_class" />"#).unwrap();
253 /// let filter = Filter::new().attribute_value_contains("class", "some_class");
254 ///
255 /// if let Html::Tag { tag: Tag { name, .. }, .. } = html.filter(&filter) {
256 /// assert_eq!(name, "div");
257 /// } else {
258 /// unreachable!();
259 /// }
260 /// ```
261 #[must_use]
262 pub fn attribute_value_contains<N: Into<String>, V: Into<String>>(
263 mut self,
264 name: N,
265 value: V,
266 ) -> Self {
267 self.attrs.push(name.into(), AttributeMatch::Contains(value.into()), true);
268 self
269 }
270
271 /// Collapses successive text nodes.
272 ///
273 /// # Examples
274 ///
275 /// ```
276 /// use html_filter::*;
277 ///
278 /// let html =
279 /// Html::parse("<div>before <!-- comment --> middle <strong>strong</strong> after</div>")
280 /// .unwrap();
281 ///
282 /// // Without collapse
283 /// assert_eq!(
284 /// Html::Vec(
285 /// vec![
286 /// Html::Text("before ".into()),
287 /// Html::Comment(" comment ".into()),
288 /// Html::Text(" middle ".into()),
289 /// Html::Text("strong".into()),
290 /// Html::Text(" after".into())
291 /// ]
292 /// .into()
293 /// ),
294 /// html.to_filtered(&Filter::new().no_tags().text(true))
295 /// );
296 ///
297 /// // With collapse
298 /// assert_eq!(
299 /// Html::Vec(
300 /// vec![
301 /// Html::Text("before ".into()),
302 /// Html::Comment(" comment ".into()),
303 /// Html::Text(" middle strong after".into()),
304 /// ]
305 /// .into()
306 /// ),
307 /// html.to_filtered(&Filter::new().no_tags().text(true).collapse())
308 /// );
309 /// ```
310 #[must_use]
311 pub const fn collapse(mut self) -> Self {
312 self.types.set_collapse();
313 self
314 }
315
316 /// Specifies the depth of the desired nodes.
317 ///
318 /// The *depth* means at what depth the nodes must be kept according to the
319 /// filter. for this node. This allows you to search for a node, and
320 /// select the node, but also some of its ancestors, up to the chosen
321 /// depth. For instance, a depth of 0 means you only keep the tag, but a
322 /// depth of 1 means you keep the wanted tag, but it's parent and all
323 /// its children.
324 ///
325 /// # Examples
326 ///
327 /// For example, let's consider this HTML code:
328 ///
329 /// ```
330 /// use html_filter::*;
331 ///
332 /// let html = Html::parse(
333 /// r#"
334 /// <main>
335 /// <nav>
336 /// <!-- Navigation menu -->
337 /// <ul>
338 /// <li href="first">First link</li>
339 /// <li href="second">Second link</li>
340 /// <li href="third">Third link</li>
341 /// </ul>
342 /// </nav>
343 /// </main>
344 /// "#,
345 /// )
346 /// .unwrap();
347 ///
348 /// assert_eq!(
349 /// html.to_filtered(&Filter::new().attribute_value("href", "second").depth(0)),
350 /// r#"<li href="second">Second link</li>"#
351 /// );
352 ///
353 /// assert_eq!(
354 /// html.to_filtered(&Filter::new().attribute_value("href", "second").depth(1)),
355 /// r#"<ul>
356 /// <li href="first">First link</li>
357 /// <li href="second">Second link</li>
358 /// <li href="third">Third link</li>
359 /// </ul>"#
360 /// );
361 ///
362 /// assert_eq!(
363 /// html.to_filtered(&Filter::new().attribute_value("href", "second").depth(2)),
364 /// r#"<nav>
365 /// <!-- Navigation menu -->
366 /// <ul>
367 /// <li href="first">First link</li>
368 /// <li href="second">Second link</li>
369 /// <li href="third">Third link</li>
370 /// </ul>
371 /// </nav>"#
372 /// );
373 /// ```
374 #[must_use]
375 pub const fn depth(mut self, depth: usize) -> Self {
376 self.depth = depth;
377 self
378 }
379
380 /// Specifies the name of an attribute in the tags that must be dismissed.
381 ///
382 /// This matches only tag attributes that don't have any value, such as
383 /// `enabled` in
384 ///
385 /// ```html
386 /// <button enabled type="submit" />
387 /// ```
388 ///
389 /// See [`Filter`] for usage information.
390 #[must_use]
391 pub fn except_attribute_name<N: Into<String>>(mut self, name: N) -> Self {
392 self.attrs.push(name.into(), AttributeMatch::NoValue, false);
393 self
394 }
395
396 /// Specifies the value of an attribute in the tags that must be dismissed.
397 ///
398 /// This matches only tag attributes that have the correct value for the
399 /// given name. To filter out on a possible value inside the attribute name,
400 /// see [`Filter::except_attribute_value_contains`].
401 ///
402 /// See [`Filter`] for usage information.
403 #[must_use]
404 pub fn except_attribute_value<N, V>(mut self, name: N, value: V) -> Self
405 where
406 N: Into<String>,
407 V: Into<String>,
408 {
409 self.attrs.push(name.into(), AttributeMatch::Is(value.into()), false);
410 self
411 }
412
413 /// Specifies a possible value of an attribute that must be dismissed.
414 ///
415 /// This matches only tag attributes that have the given value as part of
416 /// the space-separated values inside the attribute value (cf. example
417 /// below). To match exact value, see [`Filter::except_attribute_value`].
418 ///
419 ///
420 /// # Examples
421 ///
422 /// ```
423 /// use html_filter::*;
424 ///
425 /// let html = Html::parse(r#"<div class="some_class other_class" />"#).unwrap();
426 /// let filter = Filter::new().except_attribute_value_contains("class", "some_class");
427 ///
428 /// assert_eq!(html.filter(&filter), Html::Empty);
429 /// ```
430 #[must_use]
431 pub fn except_attribute_value_contains<N: Into<String>, V: Into<String>>(
432 mut self,
433 name: N,
434 value: V,
435 ) -> Self {
436 self.attrs.push(name.into(), AttributeMatch::Contains(value.into()), false);
437 self
438 }
439
440 /// Specifies the tag name of the wanted tags.
441 ///
442 /// See [`Filter`] for usage information.
443 #[must_use]
444 #[expect(unused_must_use, reason = "filter does not yet support results")]
445 pub fn except_tag_name<N: Into<String>>(mut self, name: N) -> Self {
446 self.tags.push(name.into(), false);
447 self
448 }
449
450 /// Creates a default [`Filter`]
451 ///
452 /// By default, *comments* and *doctypes* are allowed, however no node is
453 /// wanted, so filtering on a default filter will return an empty
454 /// [`Html`](super::Html).
455 ///
456 /// # Examples
457 ///
458 /// ```
459 /// use html_filter::*;
460 ///
461 /// const _FILTER: Filter = Filter::new();
462 /// ```
463 #[must_use]
464 pub const fn new() -> Self {
465 Self {
466 attrs: ValueAssociateHash::new(),
467 depth: 0,
468 tags: BlackWhiteList::new(),
469 types: NodeTypeFilter::new(),
470 }
471 }
472
473 /// Disable all tags, except those explicitly whitelisted
474 ///
475 /// # Example
476 ///
477 /// ```
478 /// use html_filter::*;
479 /// let html = Html::parse("<!doctype html><div><!-- comment --></div>").unwrap();
480 /// assert_eq!(
481 /// html.to_filtered(&Filter::new().no_tags()),
482 /// Html::parse("<!doctype html><!-- comment -->").unwrap()
483 /// );
484 ///
485 /// let html = Html::parse("z<body>a<div>b<p>c</p>d</div>e</body>y").unwrap();
486 /// assert_eq!(
487 /// html.to_filtered(&Filter::new().no_tags().tag_name("div").collapse()),
488 /// Html::parse("<div>bd</div>").unwrap()
489 /// );
490 /// ```
491 #[must_use]
492 pub const fn no_tags(mut self) -> Self {
493 self.tags.set_default(false);
494 self
495 }
496
497 /// Specifies the tag name of the wanted tags.
498 ///
499 /// See [`Filter`] for usage information.
500 #[must_use]
501 #[expect(unused_must_use, reason = "filter does not yet support results")]
502 pub fn tag_name<N: Into<String>>(mut self, name: N) -> Self {
503 self.tags.push(name.into(), true);
504 self
505 }
506}