1extern crate alloc;
11mod api;
12mod element;
13mod node_type;
14pub mod types;
15
16use alloc::borrow::Cow;
17use core::cmp::Ordering;
18use core::mem::take;
19
20use node_type::NodeTypeFilter;
21use types::Filter;
22
23use crate::errors::{safe_expect, safe_unreachable};
24use crate::{Html, Tag};
25
/// Progress of the search for a node wanted by the filter inside a subtree.
///
/// The derived ordering compares the explicit discriminants first, so
/// `Success < Found(_) < None`; two `Found` values are then ordered by their
/// depth. Taking the minimum of several values therefore yields the most
/// advanced state.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
enum DepthSuccess {
    /// A wanted node was found and the surrounding context is complete: the
    /// associated html must be kept as is by the ancestors.
    Success = 0,
    /// A wanted node was found this many tag levels below the current node;
    /// ancestors may still have to be added around it.
    Found(usize) = 1,
    /// No wanted node was found.
    #[default]
    None = 2,
}
43
44impl DepthSuccess {
45 fn incr(mut self) -> Self {
47 if let Self::Found(depth) = &mut self {
48 *depth = safe_expect!(depth.checked_add(1), "Smaller than required depth");
49 }
50
51 self
52 }
53}
54
55#[derive(Default, Debug)]
57struct FilterSuccess {
58 depth: DepthSuccess,
66 html: Html,
68}
69
70impl FilterSuccess {
71 #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
73 fn incr(mut self) -> Option<Self> {
74 self.depth = self.depth.incr();
75 Some(self)
76 }
77
78 #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
83 const fn make_found(html: Html) -> Option<Self> {
84 Some(Self { depth: DepthSuccess::Found(0), html })
85 }
86
87 #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
92 fn make_none(html: Cow<'_, Html>) -> Option<Self> {
93 Some(Self { depth: DepthSuccess::None, html: html.into_owned() })
94 }
95}
96
97impl Html {
98 fn check_depth(&self, max_depth: usize, filter: &Filter) -> Option<usize> {
103 match self {
104 Self::Empty | Self::Text(_) | Self::Comment { .. } | Self::Doctype { .. } => None,
105 Self::Tag { tag, .. } if filter.tag_explicitly_allowed(tag) => Some(0),
106 Self::Tag { .. } | Self::Vec(_) if max_depth == 0 => None,
107 Self::Tag { child, .. } => child
108 .check_depth(
109 #[expect(clippy::arithmetic_side_effects, reason = "non-0")]
110 {
111 max_depth - 1
112 },
113 filter,
114 )
115 .map(
116 #[expect(clippy::arithmetic_side_effects, reason = "< initial max_depth")]
117 |depth| depth + 1,
118 ),
119 Self::Vec(vec) => vec
120 .iter()
121 .try_fold(Some(usize::MAX), |acc, child| {
122 if acc == Some(0) { Err(()) } else { Ok(child.check_depth(max_depth, filter)) }
123 })
124 .unwrap_or(Some(0)),
125 }
126 }
127
128 #[must_use]
140 pub fn filter(self, filter: &Filter) -> Self {
141 filter_aux(Cow::Owned(self), filter, false).html
142 }
143
144 #[must_use]
155 pub fn find(self, filter: &Filter) -> Self {
156 self.filter(filter).into_first()
157 }
158
159 fn into_first(self) -> Self {
161 if let Self::Vec(vec) = self {
162 for elt in vec {
163 let res = elt.into_first();
164 if !res.is_empty() {
165 return res;
166 }
167 }
168 safe_unreachable!("Filtering removes empty nodes in vec.")
169 } else {
170 self
171 }
172 }
173
174 #[must_use]
178 pub fn to_filtered(&self, filter: &Filter) -> Self {
179 filter_aux(Cow::Borrowed(self), filter, false).html
180 }
181
182 #[must_use]
187 pub fn to_found(&self, filter: &Filter) -> Self {
188 self.to_filtered(filter).into_first()
189 }
190}
191
192#[allow(clippy::allow_attributes, reason = "expect is buggy")]
204#[allow(clippy::enum_glob_use, reason = "heavy syntax and Html is the main struct")]
205fn filter_aux(cow_html: Cow<'_, Html>, filter: &Filter, found: bool) -> FilterSuccess {
206 use Html::*;
207 match cow_html {
208 Cow::Borrowed(Comment(_)) | Cow::Owned(Comment(_))
209 if !filter.comment_explicitly_allowed() =>
210 None,
211 Cow::Borrowed(Doctype { .. }) | Cow::Owned(Doctype { .. }) if !filter.doctype_allowed() =>
212 None,
213 Cow::Borrowed(Doctype { .. } | Comment(_)) | Cow::Owned(Doctype { .. } | Comment(_)) =>
214 FilterSuccess::make_none(cow_html),
215 Cow::Borrowed(Text(text)) if filter.text_explicitly_allowed() && filter.should_trim() =>
216 FilterSuccess::make_none(Cow::Owned(Html::trim_text(text))),
217 Cow::Owned(Text(text)) if filter.text_explicitly_allowed() && filter.should_trim() =>
218 FilterSuccess::make_none(Cow::Owned(Html::trim_text(&text))),
219 Cow::Borrowed(Text(_)) | Cow::Owned(Text(_)) if filter.text_explicitly_allowed() =>
220 FilterSuccess::make_none(cow_html),
221 Cow::Borrowed(Text(_) | Empty) | Cow::Owned(Text(_) | Empty) => None,
222 Cow::Borrowed(Tag { tag, child }) =>
224 filter_aux_tag(Cow::Borrowed(&**child), Cow::Borrowed(tag), filter, found),
225 Cow::Owned(Tag { tag, child }) =>
226 filter_aux_tag(Cow::Owned(*child), Cow::Owned(tag), filter, found),
227 Cow::Borrowed(Vec(vec)) => filter_aux_vec(Cow::Borrowed(vec), filter),
228 Cow::Owned(Vec(vec)) => filter_aux_vec(Cow::Owned(vec), filter),
229 }
230 .unwrap_or_default()
231}
232
233#[expect(clippy::arithmetic_side_effects, reason = "incr depth when smaller than filter_depth")]
235fn filter_aux_tag(
236 child: Cow<'_, Html>,
237 tag: Cow<'_, Tag>,
238 filter: &Filter,
239 found: bool,
240) -> Option<FilterSuccess> {
241 if filter.tag_allowed(tag.as_ref()) {
242 FilterSuccess::make_found(Html::Tag {
243 tag: tag.into_owned(),
244 child: Box::new(filter_light(child, filter)),
245 })
246 } else if filter.as_depth() == 0 {
247 filter_aux(child, filter, found).incr()
248 } else {
249 let rec = filter_aux(child, filter, found);
250 match rec.depth {
251 DepthSuccess::None => None,
252 DepthSuccess::Success => Some(rec),
253 DepthSuccess::Found(depth) => match depth.cmp(&filter.as_depth()) {
254 Ordering::Less => Some(FilterSuccess {
255 depth: DepthSuccess::Found(depth + 1),
256 html: Html::Tag { tag: tag.into_owned(), child: Box::new(rec.html) },
257 }),
258 Ordering::Equal | Ordering::Greater =>
259 Some(FilterSuccess { depth: DepthSuccess::Success, html: rec.html }),
260 },
261 }
262 }
263}
264
265#[expect(clippy::arithmetic_side_effects, reason = "incr depth when smaller than filter_depth")]
267fn filter_aux_vec(vec: Cow<'_, Box<[Html]>>, filter: &Filter) -> Option<FilterSuccess> {
268 match vec
269 .as_ref()
270 .iter()
271 .filter_map(|child| child.check_depth(filter.as_depth() + 1, filter))
272 .min()
273 {
274 Some(depth) if depth < filter.as_depth() => Some(FilterSuccess {
275 depth: DepthSuccess::Found(depth),
276 html: unwrap_vec(
277 vec.iter()
278 .map(|child| filter_light(Cow::Borrowed(child), filter))
279 .filter(|child| !child.is_empty())
280 .collect(),
281 filter.as_collapse(),
282 ),
283 }),
284 Some(_) => Some(FilterSuccess {
285 depth: DepthSuccess::Success,
286 html: unwrap_vec(
287 into_iter_filter_map_collect(vec, |child| {
288 let rec = filter_aux(child, filter, true).html;
289 if rec.is_empty() { None } else { Some(rec) }
290 }),
291 filter.as_collapse(),
292 ),
293 }),
294 None => {
295 let mut filtered: Vec<FilterSuccess> = into_iter_filter_map_collect(vec, |child| {
296 let rec = filter_aux(child, filter, false);
297 if rec.html.is_empty() { None } else { Some(rec) }
298 });
299 if filtered.len() <= 1 {
300 filtered.pop()
301 } else {
302 filtered.iter().map(|child| child.depth).min().map(|depth| FilterSuccess {
303 depth,
304 html: unwrap_vec(
305 filtered.into_iter().map(|child| child.html).collect(),
306 filter.as_collapse(),
307 ),
308 })
309 }
310 }
311 }
312}
313
314#[allow(clippy::allow_attributes, reason = "expect is buggy")]
323#[allow(clippy::enum_glob_use, reason = "heavy syntax and Html is the main struct")]
324fn filter_light(cow_html: Cow<'_, Html>, filter: &Filter) -> Html {
325 use Html::*;
326 #[allow(clippy::ref_patterns, reason = "!")]
327 match cow_html {
328 Cow::Borrowed(Text(txt)) if filter.text_allowed() && filter.should_trim() =>
329 Html::trim_text(txt),
330 Cow::Owned(Text(txt)) if filter.text_allowed() && filter.should_trim() =>
331 Html::trim_text(&txt),
332 Cow::Owned(Text(_)) | Cow::Borrowed(Text(_)) if filter.text_allowed() =>
333 cow_html.into_owned(),
334 Cow::Borrowed(Comment(_)) | Cow::Owned(Comment(_)) if filter.comment_allowed() =>
335 cow_html.into_owned(),
336 Cow::Borrowed(Doctype { .. }) | Cow::Owned(Doctype { .. }) if filter.doctype_allowed() =>
337 cow_html.into_owned(),
338 Cow::Borrowed(Tag { tag, .. }) if filter.tag_explicitly_blacklisted(tag) => Html::Empty,
339 Cow::Owned(Tag { tag, .. }) if filter.tag_explicitly_blacklisted(&tag) => Html::Empty,
340 Cow::Borrowed(Tag { tag, child }) => Tag {
341 tag: tag.to_owned(),
342 child: Box::new(filter_light(Cow::Borrowed(&**child), filter)),
343 },
344 Cow::Owned(Tag { tag, child }) =>
345 Tag { tag, child: Box::new(filter_light(Cow::Owned(*child), filter)) },
346 Cow::Borrowed(Vec(vec)) => unwrap_vec(
347 vec.iter()
348 .map(|child| filter_light(Cow::Borrowed(child), filter))
349 .filter(|html| !html.is_empty())
350 .collect(),
351 filter.as_collapse(),
352 ),
353 Cow::Owned(Vec(vec)) => unwrap_vec(
354 vec.into_iter()
355 .map(|child| filter_light(Cow::Owned(child), filter))
356 .filter(|html| !html.is_empty())
357 .collect(),
358 filter.as_collapse(),
359 ),
360 Cow::Borrowed(Empty | Text(_) | Comment { .. } | Doctype { .. })
361 | Cow::Owned(Empty | Text(_) | Comment { .. } | Doctype { .. }) => Html::Empty,
362 }
363}
364
365fn unwrap_vec(vec: Vec<Html>, collapse: bool) -> Html {
367 let mut res = if collapse {
368 let mut previous = String::new();
369 let mut res = Vec::with_capacity(vec.len());
370 for this in vec {
371 if let Html::Text(text) = this {
372 previous.push_str(&text);
373 } else {
374 if !previous.is_empty() {
375 res.push(Html::Text(take(&mut previous)));
376 }
377 res.push(this);
378 }
379 }
380 if !previous.is_empty() {
381 res.push(Html::Text(take(&mut previous)));
382 }
383 res
384 } else {
385 vec
386 };
387 if res.len() <= 1 {
388 res.first_mut().map(take).unwrap_or_default()
389 } else {
390 Html::Vec(res.into_boxed_slice())
391 }
392}
393
/// Applies `map` to every element of a boxed slice held in a [`Cow`], keeps
/// the `Some` results and collects them, in order, into `V`.
///
/// The elements are handed to `map` with the same ownership as the slice:
/// a borrowed slice yields `Cow::Borrowed` elements, an owned slice is
/// consumed and yields `Cow::Owned` elements, so nothing is cloned here.
///
/// `map` may be any `FnMut`, hence it may keep state between elements.
fn into_iter_filter_map_collect<T, U, V, F>(cow: Cow<'_, Box<[T]>>, mut map: F) -> V
where
    T: Clone,
    V: FromIterator<U>,
    F: FnMut(Cow<'_, T>) -> Option<U>,
{
    match cow {
        Cow::Borrowed(borrowed) => {
            borrowed.iter().filter_map(|elt| map(Cow::Borrowed(elt))).collect()
        }
        // `into_vec` gives by-value iteration in every edition, whereas
        // `Box<[T]>::into_iter()` only iterates by value since edition 2024.
        Cow::Owned(owned) => {
            owned.into_vec().into_iter().filter_map(|elt| map(Cow::Owned(elt))).collect()
        }
    }
}