1extern crate alloc;
11mod element;
12mod node_type;
13pub mod types;
14
15use alloc::borrow::Cow;
16use core::cmp::Ordering;
17
18use node_type::NodeTypeFilter;
19use types::Filter;
20
21use crate::errors::{safe_expect, safe_unreachable};
22use crate::prelude::{Html, Tag};
23
/// Progress marker attached to a filtered subtree.
///
/// The explicit discriminants fix the derived ordering to
/// `Success < Found(_) < None`, and two `Found` values compare by their
/// depth. `filter_aux_vec` relies on this ordering when it takes the minimum
/// over several children.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
enum DepthSuccess {
    /// The surrounding context requested by the filter depth has been fully
    /// collected (set by `filter_aux_tag` and `filter_aux_vec`).
    Success = 0,
    /// A matching node was found; the payload counts the levels added around
    /// it so far (starts at `0` in `FilterSuccess::make_found`).
    Found(usize) = 1,
    /// No matching node is associated with this subtree. This is the default.
    #[default]
    None = 2,
}
41
42impl DepthSuccess {
43 fn incr(mut self) -> Self {
45 if let Self::Found(depth) = &mut self {
46 *depth = safe_expect!(depth.checked_add(1), "Smaller than required depth");
47 }
48
49 self
50 }
51}
52
53#[derive(Default, Debug)]
55struct FilterSuccess {
56 depth: DepthSuccess,
64 html: Html,
66}
67
68impl FilterSuccess {
69 #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
71 fn incr(mut self) -> Option<Self> {
72 self.depth = self.depth.incr();
73 Some(self)
74 }
75
76 #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
81 const fn make_found(html: Html) -> Option<Self> {
82 Some(Self { depth: DepthSuccess::Found(0), html })
83 }
84
85 #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
90 fn make_none(html: Cow<'_, Html>) -> Option<Self> {
91 Some(Self { depth: DepthSuccess::None, html: html.into_owned() })
92 }
93}
94
95impl Html {
96 fn check_depth(&self, max_depth: usize, filter: &Filter) -> Option<usize> {
101 match self {
102 Self::Empty | Self::Text(_) | Self::Comment { .. } | Self::Doctype { .. } => None,
103 Self::Tag { tag, .. } if filter.tag_explicitly_allowed(tag) => Some(0),
104 Self::Tag { .. } | Self::Vec(_) if max_depth == 0 => None,
105 Self::Tag { child, .. } => child
106 .check_depth(
107 #[expect(clippy::arithmetic_side_effects, reason = "non-0")]
108 {
109 max_depth - 1
110 },
111 filter,
112 )
113 .map(
114 #[expect(clippy::arithmetic_side_effects, reason = "< initial max_depth")]
115 |depth| depth + 1,
116 ),
117 Self::Vec(vec) => vec
118 .iter()
119 .try_fold(Some(usize::MAX), |acc, child| {
120 if acc == Some(0) {
121 Err(())
122 } else {
123 Ok(child.check_depth(max_depth, filter))
124 }
125 })
126 .unwrap_or(Some(0)),
127 }
128 }
129
130 #[must_use]
142 pub fn filter(self, filter: &Filter) -> Self {
143 filter_aux(Cow::Owned(self), filter, false).html
144 }
145
146 #[must_use]
157 pub fn find(self, filter: &Filter) -> Self {
158 self.filter(filter).into_first()
159 }
160
161 fn into_first(self) -> Self {
163 if let Self::Vec(vec) = self {
164 for elt in vec {
165 let res = elt.into_first();
166 if !res.is_empty() {
167 return res;
168 }
169 }
170 safe_unreachable("Filtering removes empty nodes in vec.")
171 } else {
172 self
173 }
174 }
175
176 #[must_use]
180 pub fn to_filtered(&self, filter: &Filter) -> Self {
181 filter_aux(Cow::Borrowed(self), filter, false).html
182 }
183
184 #[must_use]
189 pub fn to_found(&self, filter: &Filter) -> Self {
190 self.to_filtered(filter).into_first()
191 }
192}
193
194#[allow(clippy::allow_attributes, reason = "expect is buggy")]
206#[allow(
207 clippy::enum_glob_use,
208 reason = "heavy syntax and Html is the main struct"
209)]
210fn filter_aux(cow_html: Cow<'_, Html>, filter: &Filter, found: bool) -> FilterSuccess {
211 use Html::*;
212 match cow_html {
213 Cow::Borrowed(Comment(_)) | Cow::Owned(Comment(_))
214 if found || !filter.comment_explicitly_allowed() =>
215 None,
216 Cow::Borrowed(Doctype { .. }) | Cow::Owned(Doctype { .. })
217 if found || !filter.doctype_allowed() =>
218 None,
219 Cow::Borrowed(Doctype { .. } | Comment(_)) | Cow::Owned(Doctype { .. } | Comment(_)) =>
220 FilterSuccess::make_none(cow_html),
221 Cow::Borrowed(Text(_) | Empty) | Cow::Owned(Text(_) | Empty) => None,
222 Cow::Borrowed(Tag { tag, child }) =>
223 filter_aux_tag(Cow::Borrowed(&**child), Cow::Borrowed(tag), filter, found),
224 Cow::Owned(Tag { tag, child }) =>
225 filter_aux_tag(Cow::Owned(*child), Cow::Owned(tag), filter, found),
226 Cow::Borrowed(Vec(vec)) => filter_aux_vec(Cow::Borrowed(vec), filter),
227 Cow::Owned(Vec(vec)) => filter_aux_vec(Cow::Owned(vec), filter),
228 }
229 .unwrap_or_default()
230}
231
232#[expect(
234 clippy::arithmetic_side_effects,
235 reason = "incr depth when smaller than filter_depth"
236)]
237fn filter_aux_tag(
238 child: Cow<'_, Html>,
239 tag: Cow<'_, Tag>,
240 filter: &Filter,
241 found: bool,
242) -> Option<FilterSuccess> {
243 if filter.tag_allowed(tag.as_ref()) {
244 FilterSuccess::make_found(Html::Tag {
245 tag: tag.into_owned(),
246 child: Box::new(filter_light(child, filter)),
247 })
248 } else if filter.as_depth() == 0 {
249 filter_aux(child, filter, found).incr()
250 } else {
251 let rec = filter_aux(child, filter, found);
252 match rec.depth {
253 DepthSuccess::None => None,
254 DepthSuccess::Success => Some(rec),
255 DepthSuccess::Found(depth) => match depth.cmp(&filter.as_depth()) {
256 Ordering::Less => Some(FilterSuccess {
257 depth: DepthSuccess::Found(depth + 1),
258 html: Html::Tag { tag: tag.into_owned(), child: Box::new(rec.html) },
259 }),
260 Ordering::Equal | Ordering::Greater =>
261 Some(FilterSuccess { depth: DepthSuccess::Success, html: rec.html }),
262 },
263 }
264 }
265}
266
267#[expect(
269 clippy::arithmetic_side_effects,
270 reason = "incr depth when smaller than filter_depth"
271)]
272fn filter_aux_vec(vec: Cow<'_, Box<[Html]>>, filter: &Filter) -> Option<FilterSuccess> {
273 match vec
274 .as_ref()
275 .iter()
276 .filter_map(|child| child.check_depth(filter.as_depth() + 1, filter))
277 .min()
278 {
279 Some(depth) if depth < filter.as_depth() => Some(FilterSuccess {
280 depth: DepthSuccess::Found(depth),
281 html: Html::Vec(
282 vec.iter()
283 .map(|child| filter_light(Cow::Borrowed(child), filter))
284 .collect(),
285 ),
286 }),
287 Some(_) => Some(FilterSuccess {
288 depth: DepthSuccess::Success,
289 html: Html::Vec(into_iter_filter_map_collect(vec, |child| {
290 let rec = filter_aux(child, filter, true);
291 if rec.html.is_empty() {
292 None
293 } else {
294 Some(rec.html)
295 }
296 })),
297 }),
298 None => {
299 let mut filtered: Vec<FilterSuccess> = into_iter_filter_map_collect(vec, |child| {
300 let rec = filter_aux(child, filter, false);
301 if rec.html.is_empty() { None } else { Some(rec) }
302 });
303 if filtered.len() <= 1 {
304 filtered.pop()
305 } else {
306 filtered
307 .iter()
308 .map(|child| child.depth)
309 .min()
310 .map(|depth| FilterSuccess {
311 depth,
312 html: Html::Vec(filtered.into_iter().map(|child| child.html).collect()),
313 })
314 }
315 }
316 }
317}
318
319#[allow(clippy::allow_attributes, reason = "expect is buggy")]
328#[allow(
329 clippy::enum_glob_use,
330 reason = "heavy syntax and Html is the main struct"
331)]
332fn filter_light(cow_html: Cow<'_, Html>, filter: &Filter) -> Html {
333 use Html::*;
334 match cow_html {
335 Cow::Borrowed(Text(_)) | Cow::Owned(Text(_)) if filter.text_allowed() =>
336 cow_html.into_owned(),
337 Cow::Borrowed(Comment(_)) | Cow::Owned(Comment(_)) if filter.comment_allowed() =>
338 cow_html.into_owned(),
339 Cow::Borrowed(Doctype { .. }) | Cow::Owned(Doctype { .. }) if filter.doctype_allowed() =>
340 cow_html.into_owned(),
341 Cow::Borrowed(Tag { tag, .. }) if filter.tag_explicitly_blacklisted(tag) => Html::Empty,
342 Cow::Owned(Tag { tag, .. }) if filter.tag_explicitly_blacklisted(&tag) => Html::Empty,
343 Cow::Borrowed(Tag { tag, child }) => Tag {
344 tag: tag.to_owned(),
345 child: Box::new(filter_light(Cow::Borrowed(&**child), filter)),
346 },
347 Cow::Owned(Tag { tag, child }) =>
348 Tag { tag, child: Box::new(filter_light(Cow::Owned(*child), filter)) },
349 Cow::Borrowed(Vec(vec)) => Html::Vec(
350 vec.into_iter()
351 .map(|child| filter_light(Cow::Borrowed(child), filter))
352 .collect(),
353 ),
354 Cow::Owned(Vec(vec)) => Html::Vec(
355 vec.into_iter()
356 .map(|child| filter_light(Cow::Owned(child), filter))
357 .collect(),
358 ),
359 Cow::Borrowed(Empty | Text(_) | Comment { .. } | Doctype { .. })
360 | Cow::Owned(Empty | Text(_) | Comment { .. } | Doctype { .. }) => Html::Empty,
361 }
362}
363
/// Runs a `filter_map` over a boxed slice held in a [`Cow`] and collects the
/// results, preserving the ownership flavour of the input for every element.
///
/// - If `cow` is borrowed, `map` receives each element as `Cow::Borrowed`.
/// - If `cow` is owned, the slice is consumed and `map` receives each element
///   as `Cow::Owned`, so nothing has to be cloned.
///
/// Elements for which `map` returns `None` are skipped; the others are
/// collected, in order, into `V`.
///
/// `map` may be any `FnMut` closure, so stateful closures are accepted in
/// addition to plain `Fn` ones.
pub fn into_iter_filter_map_collect<T, U, V, F>(cow: Cow<'_, Box<[T]>>, mut map: F) -> V
where
    T: Clone,
    V: FromIterator<U>,
    F: FnMut(Cow<'_, T>) -> Option<U>,
{
    match cow {
        Cow::Borrowed(borrowed) => borrowed
            .iter()
            .filter_map(|elt| map(Cow::Borrowed(elt)))
            .collect(),
        // `into_vec` reuses the allocation and yields elements by value
        // whatever the edition-specific meaning of `Box<[T]>::into_iter` is.
        Cow::Owned(owned) => owned
            .into_vec()
            .into_iter()
            .filter_map(|elt| map(Cow::Owned(elt)))
            .collect(),
    }
}