extern crate alloc;
mod element;
mod node_type;
pub mod types;
use alloc::borrow::Cow;
use core::cmp::Ordering;
use node_type::NodeTypeFilter;
use types::Filter;
use crate::errors::{safe_expect, safe_unreachable};
use crate::{Html, Tag};
/// Progress marker attached to a filtering result.
///
/// The explicit discriminants fix the derived ordering to
/// `Success < Found(_) < None` (two `Found` values compare by their depth),
/// which is what taking the `min()` of several markers relies on.
#[repr(u8)]
#[derive(Default, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
enum DepthSuccess {
    /// The search is complete: no further parent has to be wrapped around the
    /// result.
    Success = 0,
    /// A wanted node was found; the payload is the depth accumulated so far
    /// while walking back up through its parents.
    Found(usize) = 1,
    /// Nothing wanted was found (this is the default state).
    #[default]
    None = 2,
}
impl DepthSuccess {
    /// Returns the marker with its depth increased by one.
    ///
    /// Only `Found` carries a depth; `None` and `Success` are returned
    /// unchanged. The addition is checked, and an overflow is handed to
    /// `safe_expect!`.
    fn incr(self) -> Self {
        match self {
            Self::Found(depth) => {
                Self::Found(safe_expect!(depth.checked_add(1), "Smaller than required depth"))
            }
            Self::None | Self::Success => self,
        }
    }
}
/// Result of one recursive filtering step: the filtered tree together with
/// the search progress reached while producing it.
///
/// The derived default pairs `DepthSuccess::None` with the default [`Html`].
#[derive(Default, Debug)]
struct FilterSuccess {
    /// Search progress for `html` (nothing found, found at some depth, or
    /// finished).
    depth: DepthSuccess,
    /// Filtered tree produced by this step.
    html: Html,
}
impl FilterSuccess {
    /// Returns the same result with its depth marker incremented (see
    /// `DepthSuccess::incr`).
    ///
    /// Always `Some`; the `Option` only exists so the value can be returned
    /// directly from the filtering helpers.
    #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
    fn incr(self) -> Option<Self> {
        Some(Self { depth: self.depth.incr(), html: self.html })
    }

    /// Wraps `html` as a freshly found node, i.e. with depth `Found(0)`.
    #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
    const fn make_found(html: Html) -> Option<Self> {
        let depth = DepthSuccess::Found(0);
        Some(Self { depth, html })
    }

    /// Wraps `html` (cloned if it was only borrowed) with the `None` marker:
    /// the node is kept but does not count as a found node.
    #[expect(clippy::unnecessary_wraps, reason = "useful for filter method")]
    fn make_none(html: Cow<'_, Html>) -> Option<Self> {
        let owned = html.into_owned();
        Some(Self { depth: DepthSuccess::None, html: owned })
    }
}
impl Html {
    /// Looks for the closest tag that `filter` explicitly allows and returns
    /// how many tag levels separate it from `self`.
    ///
    /// - A tag for which `filter.tag_explicitly_allowed` holds is at depth 0.
    /// - Entering the child of any other tag costs one level; the search
    ///   gives up (`None`) once `max_depth` levels have been spent.
    /// - A `Vec` is transparent: it costs no level and reports the smallest
    ///   depth found among its elements.
    /// - Empty, text, comment and doctype nodes never match.
    fn check_depth(&self, max_depth: usize, filter: &Filter) -> Option<usize> {
        match self {
            Self::Empty | Self::Text(_) | Self::Comment { .. } | Self::Doctype { .. } => None,
            Self::Tag { tag, .. } if filter.tag_explicitly_allowed(tag) => Some(0),
            Self::Tag { .. } | Self::Vec(_) if max_depth == 0 => None,
            Self::Tag { child, .. } => child
                .check_depth(
                    #[expect(clippy::arithmetic_side_effects, reason = "non-0")]
                    {
                        max_depth - 1
                    },
                    filter,
                )
                .map(
                    #[expect(clippy::arithmetic_side_effects, reason = "< initial max_depth")]
                    |depth| depth + 1,
                ),
            // Keep the minimum over all elements. Remembering only the last
            // element's answer would lose a match located in an earlier
            // sibling, and starting from a sentinel such as `usize::MAX`
            // would make an empty vec look like a match (and overflow the
            // `depth + 1` of the enclosing tag).
            Self::Vec(vec) => vec
                .iter()
                .try_fold(None, |closest: Option<usize>, child| {
                    match (closest, child.check_depth(max_depth, filter)) {
                        // Nothing can be closer than 0: stop scanning.
                        (_, Some(0)) => Err(()),
                        (Some(best), Some(depth)) =>
                            Ok(Some(if depth < best { depth } else { best })),
                        (None, candidate) => Ok(candidate),
                        (kept, None) => Ok(kept),
                    }
                })
                .unwrap_or(Some(0)),
        }
    }

    /// Consumes the tree and returns what `filter` keeps of it.
    ///
    /// This is the owning counterpart of [`Html::to_filtered`].
    #[must_use]
    pub fn filter(self, filter: &Filter) -> Self {
        filter_aux(Cow::Owned(self), filter, false).html
    }

    /// Consumes the tree, filters it, and keeps only the first non-empty
    /// node of the filtered output.
    ///
    /// This is the owning counterpart of [`Html::to_found`].
    #[must_use]
    pub fn find(self, filter: &Filter) -> Self {
        self.filter(filter).into_first()
    }

    /// Reduces a filtered output to its first non-empty node.
    ///
    /// A `Vec` is searched in order, recursing into nested `Vec`s; any other
    /// node is returned as is. A `Vec` holding no non-empty node is reported
    /// through `safe_unreachable`, since filtering is expected to have
    /// removed empty nodes from vecs.
    fn into_first(self) -> Self {
        if let Self::Vec(vec) = self {
            for elt in vec {
                let res = elt.into_first();
                if !res.is_empty() {
                    return res;
                }
            }
            safe_unreachable("Filtering removes empty nodes in vec.")
        } else {
            self
        }
    }

    /// Returns what `filter` keeps of the tree without consuming it; the
    /// kept nodes are cloned from `self`.
    #[must_use]
    pub fn to_filtered(&self, filter: &Filter) -> Self {
        filter_aux(Cow::Borrowed(self), filter, false).html
    }

    /// Filters a borrowed tree and keeps only the first non-empty node of
    /// the filtered output.
    #[must_use]
    pub fn to_found(&self, filter: &Filter) -> Self {
        self.to_filtered(filter).into_first()
    }
}
/// Recursive core of the filtering: dispatches on the kind of node.
///
/// - Tags are delegated to `filter_aux_tag`, vecs to `filter_aux_vec`.
/// - Text and empty nodes are dropped here.
/// - A comment is kept (with the `None` depth marker) only when `found` is
///   `false` and `filter.comment_explicitly_allowed()` holds; a doctype only
///   when `found` is `false` and `filter.doctype_allowed()` holds.
///
/// Whenever nothing is kept, the default `FilterSuccess` is returned.
#[allow(clippy::allow_attributes, reason = "expect is buggy")]
#[allow(
    clippy::enum_glob_use,
    reason = "heavy syntax and Html is the main struct"
)]
fn filter_aux(cow_html: Cow<'_, Html>, filter: &Filter, found: bool) -> FilterSuccess {
    use Html::*;
    let outcome = match cow_html {
        // Containers: hand over to the dedicated helpers, keeping the
        // borrowed/owned flavour of the input.
        Cow::Owned(Tag { tag, child }) =>
            filter_aux_tag(Cow::Owned(*child), Cow::Owned(tag), filter, found),
        Cow::Borrowed(Tag { tag, child }) =>
            filter_aux_tag(Cow::Borrowed(&**child), Cow::Borrowed(tag), filter, found),
        Cow::Owned(Vec(vec)) => filter_aux_vec(Cow::Owned(vec), filter),
        Cow::Borrowed(Vec(vec)) => filter_aux_vec(Cow::Borrowed(vec), filter),
        // Leaves that are never kept on their own.
        Cow::Borrowed(Text(_) | Empty) | Cow::Owned(Text(_) | Empty) => None,
        // Leaves that are kept only outside of an already found scope.
        Cow::Borrowed(Comment(_)) | Cow::Owned(Comment(_)) => {
            if !found && filter.comment_explicitly_allowed() {
                FilterSuccess::make_none(cow_html)
            } else {
                None
            }
        }
        Cow::Borrowed(Doctype { .. }) | Cow::Owned(Doctype { .. }) => {
            if !found && filter.doctype_allowed() {
                FilterSuccess::make_none(cow_html)
            } else {
                None
            }
        }
    };
    outcome.unwrap_or_default()
}
/// Filters a tag node given its `tag` and its `child`.
///
/// - If `filter.tag_allowed(tag)` holds, the tag is a found node: it is kept
///   with its child passed through `filter_light`, at depth `Found(0)`.
/// - Otherwise the child is filtered recursively. With a filter depth of 0
///   the child's result is returned with its depth incremented. With a
///   non-zero filter depth, a result found at a depth smaller than the
///   filter depth is wrapped in this tag (depth + 1); a result that already
///   reached the filter depth is returned unwrapped and marked `Success`.
///   A child result with the `None` marker is discarded.
#[expect(
    clippy::arithmetic_side_effects,
    reason = "incr depth when smaller than filter_depth"
)]
fn filter_aux_tag(
    child: Cow<'_, Html>,
    tag: Cow<'_, Tag>,
    filter: &Filter,
    found: bool,
) -> Option<FilterSuccess> {
    // Wanted tag: keep it, lightly filtering what is below.
    if filter.tag_allowed(tag.as_ref()) {
        return FilterSuccess::make_found(Html::Tag {
            tag: tag.into_owned(),
            child: Box::new(filter_light(child, filter)),
        });
    }

    let max_depth = filter.as_depth();
    let rec = filter_aux(child, filter, found);

    // No parent is requested: forward the child's result.
    if max_depth == 0 {
        return rec.incr();
    }

    match rec.depth {
        DepthSuccess::None => None,
        DepthSuccess::Success => Some(rec),
        DepthSuccess::Found(depth) => Some(match depth.cmp(&max_depth) {
            // Still within the requested number of parents: wrap in this tag.
            Ordering::Less => FilterSuccess {
                depth: DepthSuccess::Found(depth + 1),
                html: Html::Tag { tag: tag.into_owned(), child: Box::new(rec.html) },
            },
            // Enough parents already: stop wrapping from here on.
            Ordering::Equal | Ordering::Greater =>
                FilterSuccess { depth: DepthSuccess::Success, html: rec.html },
        }),
    }
}
/// Filters a list of sibling nodes.
///
/// The siblings are first probed with `check_depth` (budget: filter depth
/// plus one) to learn how close the nearest explicitly allowed tag is:
///
/// - closer than the filter depth: every sibling is kept through
///   `filter_light`, and the result is `Found` at that depth;
/// - found, but not closer than the filter depth: siblings are filtered
///   recursively with `found = true`, empty results are dropped, and the
///   result is marked `Success`;
/// - not found: siblings are filtered recursively with `found = false` and
///   empty results are dropped. Zero or one survivor is returned as is
///   (`None` when there is none); several survivors are gathered in a vec
///   carrying the smallest of their depth markers.
#[expect(
    clippy::arithmetic_side_effects,
    reason = "incr depth when smaller than filter_depth"
)]
fn filter_aux_vec(vec: Cow<'_, Box<[Html]>>, filter: &Filter) -> Option<FilterSuccess> {
    let max_depth = filter.as_depth();
    let closest = vec
        .iter()
        .filter_map(|child| child.check_depth(max_depth + 1, filter))
        .min();

    match closest {
        Some(depth) if depth < max_depth => {
            let siblings = vec
                .iter()
                .map(|child| filter_light(Cow::Borrowed(child), filter))
                .collect();
            Some(FilterSuccess { depth: DepthSuccess::Found(depth), html: Html::Vec(siblings) })
        }
        Some(_) => {
            let survivors = into_iter_filter_map_collect(vec, |child| {
                let rec = filter_aux(child, filter, true);
                if rec.html.is_empty() {
                    None
                } else {
                    Some(rec.html)
                }
            });
            Some(FilterSuccess { depth: DepthSuccess::Success, html: Html::Vec(survivors) })
        }
        None => {
            let mut survivors: Vec<FilterSuccess> = into_iter_filter_map_collect(vec, |child| {
                let rec = filter_aux(child, filter, false);
                if rec.html.is_empty() { None } else { Some(rec) }
            });
            match survivors.len() {
                // Nothing, or a single node that needs no enclosing vec.
                0 | 1 => survivors.pop(),
                _ => {
                    let best = survivors.iter().map(|rec| rec.depth).min();
                    best.map(|depth| FilterSuccess {
                        depth,
                        html: Html::Vec(survivors.into_iter().map(|rec| rec.html).collect()),
                    })
                }
            }
        }
    }
}
/// Filters the content located inside an already accepted node.
///
/// Nothing is searched for here; each node is only checked against the
/// filter's node-type and blacklist rules:
///
/// - text, comment and doctype nodes are kept when `filter.text_allowed()`,
///   `filter.comment_allowed()` and `filter.doctype_allowed()` hold
///   respectively, and become `Html::Empty` otherwise;
/// - a tag for which `filter.tag_explicitly_blacklisted` holds becomes
///   `Html::Empty`; any other tag is kept and its child filtered the same
///   way;
/// - a vec keeps its length, each element being filtered the same way.
#[allow(clippy::allow_attributes, reason = "expect is buggy")]
#[allow(
    clippy::enum_glob_use,
    reason = "heavy syntax and Html is the main struct"
)]
fn filter_light(cow_html: Cow<'_, Html>, filter: &Filter) -> Html {
    use Html::*;
    match cow_html {
        Cow::Borrowed(Empty) | Cow::Owned(Empty) => Html::Empty,
        Cow::Borrowed(Text(_)) | Cow::Owned(Text(_)) => {
            if filter.text_allowed() {
                cow_html.into_owned()
            } else {
                Html::Empty
            }
        }
        Cow::Borrowed(Comment(_)) | Cow::Owned(Comment(_)) => {
            if filter.comment_allowed() {
                cow_html.into_owned()
            } else {
                Html::Empty
            }
        }
        Cow::Borrowed(Doctype { .. }) | Cow::Owned(Doctype { .. }) => {
            if filter.doctype_allowed() {
                cow_html.into_owned()
            } else {
                Html::Empty
            }
        }
        Cow::Borrowed(Tag { tag, child }) => {
            if filter.tag_explicitly_blacklisted(tag) {
                Html::Empty
            } else {
                Tag {
                    tag: tag.to_owned(),
                    child: Box::new(filter_light(Cow::Borrowed(&**child), filter)),
                }
            }
        }
        Cow::Owned(Tag { tag, child }) => {
            if filter.tag_explicitly_blacklisted(&tag) {
                Html::Empty
            } else {
                Tag { tag, child: Box::new(filter_light(Cow::Owned(*child), filter)) }
            }
        }
        Cow::Borrowed(Vec(vec)) => Html::Vec(
            vec.iter()
                .map(|child| filter_light(Cow::Borrowed(child), filter))
                .collect(),
        ),
        Cow::Owned(Vec(vec)) => Html::Vec(
            vec.into_iter()
                .map(|child| filter_light(Cow::Owned(child), filter))
                .collect(),
        ),
    }
}
/// Applies `map` to every element of a boxed slice held in a `Cow` and
/// collects the `Some` results, in order.
///
/// The elements are handed to `map` with the same flavour as the input: as
/// `Cow::Borrowed` references when the slice is borrowed, and as
/// `Cow::Owned` values (moved out of the slice, not cloned) when it is
/// owned.
fn into_iter_filter_map_collect<T, U, V, F>(cow: Cow<'_, Box<[T]>>, map: F) -> V
where
    T: Clone,
    V: FromIterator<U>,
    F: Fn(Cow<'_, T>) -> Option<U>,
{
    match cow {
        Cow::Owned(items) => items
            .into_vec()
            .into_iter()
            .filter_map(|item| map(Cow::Owned(item)))
            .collect(),
        Cow::Borrowed(items) => items
            .iter()
            .filter_map(|item| map(Cow::Borrowed(item)))
            .collect(),
    }
}