use maybe_xml::{token::prop::TagName, Reader};
use crate::Ty;
pub use maybe_xml::token;
fn map_tag_name_to_ty(tag_name: TagName<'_>) -> Ty {
let local_name = tag_name.local().as_str();
if local_name.eq_ignore_ascii_case("rss") {
Ty::Rss
} else if local_name.eq_ignore_ascii_case("feed") {
Ty::Atom
} else {
Ty::XmlOrHtml
}
}
pub(super) fn find_ty(input: &str) -> Ty {
Reader::from_str(input)
.into_iter()
.find_map(|token| match token.ty() {
token::Ty::StartTag(start_tag) => Some(map_tag_name_to_ty(start_tag.name())),
token::Ty::EmptyElementTag(empty_tag) => Some(map_tag_name_to_ty(empty_tag.name())),
token::Ty::EndTag(_) => Some(Ty::XmlOrHtml),
token::Ty::Characters(chars) => {
if chars.as_str().chars().all(|c| c.is_ascii_whitespace()) {
return None;
}
Some(Ty::XmlOrHtml)
}
token::Ty::Cdata(cdata) => {
if cdata
.content()
.as_str()
.chars()
.all(|c| c.is_ascii_whitespace())
{
return None;
}
Some(Ty::XmlOrHtml)
}
token::Ty::ProcessingInstruction(_)
| token::Ty::Declaration(_)
| token::Ty::Comment(_) => None,
})
.unwrap_or(Ty::Unknown)
}
#[must_use]
pub(crate) fn read_until_end_tag<'a>(
tag_name: TagName<'a>,
reader: &Reader<'a>,
pos: &mut usize,
) -> usize {
let mut end = *pos;
let mut start_count = 1;
let tag_name = tag_name.as_str();
while let Some(token) = reader.tokenize(pos) {
match token.ty() {
token::Ty::EndTag(tag) => {
if tag.name().as_str().eq_ignore_ascii_case(tag_name) {
start_count -= 1;
if start_count == 0 {
break;
}
}
}
token::Ty::StartTag(tag) => {
if tag.name().as_str().eq_ignore_ascii_case(tag_name) {
start_count += 1;
}
}
token::Ty::EmptyElementTag(_)
| token::Ty::Characters(_)
| token::Ty::ProcessingInstruction(_)
| token::Ty::Declaration(_)
| token::Ty::Comment(_)
| token::Ty::Cdata(_) => {}
}
end = *pos;
}
end
}
#[must_use]
pub(crate) fn collect_bytes_until_end_tag<'a>(
tag_name: TagName<'a>,
reader: &Reader<'a>,
pos: &mut usize,
) -> &'a str {
let begin = *pos;
let end = read_until_end_tag(tag_name, reader, pos);
let input = reader.into_inner();
&input[begin..end]
}