1use maybe_xml::{token::prop::TagName, Reader};
5
6use crate::Ty;
7
8pub use maybe_xml::token;
9
10fn map_tag_name_to_ty(tag_name: TagName<'_>) -> Ty {
11 let local_name = tag_name.local().as_str();
12 if local_name.eq_ignore_ascii_case("rss") {
13 Ty::Rss
14 } else if local_name.eq_ignore_ascii_case("feed") {
15 Ty::Atom
16 } else {
17 Ty::XmlOrHtml
18 }
19}
20
21pub(super) fn find_ty(input: &str) -> Ty {
22 Reader::from_str(input)
23 .into_iter()
24 .find_map(|token| match token.ty() {
25 token::Ty::StartTag(start_tag) => Some(map_tag_name_to_ty(start_tag.name())),
26 token::Ty::EmptyElementTag(empty_tag) => Some(map_tag_name_to_ty(empty_tag.name())),
27 token::Ty::EndTag(_) => Some(Ty::XmlOrHtml),
28 token::Ty::Characters(chars) => {
29 if chars.as_str().chars().all(|c| c.is_ascii_whitespace()) {
30 return None;
31 }
32
33 Some(Ty::XmlOrHtml)
34 }
35 token::Ty::Cdata(cdata) => {
36 if cdata
37 .content()
38 .as_str()
39 .chars()
40 .all(|c| c.is_ascii_whitespace())
41 {
42 return None;
43 }
44
45 Some(Ty::XmlOrHtml)
46 }
47 token::Ty::ProcessingInstruction(_)
48 | token::Ty::Declaration(_)
49 | token::Ty::Comment(_) => None,
50 })
51 .unwrap_or(Ty::Unknown)
52}
53
54#[must_use]
55pub(crate) fn read_until_end_tag<'a>(
56 tag_name: TagName<'a>,
57 reader: &Reader<'a>,
58 pos: &mut usize,
59) -> usize {
60 let mut end = *pos;
61 let mut start_count = 1;
62 let tag_name = tag_name.as_str();
63
64 while let Some(token) = reader.tokenize(pos) {
65 match token.ty() {
66 token::Ty::EndTag(tag) => {
67 if tag.name().as_str().eq_ignore_ascii_case(tag_name) {
68 start_count -= 1;
69 if start_count == 0 {
70 break;
71 }
72 }
73 }
74 token::Ty::StartTag(tag) => {
75 if tag.name().as_str().eq_ignore_ascii_case(tag_name) {
76 start_count += 1;
77 }
78 }
79 token::Ty::EmptyElementTag(_)
80 | token::Ty::Characters(_)
81 | token::Ty::ProcessingInstruction(_)
82 | token::Ty::Declaration(_)
83 | token::Ty::Comment(_)
84 | token::Ty::Cdata(_) => {}
85 }
86
87 end = *pos;
88 }
89
90 end
91}
92
93#[must_use]
94pub(crate) fn collect_bytes_until_end_tag<'a>(
95 tag_name: TagName<'a>,
96 reader: &Reader<'a>,
97 pos: &mut usize,
98) -> &'a str {
99 let begin = *pos;
100 let end = read_until_end_tag(tag_name, reader, pos);
101
102 let input = reader.into_inner();
103 &input[begin..end]
104}