readfeed/
xml.rs

1//! Provides types to represent elements in an [XML][xml] document.
2//!
3//! [xml]: https://www.w3.org/TR/2006/REC-xml11-20060816/
4use maybe_xml::{token::prop::TagName, Reader};
5
6use crate::Ty;
7
8pub use maybe_xml::token;
9
10fn map_tag_name_to_ty(tag_name: TagName<'_>) -> Ty {
11    let local_name = tag_name.local().as_str();
12    if local_name.eq_ignore_ascii_case("rss") {
13        Ty::Rss
14    } else if local_name.eq_ignore_ascii_case("feed") {
15        Ty::Atom
16    } else {
17        Ty::XmlOrHtml
18    }
19}
20
21pub(super) fn find_ty(input: &str) -> Ty {
22    Reader::from_str(input)
23        .into_iter()
24        .find_map(|token| match token.ty() {
25            token::Ty::StartTag(start_tag) => Some(map_tag_name_to_ty(start_tag.name())),
26            token::Ty::EmptyElementTag(empty_tag) => Some(map_tag_name_to_ty(empty_tag.name())),
27            token::Ty::EndTag(_) => Some(Ty::XmlOrHtml),
28            token::Ty::Characters(chars) => {
29                if chars.as_str().chars().all(|c| c.is_ascii_whitespace()) {
30                    return None;
31                }
32
33                Some(Ty::XmlOrHtml)
34            }
35            token::Ty::Cdata(cdata) => {
36                if cdata
37                    .content()
38                    .as_str()
39                    .chars()
40                    .all(|c| c.is_ascii_whitespace())
41                {
42                    return None;
43                }
44
45                Some(Ty::XmlOrHtml)
46            }
47            token::Ty::ProcessingInstruction(_)
48            | token::Ty::Declaration(_)
49            | token::Ty::Comment(_) => None,
50        })
51        .unwrap_or(Ty::Unknown)
52}
53
54#[must_use]
55pub(crate) fn read_until_end_tag<'a>(
56    tag_name: TagName<'a>,
57    reader: &Reader<'a>,
58    pos: &mut usize,
59) -> usize {
60    let mut end = *pos;
61    let mut start_count = 1;
62    let tag_name = tag_name.as_str();
63
64    while let Some(token) = reader.tokenize(pos) {
65        match token.ty() {
66            token::Ty::EndTag(tag) => {
67                if tag.name().as_str().eq_ignore_ascii_case(tag_name) {
68                    start_count -= 1;
69                    if start_count == 0 {
70                        break;
71                    }
72                }
73            }
74            token::Ty::StartTag(tag) => {
75                if tag.name().as_str().eq_ignore_ascii_case(tag_name) {
76                    start_count += 1;
77                }
78            }
79            token::Ty::EmptyElementTag(_)
80            | token::Ty::Characters(_)
81            | token::Ty::ProcessingInstruction(_)
82            | token::Ty::Declaration(_)
83            | token::Ty::Comment(_)
84            | token::Ty::Cdata(_) => {}
85        }
86
87        end = *pos;
88    }
89
90    end
91}
92
93#[must_use]
94pub(crate) fn collect_bytes_until_end_tag<'a>(
95    tag_name: TagName<'a>,
96    reader: &Reader<'a>,
97    pos: &mut usize,
98) -> &'a str {
99    let begin = *pos;
100    let end = read_until_end_tag(tag_name, reader, pos);
101
102    let input = reader.into_inner();
103    &input[begin..end]
104}