exile/parser/
element.rs

1use crate::parser::bang::parse_bang;
2use crate::parser::chars::is_name_start_char;
3use crate::parser::error::Result;
4use crate::parser::pi::parse_pi;
5use crate::parser::string::{parse_string, StringType};
6use crate::parser::{parse_name, Iter};
7use crate::{Element, Node};
8
9pub(crate) fn parse_element(iter: &mut Iter<'_>) -> Result<Element> {
10    expect!(iter, '<')?;
11    iter.advance_or_die()?;
12    let name = parse_name(iter)?;
13    let mut element = Element::from_name(name);
14
15    // absorb whitespace
16    iter.skip_whitespace()?;
17
18    // check and return early if it is an empty, self-closing tag
19    if iter.is('/') {
20        iter.advance_or_die()?;
21        expect!(iter, '>')?;
22        iter.advance();
23        return Ok(element);
24    }
25
26    // now the only valid chars are '>' or the start of an attribute name
27    if iter.is_name_start_char() {
28        parse_attributes(iter, &mut element)?;
29    }
30
31    // check and return early if it is an empty, self-closing tag that had attributes
32    if iter.is('/') {
33        iter.advance_or_die()?;
34        expect!(iter, '>')?;
35        iter.advance();
36        return Ok(element);
37    }
38
39    // now the only valid char is '>' and we reach the child nodes
40    expect!(iter, '>')?;
41    iter.advance_or_die()?; // TODO - is it really fatal if we cannot advance?
42    parse_children(iter, &mut element)?;
43    debug_assert_eq!('>', iter.st.c);
44    iter.advance(); // TODO - should this be advance_or_die?
45    debug_assert_ne!('>', iter.st.c);
46    Ok(element)
47}
48
49fn parse_attributes(iter: &mut Iter<'_>, element: &mut Element) -> Result<()> {
50    loop {
51        iter.skip_whitespace()?;
52        if iter.is('/') || iter.is('>') {
53            break;
54        }
55        let key = if iter.is_name_start_char() {
56            parse_name(iter)?
57        } else {
58            String::default()
59        };
60        iter.skip_whitespace()?;
61        expect!(iter, '=')?;
62        iter.advance_or_die()?;
63        iter.skip_whitespace()?;
64        let (start, string_type) = attribute_start_quote(iter)?;
65        iter.advance_or_die()?;
66        let value = parse_attribute_value(iter, string_type)?;
67        expect!(iter, start)?;
68        element.add_attribute(key, value);
69        if !iter.advance() {
70            break;
71        }
72    }
73    Ok(())
74}
75
76fn attribute_start_quote(iter: &Iter<'_>) -> Result<(char, StringType)> {
77    let c = iter.st.c;
78    match c {
79        '\'' => Ok((c, StringType::AttributeSingle)),
80        '"' => Ok((c, StringType::AttributeDouble)),
81        _ => parse_err!(
82            iter,
83            "expected attribute value to start with either a single or double quote, got '{}'",
84            c
85        ),
86    }
87}
88
89/// Expects the iter to be pointing at the first character of the string.
90fn parse_attribute_value(iter: &mut Iter<'_>, string_type: StringType) -> Result<String> {
91    debug_assert!(matches!(
92        string_type,
93        StringType::AttributeDouble | StringType::AttributeSingle
94    ));
95    parse_string(iter, string_type)
96}
97
98// this function takes over after an element's opening tag (the parent element) has been parsed.
99// the nodes that are contained by the parent are parsed and added to the parent. this function is
100// recursive descending until an element with no children is reached.
101fn parse_children(iter: &mut Iter<'_>, parent: &mut Element) -> Result<()> {
102    loop {
103        iter.skip_whitespace()?;
104        if iter.is('<') {
105            let lt_parse = parse_lt(iter, parent)?;
106            match lt_parse {
107                LtParse::EndTag => {
108                    // this is the recursion's breaking condition
109                    return Ok(());
110                }
111                LtParse::Skip => {
112                    // do nothing
113                }
114                LtParse::Some(node) => match node {
115                    Node::Element(elem) => parent.add_child(elem),
116                    Node::Text(text) => parent.add_text(text),
117                    Node::CData(cdata) => parent
118                        .add_cdata(cdata)
119                        .map_err(|e| create_parser_error!(&iter.st, "{}", e))?,
120                    Node::Comment(comment) => parent
121                        .add_comment(comment)
122                        .map_err(|e| create_parser_error!(&iter.st, "{}", e))?,
123                    Node::Pi(pi) => parent.add_pi(pi),
124                    Node::DocType(_) => panic!("doctype unsupported"),
125                },
126                LtParse::DocType(_) => return parse_err!(iter, "doctype not allowed here"),
127            }
128        } else {
129            let text = parse_text(iter)?;
130            if !text.is_empty() {
131                parent.add_text(text);
132            }
133        }
134    }
135}
136
137// the return type for `parse_lt`. since the caller of `parse_lt` doesn't know what type of node
138// has been encountered, this enum is used to describe what was parsed.
139#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
140pub(super) enum LtParse {
141    // the parsed entity was an EndTag.
142    EndTag,
143    // the parsed entity was an unsupported node type, i.e. something we want to skip.
144    #[allow(dead_code)] // TODO - this is because of doctype_wip
145    Skip,
146    // the parsed entity was a supported node type.
147    Some(Node),
148    // TODO - make this a struct to support doctypes https://github.com/webern/exile/issues/22
149    DocType(String),
150}
151
152// parse the correct type of node (or end tag) when encountering a '<'
153fn parse_lt(iter: &mut Iter<'_>, parent: &mut Element) -> Result<LtParse> {
154    debug_assert_eq!('<', iter.st.c);
155    let next = iter.peek_or_die()?;
156    // do the most common case first
157    if is_name_start_char(next) {
158        let element = parse_element(iter)?;
159        debug_assert_ne!('>', iter.st.c);
160        return Ok(LtParse::Some(Node::Element(element)));
161    }
162    match next {
163        '/' => {
164            parse_end_tag_name(iter, parent)?;
165            Ok(LtParse::EndTag)
166        }
167        '?' => {
168            let pi = parse_pi(iter)?;
169            Ok(LtParse::Some(Node::Pi(pi)))
170        }
171        '!' => parse_bang(iter),
172        _ => {
173            // this error occurred on the peeked char, so to report the correct position of the
174            // error, we will first advance the iter (if possible).
175            iter.advance();
176            parse_err!(iter, "unexpected char following '<'")
177        }
178    }
179}
180
181// takes an iter pointing at '<' where the next character is required to be '/'. parses the name of
182// the end tag and compares it to make sure it matches `parent`. if anything goes wrong, Err.
183fn parse_end_tag_name(iter: &mut Iter<'_>, parent: &Element) -> Result<()> {
184    expect!(iter, '<')?;
185    iter.advance_or_die()?;
186    expect!(iter, '/')?;
187    iter.advance_or_die()?;
188    iter.skip_whitespace()?;
189    iter.expect_name_start_char()?;
190    let mut name = String::default();
191    name.push(iter.st.c);
192    loop {
193        iter.advance_or_die()?;
194        if iter.is('>') || iter.is_whitespace() {
195            break;
196        } else if iter.is_name_char() {
197            name.push(iter.st.c);
198        } else {
199            return parse_err!(iter);
200        }
201    }
202    iter.skip_whitespace()?;
203    expect!(iter, '>')?;
204    if name != parent.fullname() {
205        return parse_err!(
206            iter,
207            "closing element name '{}' does not match openeing element name '{}'",
208            name,
209            parent.fullname()
210        );
211    }
212    Ok(())
213}
214
215fn parse_text(iter: &mut Iter<'_>) -> Result<String> {
216    parse_string(iter, StringType::Element)
217}
218
#[test]
fn parse_attribute_value_test_1() {
    // a single-quoted value may contain double quotes. parsing has to stop at the single quote.
    let input = r#"some "fun" attribute value'"#;
    let expected = r#"some "fun" attribute value"#;

    let mut iter = Iter::new(input).unwrap();
    let actual = parse_attribute_value(&mut iter, StringType::AttributeSingle).unwrap();

    assert_eq!(actual, expected);
    // the iter must be left pointing at the closing quote
    assert_eq!(iter.st.c, '\'');
}