nxml_rs/
parser.rs

1use std::borrow::Cow;
2
3use thiserror::Error;
4
5use crate::{
6    element::ElementRef,
7    tokenizer::{Position, Token, Tokenizer},
8};
9
10#[derive(Debug, Error)]
11pub enum NxmlErr {
12    #[error("No closing '>' found for ending element </{element}>")]
13    NoClosingSymbolFound { element: String },
14    #[error("Couldn't find a '<' to start parsing with")]
15    NoOpeningSymbolFound,
16    #[error(
17        "Closing element is in wrong order. Expected '</{expected}>', but instead got '{}'", got.as_str()
18    )]
19    MismatchedClosingTag { expected: String, got: String },
20    #[error("parsing tag '{tag}', attribute '{attribute}' - expected '='")]
21    MissingEqualsSign { tag: String, attribute: String },
22    #[error("parsing tag '{tag}', attribute '{attribute}' - expected a \"string\" after =, but none found")]
23    MissingAttributeValue { tag: String, attribute: String },
24    #[error("Expected a name of the element after <")]
25    MissingElementName,
26}
27
28#[derive(Debug, Error)]
29#[error("{err} [{at}]")]
30pub struct NxmlError {
31    pub err: NxmlErr,
32    pub at: Position,
33}
34
35pub fn parse(s: &str) -> Result<ElementRef, NxmlError> {
36    Parser::new(s).parse()
37}
38
39pub fn parse_lenient(s: &str) -> (ElementRef, Vec<NxmlError>) {
40    let mut parser = Parser::new(s).lenient();
41    let element = parser.parse().expect("lenient parser never errors");
42    (element, parser.errors)
43}
44
45#[derive(Debug)]
46struct Parser<'s> {
47    tokenizer: Tokenizer<'s>,
48    errors: Vec<NxmlError>,
49    lenient: bool,
50}
51
52impl<'s> Parser<'s> {
53    fn new(data: &str) -> Parser {
54        Parser {
55            tokenizer: Tokenizer::new(data),
56            errors: Vec::new(),
57            lenient: false,
58        }
59    }
60
61    fn lenient(mut self) -> Self {
62        self.lenient = true;
63        self
64    }
65
66    fn report(&mut self, err: NxmlErr) -> Result<(), NxmlError> {
67        let error = NxmlError {
68            err,
69            at: self.tokenizer.position(),
70        };
71        if self.lenient {
72            self.errors.push(error);
73            return Ok(());
74        }
75        Err(error)
76    }
77
78    fn parse(&mut self) -> Result<ElementRef<'s>, NxmlError> {
79        self.parse_inner(false)
80    }
81
82    fn parse_inner(&mut self, skip_opening_tag: bool) -> Result<ElementRef<'s>, NxmlError> {
83        if !skip_opening_tag && !matches!(self.tokenizer.next_token(), Token::OpenLess) {
84            self.report(NxmlErr::NoOpeningSymbolFound)?;
85        }
86
87        let name = match self.tokenizer.next_token() {
88            Token::String(name) => name,
89            _ => {
90                self.report(NxmlErr::MissingElementName)?;
91                ""
92            }
93        };
94
95        let mut element = ElementRef::new(name);
96
97        loop {
98            match self.tokenizer.next_token() {
99                Token::Eof => return Ok(element),
100                Token::Slash => {
101                    if self.tokenizer.take('>') {
102                        return Ok(element);
103                    }
104                    break;
105                }
106                Token::CloseGreater => break,
107                Token::String(name) => {
108                    let Token::Equal = self.tokenizer.next_token() else {
109                        self.report(NxmlErr::MissingEqualsSign {
110                            tag: element.name.to_owned(),
111                            attribute: name.to_owned(),
112                        })?;
113                        continue;
114                    };
115
116                    let Token::String(value) = self.tokenizer.next_token() else {
117                        self.report(NxmlErr::MissingAttributeValue {
118                            tag: element.name.to_owned(),
119                            attribute: name.to_owned(),
120                        })?;
121                        continue;
122                    };
123
124                    element.attributes.insert(name, value);
125                }
126                _ => (),
127            }
128        }
129        loop {
130            match self.tokenizer.next_token() {
131                Token::Eof => return Ok(element),
132                Token::OpenLess => (),
133                token => {
134                    match element.text_content {
135                        Cow::Borrowed("") => {
136                            element.text_content = Cow::Borrowed(token.as_str());
137                        }
138                        Cow::Borrowed(content) => {
139                            element.text_content =
140                                Cow::Owned(content.to_owned() + " " + token.as_str())
141                        }
142                        Cow::Owned(ref mut s) => s.push_str(token.as_str()),
143                    }
144                    continue;
145                }
146            }
147
148            if !self.tokenizer.take('/') {
149                element.children.push(self.parse_inner(true)?);
150                continue;
151            }
152
153            match self.tokenizer.next_token() {
154                Token::String(name) if name == element.name => {
155                    if let Token::CloseGreater = self.tokenizer.next_token() {
156                        return Ok(element);
157                    }
158                    self.report(NxmlErr::NoClosingSymbolFound {
159                        element: name.to_owned(),
160                    })?;
161                }
162                token => self.report(NxmlErr::MismatchedClosingTag {
163                    expected: element.name.to_owned(),
164                    got: token.as_str().to_owned(),
165                })?,
166            };
167            return Ok(element);
168        }
169    }
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Input consisting of a lone `"` has no `<` to start from, so strict
    /// parsing must fail with `NoOpeningSymbolFound`.
    #[test]
    fn parse_single_quote() {
        let NxmlError { err, .. } = parse("\"").unwrap_err();
        assert!(matches!(err, NxmlErr::NoOpeningSymbolFound));
    }
}
181}