1use std::borrow::Cow;
2
3use thiserror::Error;
4
5use crate::{
6 element::ElementRef,
7 tokenizer::{Position, Token, Tokenizer},
8};
9
10#[derive(Debug, Error)]
11pub enum NxmlErr {
12 #[error("No closing '>' found for ending element </{element}>")]
13 NoClosingSymbolFound { element: String },
14 #[error("Couldn't find a '<' to start parsing with")]
15 NoOpeningSymbolFound,
16 #[error(
17 "Closing element is in wrong order. Expected '</{expected}>', but instead got '{}'", got.as_str()
18 )]
19 MismatchedClosingTag { expected: String, got: String },
20 #[error("parsing tag '{tag}', attribute '{attribute}' - expected '='")]
21 MissingEqualsSign { tag: String, attribute: String },
22 #[error("parsing tag '{tag}', attribute '{attribute}' - expected a \"string\" after =, but none found")]
23 MissingAttributeValue { tag: String, attribute: String },
24 #[error("Expected a name of the element after <")]
25 MissingElementName,
26}
27
28#[derive(Debug, Error)]
29#[error("{err} [{at}]")]
30pub struct NxmlError {
31 pub err: NxmlErr,
32 pub at: Position,
33}
34
35pub fn parse(s: &str) -> Result<ElementRef, NxmlError> {
36 Parser::new(s).parse()
37}
38
39pub fn parse_lenient(s: &str) -> (ElementRef, Vec<NxmlError>) {
40 let mut parser = Parser::new(s).lenient();
41 let element = parser.parse().expect("lenient parser never errors");
42 (element, parser.errors)
43}
44
45#[derive(Debug)]
46struct Parser<'s> {
47 tokenizer: Tokenizer<'s>,
48 errors: Vec<NxmlError>,
49 lenient: bool,
50}
51
52impl<'s> Parser<'s> {
53 fn new(data: &str) -> Parser {
54 Parser {
55 tokenizer: Tokenizer::new(data),
56 errors: Vec::new(),
57 lenient: false,
58 }
59 }
60
61 fn lenient(mut self) -> Self {
62 self.lenient = true;
63 self
64 }
65
66 fn report(&mut self, err: NxmlErr) -> Result<(), NxmlError> {
67 let error = NxmlError {
68 err,
69 at: self.tokenizer.position(),
70 };
71 if self.lenient {
72 self.errors.push(error);
73 return Ok(());
74 }
75 Err(error)
76 }
77
78 fn parse(&mut self) -> Result<ElementRef<'s>, NxmlError> {
79 self.parse_inner(false)
80 }
81
82 fn parse_inner(&mut self, skip_opening_tag: bool) -> Result<ElementRef<'s>, NxmlError> {
83 if !skip_opening_tag && !matches!(self.tokenizer.next_token(), Token::OpenLess) {
84 self.report(NxmlErr::NoOpeningSymbolFound)?;
85 }
86
87 let name = match self.tokenizer.next_token() {
88 Token::String(name) => name,
89 _ => {
90 self.report(NxmlErr::MissingElementName)?;
91 ""
92 }
93 };
94
95 let mut element = ElementRef::new(name);
96
97 loop {
98 match self.tokenizer.next_token() {
99 Token::Eof => return Ok(element),
100 Token::Slash => {
101 if self.tokenizer.take('>') {
102 return Ok(element);
103 }
104 break;
105 }
106 Token::CloseGreater => break,
107 Token::String(name) => {
108 let Token::Equal = self.tokenizer.next_token() else {
109 self.report(NxmlErr::MissingEqualsSign {
110 tag: element.name.to_owned(),
111 attribute: name.to_owned(),
112 })?;
113 continue;
114 };
115
116 let Token::String(value) = self.tokenizer.next_token() else {
117 self.report(NxmlErr::MissingAttributeValue {
118 tag: element.name.to_owned(),
119 attribute: name.to_owned(),
120 })?;
121 continue;
122 };
123
124 element.attributes.insert(name, value);
125 }
126 _ => (),
127 }
128 }
129 loop {
130 match self.tokenizer.next_token() {
131 Token::Eof => return Ok(element),
132 Token::OpenLess => (),
133 token => {
134 match element.text_content {
135 Cow::Borrowed("") => {
136 element.text_content = Cow::Borrowed(token.as_str());
137 }
138 Cow::Borrowed(content) => {
139 element.text_content =
140 Cow::Owned(content.to_owned() + " " + token.as_str())
141 }
142 Cow::Owned(ref mut s) => s.push_str(token.as_str()),
143 }
144 continue;
145 }
146 }
147
148 if !self.tokenizer.take('/') {
149 element.children.push(self.parse_inner(true)?);
150 continue;
151 }
152
153 match self.tokenizer.next_token() {
154 Token::String(name) if name == element.name => {
155 if let Token::CloseGreater = self.tokenizer.next_token() {
156 return Ok(element);
157 }
158 self.report(NxmlErr::NoClosingSymbolFound {
159 element: name.to_owned(),
160 })?;
161 }
162 token => self.report(NxmlErr::MismatchedClosingTag {
163 expected: element.name.to_owned(),
164 got: token.as_str().to_owned(),
165 })?,
166 };
167 return Ok(element);
168 }
169 }
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Input consisting of one lone `"` is expected to be rejected by the strict parser
    /// with `NoOpeningSymbolFound`.
    #[test]
    fn parse_single_quote() {
        let outcome = parse("\"");
        let failure = outcome.unwrap_err();
        assert!(matches!(failure.err, NxmlErr::NoOpeningSymbolFound));
    }
}