xml_data/quick_xml/
parser.rs

1use crate::{
2	extensions::*,
3	parser::{
4		ElementParser,
5		ElementState,
6		Element,
7	},
8	Result,
9	errors,
10};
11use quick_xml::events::Event;
12use std::io::BufRead;
13
14/// Parser adaptor for `quick_xml::Reader`
15pub struct Parser<'a, 'r, B: BufRead> {
16	inner: &'r mut quick_xml::Reader<B>,
17	buf: &'a mut Vec<u8>, // buf is used for pending; must not be touched if pending isn't None.
18	pending: Option<Event<'a>>,
19}
20
21impl<'a, 'r, B: BufRead> Parser<'a, 'r, B> {
22	/// Create a new adaptor from a reader and a buffer
23	pub fn new(inner: &'r mut quick_xml::Reader<B>, buf: &'a mut Vec<u8>) -> Self {
24		Self {
25			inner,
26			buf,
27			pending: None,
28		}
29	}
30
31	fn shift(&mut self) -> Result<()> {
32		self.pending = None; // release buf reference
33		let buf: &'static mut Vec<u8> = unsafe { &mut *(self.buf as *mut _) };
34		self.pending = Some(self.inner.read_event(buf)?);
35		Ok(())
36	}
37
38	fn peek(&mut self) -> Result<Event<'a>> {
39		if self.pending.is_none() { self.shift()?; }
40		Ok(self.pending.clone().expect("can't be None"))
41	}
42
43	fn clear(&mut self) {
44		self.pending = None;
45	}
46
47	/// Parse a single (root) element from reading a document
48	///
49	/// Uses the default state type for the returned element.
50	pub fn parse_document<E: Element>(&mut self) -> Result<E> {
51		self.parse_document_for_state::<E::ParseState>()
52	}
53
54	/// Parse a single (root) element from reading a document
55	///
56	/// Uses the given state type.
57	pub fn parse_document_for_state<S: ElementState>(&mut self) -> Result<S::Output> {
58		let mut output = None;
59		loop {
60			match self.peek()? {
61				Event::Eof => {
62					if let Some(o) = output {
63						return Ok(o);
64					}
65					return Err(errors::unexpected_eof("empty document"));
66				},
67				Event::End(_) => {
68					return Err(errors::unexpected_end());
69				},
70				Event::Start(s)|Event::Empty(s) => {
71					let tag = self.inner.decode(s.name());
72					let mut finished_inner = false;
73					let p = PRef { parser: self, finished_element: &mut finished_inner };
74					output = Some(p.parse_element::<S>(&tag)?);
75					if !finished_inner {
76						return Err(errors::inner_element_not_parsed(&tag));
77					}
78					continue;
79				},
80				// not supported
81				Event::PI(_) => return Err(errors::unexpected_pi()),
82				// ignore those at document level before the root element
83				Event::Decl(_) => {
84					if output.is_some() {
85						return Err(errors::unexpected_decl());
86					}
87				},
88				Event::DocType(_) => {
89					if output.is_some() {
90						return Err(errors::unexpected_doctype());
91					}
92				},
93				// ignore comments
94				Event::Comment(_) => (),
95				// text+cdata
96				Event::Text(t)|Event::CData(t) => {
97					let t = t.unescape_and_decode(self.inner)?;
98					if !t.trim().is_empty() {
99						return Err(errors::unexpected_text());
100					}
101				},
102			}
103			// Start+Empty continue directly; everything else needs to be cleared so we don't read it again
104			self.clear();
105		}
106	}
107}
108
109struct PRef<'x, 'a, 'r, B: BufRead> {
110	parser: &'x mut Parser<'a, 'r, B>,
111	finished_element: &'x mut bool,
112}
113
114impl<'x, 'a, 'r, B: BufRead> ElementParser for PRef<'x, 'a, 'r, B> {
115	fn parse_element_state<E: ElementState>(self, state: &mut E) -> Result<()> {
116		let (start, closed) = match self.parser.peek()? {
117			Event::Start(s) => (s, false),
118			Event::Empty(s) => (s, true),
119			_ => panic!("Element::read requires start or empty event"),
120		};
121
122		// TODO: quick-xml decoding sucks. no proper handling, "encoding" feature breaks API.
123		// improve quick-xml, then use it here
124
125		for attr in start.attributes() {
126			let attr = attr?;
127			let attr_key = self.parser.inner.decode(attr.key);
128			let attr_value = attr.unescape_and_decode_value(self.parser.inner)?;
129			state.parse_element_attribute(&attr_key, attr_value.into())?;
130		}
131
132		self.parser.clear(); // consume start tag
133
134		// read inner (unless there is no inner)
135		if closed {
136			*self.finished_element = true;
137			return Ok(());
138		}
139
140		loop {
141			match self.parser.peek()? {
142				Event::Eof => return Err(errors::unexpected_eof("unclosed element")),
143				Event::End(_) => {
144					self.parser.clear();
145					*self.finished_element = true;
146					return Ok(());
147				},
148				Event::Start(s)|Event::Empty(s) => {
149					let tag = self.parser.inner.decode(s.name());
150					let mut finished_inner = false;
151					let p = PRef { parser: self.parser, finished_element: &mut finished_inner };
152					state.parse_element_inner_node(&tag, p)?;
153					if !finished_inner {
154						return Err(errors::inner_element_not_parsed(&tag));
155					}
156					continue;
157				},
158				// not supported
159				Event::PI(_) => return Err(errors::unexpected_pi()),
160				// within elements those shouldn't be there
161				Event::Decl(_) => return Err(errors::unexpected_decl()),
162				Event::DocType(_) => return Err(errors::unexpected_doctype()),
163				// ignore comments
164				Event::Comment(_) => (),
165				// text+cdata
166				Event::Text(t)|Event::CData(t) => {
167					let t = t.unescape_and_decode(self.parser.inner)?;
168					state.parse_element_inner_text(t.into())?;
169				},
170			}
171			// Start+Empty continue directly; everything else needs to be cleared so we don't read it again
172			self.parser.clear();
173		}
174	}
175}
176
#[cfg(test)]
mod test {
	use crate::test_struct::*;
	use crate::Result;

	/// Parse `input` as a complete document whose root element is a `T`.
	fn parse<T: super::Element>(input: &str) -> Result<T> {
		let source = std::io::Cursor::new(input);
		let mut reader = quick_xml::Reader::from_reader(source);
		let mut scratch = Vec::new();
		let mut parser = super::Parser::new(&mut reader, &mut scratch);
		parser.parse_document::<T>()
	}

	/// The reference document must parse into the reference value.
	#[test]
	fn test() {
		let parsed = parse::<Data>(Data::TEST_PARSE_DOCUMENT_1).unwrap();
		assert_eq!(parsed, Data::TEST_RESULT_1);
	}
}