1use std::borrow::Cow;
2use std::collections::{BTreeMap, VecDeque};
3use std::str::{self, FromStr};
4
5use xmlparser::{ElementEnd, Token, Tokenizer};
6
7use crate::impls::CowStrAccumulator;
8use crate::{Error, Id};
9
10pub struct Deserializer<'cx, 'xml> {
11 pub(crate) local: &'xml str,
12 prefix: Option<&'xml str>,
13 level: usize,
14 done: bool,
15 context: &'cx mut Context<'xml>,
16}
17
18impl<'cx, 'xml> Deserializer<'cx, 'xml> {
19 pub(crate) fn new(element: Element<'xml>, context: &'cx mut Context<'xml>) -> Self {
20 let level = context.stack.len();
21 if !element.empty {
22 context.stack.push(element.level);
23 }
24
25 Self {
26 local: element.local,
27 prefix: element.prefix,
28 level,
29 done: false,
30 context,
31 }
32 }
33
34 pub fn take_str(&mut self) -> Result<Option<Cow<'xml, str>>, Error> {
35 loop {
36 match self.next() {
37 Some(Ok(Node::AttributeValue(s))) => return Ok(Some(s)),
38 Some(Ok(Node::Text(s))) => return Ok(Some(s)),
39 Some(Ok(Node::Attribute(_))) => continue,
40 Some(Ok(node)) => return Err(Error::ExpectedScalar(format!("{node:?}"))),
41 Some(Err(e)) => return Err(e),
42 None => return Ok(None),
43 }
44 }
45 }
46
47 pub fn nested<'a>(&'a mut self, element: Element<'xml>) -> Deserializer<'a, 'xml>
48 where
49 'cx: 'a,
50 {
51 Deserializer::new(element, self.context)
52 }
53
54 pub fn ignore(&mut self) -> Result<(), Error> {
55 loop {
56 match self.next() {
57 Some(Err(e)) => return Err(e),
58 Some(Ok(Node::Open(element))) => {
59 let mut nested = self.nested(element);
60 nested.ignore()?;
61 }
62 Some(_) => continue,
63 None => return Ok(()),
64 }
65 }
66 }
67
68 pub fn for_node<'a>(&'a mut self, node: Node<'xml>) -> Deserializer<'a, 'xml>
69 where
70 'cx: 'a,
71 {
72 self.context.records.push_front(node);
73 Deserializer {
74 local: self.local,
75 prefix: self.prefix,
76 level: self.level,
77 done: self.done,
78 context: self.context,
79 }
80 }
81
82 pub fn parent(&self) -> Id<'xml> {
83 Id {
84 ns: match self.prefix {
85 Some(ns) => self.context.lookup(ns).unwrap(),
86 None => self.context.default_ns(),
87 },
88 name: self.local,
89 }
90 }
91
92 #[inline]
93 pub fn element_id(&self, element: &Element<'xml>) -> Result<Id<'xml>, Error> {
94 self.context.element_id(element)
95 }
96
97 #[inline]
98 pub fn attribute_id(&self, attr: &Attribute<'xml>) -> Result<Id<'xml>, Error> {
99 self.context.attribute_id(attr)
100 }
101}
102
103impl<'xml> Iterator for Deserializer<'_, 'xml> {
104 type Item = Result<Node<'xml>, Error>;
105
106 fn next(&mut self) -> Option<Self::Item> {
107 if self.done {
108 return None;
109 }
110
111 let (prefix, local) = match self.context.next() {
112 Some(Ok(Node::Close { prefix, local })) => (prefix, local),
113 item => return item,
114 };
115
116 if self.context.stack.len() == self.level && local == self.local && prefix == self.prefix {
117 self.done = true;
118 return None;
119 }
120
121 Some(Err(Error::UnexpectedState("close element mismatch")))
122 }
123}
124
125pub(crate) struct Context<'xml> {
126 parser: Tokenizer<'xml>,
127 stack: Vec<Level<'xml>>,
128 records: VecDeque<Node<'xml>>,
129}
130
131impl<'xml> Context<'xml> {
132 pub(crate) fn new(input: &'xml str) -> Result<(Self, Element<'xml>), Error> {
133 let mut new = Self {
134 parser: Tokenizer::from(input),
135 stack: Vec::new(),
136 records: VecDeque::new(),
137 };
138
139 let root = match new.next() {
140 Some(result) => match result? {
141 Node::Open(element) => element,
142 _ => return Err(Error::UnexpectedState("first node does not open element")),
143 },
144 None => return Err(Error::UnexpectedEndOfStream),
145 };
146
147 Ok((new, root))
148 }
149
150 pub(crate) fn element_id(&self, element: &Element<'xml>) -> Result<Id<'xml>, Error> {
151 Ok(Id {
152 ns: match (element.default_ns, element.prefix) {
153 (_, Some(prefix)) => match element.level.prefixes.get(prefix) {
154 Some(ns) => ns,
155 None => match self.lookup(prefix) {
156 Some(ns) => ns,
157 None => return Err(Error::UnknownPrefix(prefix.to_owned())),
158 },
159 },
160 (Some(ns), None) => ns,
161 (None, None) => self.default_ns(),
162 },
163 name: element.local,
164 })
165 }
166
167 fn attribute_id(&self, attr: &Attribute<'xml>) -> Result<Id<'xml>, Error> {
168 Ok(Id {
169 ns: match attr.prefix {
170 Some(ns) => self
171 .lookup(ns)
172 .ok_or_else(|| Error::UnknownPrefix(ns.to_owned()))?,
173 None => "",
174 },
175 name: attr.local,
176 })
177 }
178
179 fn default_ns(&self) -> &'xml str {
180 self.stack
181 .iter()
182 .rev()
183 .find_map(|level| level.default_ns)
184 .unwrap_or("")
185 }
186
187 fn lookup(&self, prefix: &str) -> Option<&'xml str> {
188 if prefix == "xml" {
192 return Some("http://www.w3.org/XML/1998/namespace");
193 }
194
195 self.stack
196 .iter()
197 .rev()
198 .find_map(|level| level.prefixes.get(prefix).copied())
199 }
200}
201
202impl<'xml> Iterator for Context<'xml> {
203 type Item = Result<Node<'xml>, Error>;
204
205 fn next(&mut self) -> Option<Self::Item> {
206 if let Some(record) = self.records.pop_front() {
207 return Some(Ok(record));
208 }
209
210 let mut current = None;
211 loop {
212 let token = match self.parser.next() {
213 Some(v) => v,
214 None => return None,
215 };
216
217 match token {
218 Ok(Token::ElementStart { prefix, local, .. }) => {
219 let prefix = prefix.as_str();
220 current = Some(Level {
221 local: local.as_str(),
222 prefix: match prefix.is_empty() {
223 true => None,
224 false => Some(prefix),
225 },
226 default_ns: None,
227 prefixes: BTreeMap::new(),
228 });
229 }
230 Ok(Token::ElementEnd { end, .. }) => match end {
231 ElementEnd::Open => {
232 let level = match current {
233 Some(level) => level,
234 None => {
235 return Some(Err(Error::UnexpectedState(
236 "opening element with no parent",
237 )))
238 }
239 };
240
241 let element = Element {
242 local: level.local,
243 prefix: level.prefix,
244 default_ns: level.default_ns,
245 level,
246 empty: false,
247 };
248
249 return Some(Ok(Node::Open(element)));
250 }
251 ElementEnd::Close(prefix, v) => {
252 let level = match self.stack.pop() {
253 Some(level) => level,
254 None => {
255 return Some(Err(Error::UnexpectedState(
256 "closing element without parent",
257 )))
258 }
259 };
260
261 let prefix = match prefix.is_empty() {
262 true => None,
263 false => Some(prefix.as_str()),
264 };
265
266 match v.as_str() == level.local && prefix == level.prefix {
267 true => {
268 return Some(Ok(Node::Close {
269 prefix,
270 local: level.local,
271 }))
272 }
273 false => {
274 return Some(Err(Error::UnexpectedState("close element mismatch")))
275 }
276 }
277 }
278 ElementEnd::Empty => {
279 let level = match current {
280 Some(level) => level,
281 None => {
282 return Some(Err(Error::UnexpectedState(
283 "opening element with no parent",
284 )))
285 }
286 };
287
288 self.records.push_back(Node::Close {
289 prefix: level.prefix,
290 local: level.local,
291 });
292
293 let element = Element {
294 local: level.local,
295 prefix: level.prefix,
296 default_ns: level.default_ns,
297 level,
298 empty: true,
299 };
300
301 return Some(Ok(Node::Open(element)));
302 }
303 },
304 Ok(Token::Attribute {
305 prefix,
306 local,
307 value,
308 ..
309 }) => {
310 if prefix.is_empty() && local.as_str() == "xmlns" {
311 match &mut current {
312 Some(level) => level.default_ns = Some(value.as_str()),
313 None => {
314 return Some(Err(Error::UnexpectedState(
315 "attribute without element context",
316 )))
317 }
318 }
319 } else if prefix.as_str() == "xmlns" {
320 match &mut current {
321 Some(level) => {
322 level.prefixes.insert(local.as_str(), value.as_str());
323 }
324 None => {
325 return Some(Err(Error::UnexpectedState(
326 "attribute without element context",
327 )))
328 }
329 }
330 } else {
331 let value = match decode(value.as_str()) {
332 Ok(value) => value,
333 Err(e) => return Some(Err(e)),
334 };
335
336 self.records.push_back(Node::Attribute(Attribute {
337 prefix: match prefix.is_empty() {
338 true => None,
339 false => Some(prefix.as_str()),
340 },
341 local: local.as_str(),
342 value,
343 }));
344 }
345 }
346 Ok(Token::Text { text }) => {
347 return Some(decode(text.as_str()).map(Node::Text));
348 }
349 Ok(Token::Cdata { text, .. }) => {
350 return Some(Ok(Node::Text(Cow::Borrowed(text.as_str()))));
351 }
352 Ok(Token::Declaration { .. }) => {
353 if !self.stack.is_empty() {
354 return Some(Err(Error::UnexpectedToken(format!("{token:?}"))));
355 }
356 }
357 Ok(Token::Comment { .. }) => continue,
358 Ok(token) => return Some(Err(Error::UnexpectedToken(format!("{token:?}")))),
359 Err(e) => return Some(Err(Error::Parse(e))),
360 }
361 }
362 }
363}
364
365pub fn borrow_cow_str<'a, 'xml: 'a>(
366 into: &mut CowStrAccumulator<'xml, 'a>,
367 field: &'static str,
368 deserializer: &mut Deserializer<'_, 'xml>,
369) -> Result<(), Error> {
370 if into.inner.is_some() {
371 return Err(Error::DuplicateValue(field));
372 }
373
374 match deserializer.take_str()? {
375 Some(value) => into.inner = Some(value),
376 None => return Ok(()),
377 };
378
379 deserializer.ignore()?;
380 Ok(())
381}
382
383pub fn borrow_cow_slice_u8<'xml>(
384 into: &mut Option<Cow<'xml, [u8]>>,
385 field: &'static str,
386 deserializer: &mut Deserializer<'_, 'xml>,
387) -> Result<(), Error> {
388 if into.is_some() {
389 return Err(Error::DuplicateValue(field));
390 }
391
392 if let Some(value) = deserializer.take_str()? {
393 *into = Some(match value {
394 Cow::Borrowed(v) => Cow::Borrowed(v.as_bytes()),
395 Cow::Owned(v) => Cow::Owned(v.into_bytes()),
396 });
397 }
398
399 deserializer.ignore()?;
400 Ok(())
401}
402
403fn decode(input: &str) -> Result<Cow<'_, str>, Error> {
404 let mut result = String::with_capacity(input.len());
405 let (mut state, mut last_end) = (DecodeState::Normal, 0);
406 for (i, &b) in input.as_bytes().iter().enumerate() {
407 state = match (state, b) {
409 (DecodeState::Normal, b'&') => DecodeState::Entity([0; 6], 0),
410 (DecodeState::Normal, _) => DecodeState::Normal,
411 (DecodeState::Entity(chars, len), b';') => {
412 let decoded = match &chars[..len] {
413 [b'a', b'm', b'p'] => '&',
414 [b'a', b'p', b'o', b's'] => '\'',
415 [b'g', b't'] => '>',
416 [b'l', b't'] => '<',
417 [b'q', b'u', b'o', b't'] => '"',
418 [b'#', b'x' | b'X', hex @ ..] => {
419 str::from_utf8(hex)
421 .ok()
422 .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
423 .and_then(char::from_u32)
424 .filter(valid_xml_character)
425 .ok_or_else(|| {
426 Error::InvalidEntity(
427 String::from_utf8_lossy(&chars[..len]).into_owned(),
428 )
429 })?
430 }
431 [b'#', decimal @ ..] => {
432 str::from_utf8(decimal)
434 .ok()
435 .and_then(|decimal_str| u32::from_str(decimal_str).ok())
436 .and_then(char::from_u32)
437 .filter(valid_xml_character)
438 .ok_or_else(|| {
439 Error::InvalidEntity(
440 String::from_utf8_lossy(&chars[..len]).into_owned(),
441 )
442 })?
443 }
444 _ => {
445 return Err(Error::InvalidEntity(
446 String::from_utf8_lossy(&chars[..len]).into_owned(),
447 ))
448 }
449 };
450
451 let start = i - (len + 1); if last_end < start {
453 result.push_str(input.get(last_end..start).unwrap());
455 }
456
457 last_end = i + 1;
458 result.push(decoded);
459 DecodeState::Normal
460 }
461 (DecodeState::Entity(mut chars, len), b) => {
462 if len >= 6 {
463 let mut bytes = Vec::with_capacity(7);
464 bytes.extend(&chars[..len]);
465 bytes.push(b);
466 return Err(Error::InvalidEntity(
467 String::from_utf8_lossy(&bytes).into_owned(),
468 ));
469 }
470
471 chars[len] = b;
472 DecodeState::Entity(chars, len + 1)
473 }
474 };
475 }
476
477 if let DecodeState::Entity(chars, len) = state {
479 return Err(Error::InvalidEntity(
480 String::from_utf8_lossy(&chars[..len]).into_owned(),
481 ));
482 }
483
484 Ok(match result.is_empty() {
485 true => Cow::Borrowed(input),
486 false => {
487 result.push_str(input.get(last_end..input.len()).unwrap());
489 Cow::Owned(result)
490 }
491 })
492}
493
494#[derive(Debug)]
495enum DecodeState {
496 Normal,
497 Entity([u8; 6], usize),
498}
499
500fn valid_xml_character(c: &char) -> bool {
502 matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}')
503}
504
505#[derive(Debug)]
506pub enum Node<'xml> {
507 Attribute(Attribute<'xml>),
508 AttributeValue(Cow<'xml, str>),
509 Close {
510 prefix: Option<&'xml str>,
511 local: &'xml str,
512 },
513 Text(Cow<'xml, str>),
514 Open(Element<'xml>),
515}
516
517#[derive(Debug)]
518pub struct Element<'xml> {
519 local: &'xml str,
520 default_ns: Option<&'xml str>,
521 prefix: Option<&'xml str>,
522 level: Level<'xml>,
523 empty: bool,
524}
525
/// Name and namespace declarations of one element, as kept on the scope stack.
#[derive(Debug)]
struct Level<'xml> {
    /// Local name of the element.
    local: &'xml str,
    /// Namespace prefix of the element, if any.
    prefix: Option<&'xml str>,
    /// Namespace declared with a plain `xmlns` attribute, if any.
    default_ns: Option<&'xml str>,
    /// Namespaces declared with `xmlns:<prefix>` attributes, keyed by prefix.
    prefixes: BTreeMap<&'xml str, &'xml str>,
}
533
/// An attribute read from a start tag, excluding namespace declarations.
#[derive(Debug)]
pub struct Attribute<'xml> {
    /// Namespace prefix of the attribute, if any.
    pub prefix: Option<&'xml str>,
    /// Local name of the attribute.
    pub local: &'xml str,
    /// Attribute value with entities and character references decoded.
    pub value: Cow<'xml, str>,
}
540
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `decode` with predefined entities, decimal and hexadecimal character
    /// references, and a set of malformed references that must be rejected.
    #[test]
    fn test_decode() {
        decode_ok("foo", "foo");
        decode_ok("foo &amp; bar", "foo & bar");
        decode_ok("foo &lt; bar", "foo < bar");
        decode_ok("foo &gt; bar", "foo > bar");
        decode_ok("foo &quot; bar", "foo \" bar");
        decode_ok("foo &apos; bar", "foo ' bar");
        decode_ok("foo &amp;lt; bar", "foo &lt; bar");
        decode_ok("&amp; foo", "& foo");
        decode_ok("foo &amp;", "foo &");
        decode_ok("cbdtéda&amp;sü", "cbdtéda&sü");
        // Decimal character references
        decode_ok("&#1234;", "Ӓ");
        decode_ok("foo &#9; bar", "foo \t bar");
        decode_ok("foo &#124; bar", "foo | bar");
        decode_ok("foo &#1234; bar", "foo Ӓ bar");
        // Hexadecimal character references
        decode_ok("&#xc4;", "Ä");
        decode_ok("&#x00c4;", "Ä");
        decode_ok("foo &#x9; bar", "foo \t bar");
        decode_ok("foo &#x007c; bar", "foo | bar");
        decode_ok("foo &#xc4; bar", "foo Ä bar");
        decode_ok("foo &#x00c4; bar", "foo Ä bar");
        decode_ok("foo &#x10de; bar", "foo პ bar");

        decode_err("&");
        decode_err("&#");
        decode_err("&#;");
        decode_err("foo&");
        decode_err("&bar");
        decode_err("&foo;");
        decode_err("&foobar;");
        decode_err("cbdtéd&ü");
    }

    /// Asserts that decoding `input` succeeds and yields `expected`.
    fn decode_ok(input: &str, expected: &'static str) {
        assert_eq!(super::decode(input).unwrap(), expected, "{input:?}");
    }

    /// Asserts that decoding `input` fails.
    fn decode_err(input: &str) {
        assert!(super::decode(input).is_err(), "{input:?}");
    }
}