1use std::borrow::Cow;
2use std::collections::{BTreeMap, VecDeque};
3use std::str::{self, FromStr};
4
5use xmlparser::{ElementEnd, Token, Tokenizer};
6
7use crate::impls::CowStrAccumulator;
8use crate::{Error, Id};
9
/// Cursor over the nodes of a single XML element.
///
/// Implements [`Iterator`]: nodes are pulled from the shared [`Context`] until the close of this
/// element is seen, after which iteration ends.
pub struct Deserializer<'cx, 'xml> {
    /// Local (unprefixed) name of the element being read
    pub(crate) local: &'xml str,
    /// Namespace prefix the element was written with, if any
    prefix: Option<&'xml str>,
    /// Depth of the context's element stack recorded for this element
    level: usize,
    /// Set once the close of this element has been consumed
    done: bool,
    /// Parsing state shared with parent and nested deserializers
    context: &'cx mut Context<'xml>,
}
17
18impl<'cx, 'xml> Deserializer<'cx, 'xml> {
19 pub(crate) fn new(element: Element<'xml>, context: &'cx mut Context<'xml>) -> Self {
20 let level = context.stack.len();
21 Self {
22 local: element.local,
23 prefix: element.prefix,
24 level,
25 done: false,
26 context,
27 }
28 }
29
30 pub fn take_str(&mut self) -> Result<Option<Cow<'xml, str>>, Error> {
31 loop {
32 match self.next() {
33 Some(Ok(Node::AttributeValue(s))) => return Ok(Some(s)),
34 Some(Ok(Node::Text(s))) => return Ok(Some(s)),
35 Some(Ok(Node::Attribute(_))) => continue,
36 Some(Ok(node)) => return Err(Error::ExpectedScalar(format!("{node:?}"))),
37 Some(Err(e)) => return Err(e),
38 None => return Ok(None),
39 }
40 }
41 }
42
43 pub fn nested<'a>(&'a mut self, element: Element<'xml>) -> Deserializer<'a, 'xml>
44 where
45 'cx: 'a,
46 {
47 Deserializer::new(element, self.context)
48 }
49
50 pub fn ignore(&mut self) -> Result<(), Error> {
51 loop {
52 match self.next() {
53 Some(Err(e)) => return Err(e),
54 Some(Ok(Node::Open(element))) => {
55 let mut nested = self.nested(element);
56 nested.ignore()?;
57 }
58 Some(_) => continue,
59 None => return Ok(()),
60 }
61 }
62 }
63
64 pub fn for_node<'a>(&'a mut self, node: Node<'xml>) -> Deserializer<'a, 'xml>
65 where
66 'cx: 'a,
67 {
68 self.context.records.push_front(node);
69 Deserializer {
70 local: self.local,
71 prefix: self.prefix,
72 level: self.level,
73 done: self.done,
74 context: self.context,
75 }
76 }
77
78 pub fn parent(&self) -> Id<'xml> {
79 Id {
80 ns: match self.prefix {
81 Some(ns) => self.context.lookup(ns).unwrap(),
82 None => self.context.default_ns(),
83 },
84 name: self.local,
85 }
86 }
87
88 #[inline]
89 pub fn element_id(&self, element: &Element<'xml>) -> Result<Id<'xml>, Error> {
90 self.context.element_id(element)
91 }
92
93 #[inline]
94 pub fn attribute_id(&self, attr: &Attribute<'xml>) -> Result<Id<'xml>, Error> {
95 self.context.attribute_id(attr)
96 }
97}
98
99impl<'xml> Iterator for Deserializer<'_, 'xml> {
100 type Item = Result<Node<'xml>, Error>;
101
102 fn next(&mut self) -> Option<Self::Item> {
103 if self.done {
104 return None;
105 }
106
107 let (prefix, local) = match self.context.next() {
108 Some(Ok(Node::Close { prefix, local })) => (prefix, local),
109 item => return item,
110 };
111
112 if self.context.stack.len() == self.level - 1
113 && local == self.local
114 && prefix == self.prefix
115 {
116 self.done = true;
117 return None;
118 }
119
120 Some(Err(Error::UnexpectedState("close element mismatch")))
121 }
122}
123
/// Parsing state shared by every [`Deserializer`] working on the same document.
pub(crate) struct Context<'xml> {
    /// Token source for the input
    parser: Tokenizer<'xml>,
    /// One entry per element that is currently open, innermost last
    stack: Vec<Level<'xml>>,
    /// Nodes queued for delivery ahead of the next token
    records: VecDeque<Node<'xml>>,
}
129
130impl<'xml> Context<'xml> {
131 pub(crate) fn new(input: &'xml str) -> Result<(Self, Element<'xml>), Error> {
132 let mut new = Self {
133 parser: Tokenizer::from(input),
134 stack: Vec::new(),
135 records: VecDeque::new(),
136 };
137
138 let root = match new.next() {
139 Some(result) => match result? {
140 Node::Open(element) => element,
141 _ => return Err(Error::UnexpectedState("first node does not open element")),
142 },
143 None => return Err(Error::UnexpectedEndOfStream),
144 };
145
146 Ok((new, root))
147 }
148
149 pub(crate) fn element_id(&self, element: &Element<'xml>) -> Result<Id<'xml>, Error> {
150 Ok(Id {
151 ns: match (element.default_ns, element.prefix) {
152 (_, Some(prefix)) => match self.lookup(prefix) {
153 Some(ns) => ns,
154 None => return Err(Error::UnknownPrefix(prefix.to_owned())),
155 },
156 (Some(ns), None) => ns,
157 (None, None) => self.default_ns(),
158 },
159 name: element.local,
160 })
161 }
162
163 fn attribute_id(&self, attr: &Attribute<'xml>) -> Result<Id<'xml>, Error> {
164 Ok(Id {
165 ns: match attr.prefix {
166 Some(ns) => self
167 .lookup(ns)
168 .ok_or_else(|| Error::UnknownPrefix(ns.to_owned()))?,
169 None => "",
170 },
171 name: attr.local,
172 })
173 }
174
175 fn default_ns(&self) -> &'xml str {
176 self.stack
177 .iter()
178 .rev()
179 .find_map(|level| level.default_ns)
180 .unwrap_or("")
181 }
182
183 fn lookup(&self, prefix: &str) -> Option<&'xml str> {
184 if prefix == "xml" {
188 return Some("http://www.w3.org/XML/1998/namespace");
189 }
190
191 self.stack
192 .iter()
193 .rev()
194 .find_map(|level| level.prefixes.get(prefix).copied())
195 }
196}
197
198impl<'xml> Iterator for Context<'xml> {
199 type Item = Result<Node<'xml>, Error>;
200
201 fn next(&mut self) -> Option<Self::Item> {
202 if let Some(record) = self.records.pop_front() {
203 if let Node::Close { .. } = &record {
204 self.stack.pop();
205 }
206 return Some(Ok(record));
207 }
208
209 loop {
210 match self.parser.next()? {
211 Ok(Token::ElementStart { prefix, local, .. }) => {
212 let prefix = prefix.as_str();
213 self.stack.push(Level {
214 local: local.as_str(),
215 prefix: match prefix.is_empty() {
216 true => None,
217 false => Some(prefix),
218 },
219 default_ns: None,
220 prefixes: BTreeMap::new(),
221 });
222 }
223 Ok(Token::ElementEnd { end, .. }) => match end {
224 ElementEnd::Open => {
225 let level = match self.stack.last() {
226 Some(level) => level,
227 None => {
228 return Some(Err(Error::UnexpectedState(
229 "opening element with no parent",
230 )))
231 }
232 };
233
234 let element = Element {
235 local: level.local,
236 prefix: level.prefix,
237 default_ns: level.default_ns,
238 };
239
240 return Some(Ok(Node::Open(element)));
241 }
242 ElementEnd::Close(prefix, v) => {
243 let level = match self.stack.pop() {
244 Some(level) => level,
245 None => {
246 return Some(Err(Error::UnexpectedState(
247 "closing element without parent",
248 )))
249 }
250 };
251
252 let prefix = match prefix.is_empty() {
253 true => None,
254 false => Some(prefix.as_str()),
255 };
256
257 match v.as_str() == level.local && prefix == level.prefix {
258 true => {
259 return Some(Ok(Node::Close {
260 prefix,
261 local: level.local,
262 }))
263 }
264 false => {
265 return Some(Err(Error::UnexpectedState("close element mismatch")))
266 }
267 }
268 }
269 ElementEnd::Empty => {
270 let level = match self.stack.last() {
271 Some(level) => level,
272 None => {
273 return Some(Err(Error::UnexpectedState(
274 "opening element with no parent",
275 )))
276 }
277 };
278
279 self.records.push_back(Node::Close {
280 prefix: level.prefix,
281 local: level.local,
282 });
283
284 let element = Element {
285 local: level.local,
286 prefix: level.prefix,
287 default_ns: level.default_ns,
288 };
289
290 return Some(Ok(Node::Open(element)));
291 }
292 },
293 Ok(Token::Attribute {
294 prefix,
295 local,
296 value,
297 ..
298 }) => {
299 if prefix.is_empty() && local.as_str() == "xmlns" {
300 match self.stack.last_mut() {
301 Some(level) => level.default_ns = Some(value.as_str()),
302 None => {
303 return Some(Err(Error::UnexpectedState(
304 "attribute without element context",
305 )))
306 }
307 }
308 } else if prefix.as_str() == "xmlns" {
309 match self.stack.last_mut() {
310 Some(level) => {
311 level.prefixes.insert(local.as_str(), value.as_str());
312 }
313 None => {
314 return Some(Err(Error::UnexpectedState(
315 "attribute without element context",
316 )))
317 }
318 }
319 } else {
320 let value = match decode(value.as_str()) {
321 Ok(value) => value,
322 Err(e) => return Some(Err(e)),
323 };
324
325 self.records.push_back(Node::Attribute(Attribute {
326 prefix: match prefix.is_empty() {
327 true => None,
328 false => Some(prefix.as_str()),
329 },
330 local: local.as_str(),
331 value,
332 }));
333 }
334 }
335 Ok(Token::Text { text }) => {
336 return Some(decode(text.as_str()).map(Node::Text));
337 }
338 Ok(Token::Cdata { text, .. }) => {
339 return Some(Ok(Node::Text(Cow::Borrowed(text.as_str()))));
340 }
341 Ok(token @ Token::Declaration { .. }) => {
342 if !self.stack.is_empty() {
343 return Some(Err(Error::UnexpectedToken(format!("{token:?}"))));
344 }
345 }
346 Ok(Token::Comment { .. }) => continue,
347 Ok(token) => return Some(Err(Error::UnexpectedToken(format!("{token:?}")))),
348 Err(e) => return Some(Err(Error::Parse(e))),
349 }
350 }
351 }
352}
353
354pub fn borrow_cow_str<'a, 'xml: 'a>(
355 into: &mut CowStrAccumulator<'xml, 'a>,
356 field: &'static str,
357 deserializer: &mut Deserializer<'_, 'xml>,
358) -> Result<(), Error> {
359 if into.inner.is_some() {
360 return Err(Error::DuplicateValue(field));
361 }
362
363 match deserializer.take_str()? {
364 Some(value) => into.inner = Some(value),
365 None => return Ok(()),
366 };
367
368 deserializer.ignore()?;
369 Ok(())
370}
371
372pub fn borrow_cow_slice_u8<'xml>(
373 into: &mut Option<Cow<'xml, [u8]>>,
374 field: &'static str,
375 deserializer: &mut Deserializer<'_, 'xml>,
376) -> Result<(), Error> {
377 if into.is_some() {
378 return Err(Error::DuplicateValue(field));
379 }
380
381 if let Some(value) = deserializer.take_str()? {
382 *into = Some(match value {
383 Cow::Borrowed(v) => Cow::Borrowed(v.as_bytes()),
384 Cow::Owned(v) => Cow::Owned(v.into_bytes()),
385 });
386 }
387
388 deserializer.ignore()?;
389 Ok(())
390}
391
392fn decode(input: &str) -> Result<Cow<'_, str>, Error> {
393 let mut result = String::with_capacity(input.len());
394 let (mut state, mut last_end) = (DecodeState::Normal, 0);
395 for (i, &b) in input.as_bytes().iter().enumerate() {
396 state = match (state, b) {
398 (DecodeState::Normal, b'&') => DecodeState::Entity([0; 6], 0),
399 (DecodeState::Normal, _) => DecodeState::Normal,
400 (DecodeState::Entity(chars, len), b';') => {
401 let decoded = match &chars[..len] {
402 [b'a', b'm', b'p'] => '&',
403 [b'a', b'p', b'o', b's'] => '\'',
404 [b'g', b't'] => '>',
405 [b'l', b't'] => '<',
406 [b'q', b'u', b'o', b't'] => '"',
407 [b'#', b'x' | b'X', hex @ ..] => {
408 str::from_utf8(hex)
410 .ok()
411 .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
412 .and_then(char::from_u32)
413 .filter(valid_xml_character)
414 .ok_or_else(|| {
415 Error::InvalidEntity(
416 String::from_utf8_lossy(&chars[..len]).into_owned(),
417 )
418 })?
419 }
420 [b'#', decimal @ ..] => {
421 str::from_utf8(decimal)
423 .ok()
424 .and_then(|decimal_str| u32::from_str(decimal_str).ok())
425 .and_then(char::from_u32)
426 .filter(valid_xml_character)
427 .ok_or_else(|| {
428 Error::InvalidEntity(
429 String::from_utf8_lossy(&chars[..len]).into_owned(),
430 )
431 })?
432 }
433 _ => {
434 return Err(Error::InvalidEntity(
435 String::from_utf8_lossy(&chars[..len]).into_owned(),
436 ))
437 }
438 };
439
440 let start = i - (len + 1); if last_end < start {
442 result.push_str(input.get(last_end..start).unwrap());
444 }
445
446 last_end = i + 1;
447 result.push(decoded);
448 DecodeState::Normal
449 }
450 (DecodeState::Entity(mut chars, len), b) => {
451 if len >= 6 {
452 let mut bytes = Vec::with_capacity(7);
453 bytes.extend(&chars[..len]);
454 bytes.push(b);
455 return Err(Error::InvalidEntity(
456 String::from_utf8_lossy(&bytes).into_owned(),
457 ));
458 }
459
460 chars[len] = b;
461 DecodeState::Entity(chars, len + 1)
462 }
463 };
464 }
465
466 if let DecodeState::Entity(chars, len) = state {
468 return Err(Error::InvalidEntity(
469 String::from_utf8_lossy(&chars[..len]).into_owned(),
470 ));
471 }
472
473 Ok(match result.is_empty() {
474 true => Cow::Borrowed(input),
475 false => {
476 result.push_str(input.get(last_end..input.len()).unwrap());
478 Cow::Owned(result)
479 }
480 })
481}
482
483#[derive(Debug)]
484enum DecodeState {
485 Normal,
486 Entity([u8; 6], usize),
487}
488
/// Reports whether `c` may appear in an XML 1.0 document.
///
/// Allowed are tab, line feed, carriage return and every code point from U+0020 upwards except
/// the surrogate block and the two non-characters U+FFFE and U+FFFF.
fn valid_xml_character(c: &char) -> bool {
    let code = u32::from(*c);
    code == 0x9
        || code == 0xA
        || code == 0xD
        || (0x20..=0xD7FF).contains(&code)
        || (0xE000..=0xFFFD).contains(&code)
        || (0x1_0000..=0x10_FFFF).contains(&code)
}
493
/// A single item produced while walking an XML document.
#[derive(Debug)]
pub enum Node<'xml> {
    /// An attribute of the element that was opened most recently
    Attribute(Attribute<'xml>),
    /// A bare attribute value, accepted as a scalar by `Deserializer::take_str`.
    ///
    /// NOTE(review): nothing in this file produces this variant; presumably callers queue it
    /// through `Deserializer::for_node` — confirm.
    AttributeValue(Cow<'xml, str>),
    /// The end of an element, from either a closing tag or a self-closing tag
    Close {
        /// Prefix of the element being closed, if any
        prefix: Option<&'xml str>,
        /// Local name of the element being closed
        local: &'xml str,
    },
    /// Character data with references decoded, or the unmodified content of a CDATA section
    Text(Cow<'xml, str>),
    /// The start of an element
    Open(Element<'xml>),
}
505
/// The name of an element as captured when its start tag was read.
#[derive(Debug)]
pub struct Element<'xml> {
    /// Local (unprefixed) name
    local: &'xml str,
    /// Value of an `xmlns` attribute on this element itself, if it has one
    default_ns: Option<&'xml str>,
    /// Namespace prefix the element was written with, if any
    prefix: Option<&'xml str>,
}
512
/// Bookkeeping for one element that is currently open.
#[derive(Debug)]
struct Level<'xml> {
    /// Local (unprefixed) name, checked against the closing tag
    local: &'xml str,
    /// Namespace prefix the element was written with, if any
    prefix: Option<&'xml str>,
    /// Default namespace declared on this element via `xmlns`, if any
    default_ns: Option<&'xml str>,
    /// Prefix bindings declared on this element via `xmlns:prefix`
    prefixes: BTreeMap<&'xml str, &'xml str>,
}
520
/// An attribute other than a namespace declaration.
#[derive(Debug)]
pub struct Attribute<'xml> {
    /// Namespace prefix the attribute was written with, if any
    pub prefix: Option<&'xml str>,
    /// Local (unprefixed) name
    pub local: &'xml str,
    /// Value with entity and character references decoded
    pub value: Cow<'xml, str>,
}
527
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `decode` with predefined entities, character references and malformed input.
    #[test]
    fn test_decode() {
        // Plain text and the five predefined entities
        decode_ok("foo", "foo");
        decode_ok("foo &amp; bar", "foo & bar");
        decode_ok("foo &lt; bar", "foo < bar");
        decode_ok("foo &gt; bar", "foo > bar");
        decode_ok("foo &quot; bar", "foo \" bar");
        decode_ok("foo &apos; bar", "foo ' bar");
        // Decoding is a single pass: the result is not decoded again
        decode_ok("foo &amp;lt; bar", "foo &lt; bar");
        decode_ok("&amp; foo", "& foo");
        decode_ok("foo &amp;", "foo &");
        decode_ok("cbdtéda&amp;sü", "cbdtéda&sü");

        // Decimal character references
        decode_ok("&#1234;", "Ӓ");
        decode_ok("foo &#9; bar", "foo \t bar");
        decode_ok("foo &#124; bar", "foo | bar");
        decode_ok("foo &#1234; bar", "foo Ӓ bar");

        // Hexadecimal character references
        decode_ok("&#xc4;", "Ä");
        decode_ok("&#x00c4;", "Ä");
        decode_ok("foo &#x9; bar", "foo \t bar");
        decode_ok("foo &#x7c; bar", "foo | bar");
        decode_ok("foo &#xc4; bar", "foo Ä bar");
        decode_ok("foo &#x00c4; bar", "foo Ä bar");
        decode_ok("foo &#x10de; bar", "foo პ bar");

        // Unterminated, empty and unknown references
        decode_err("&");
        decode_err("&#");
        decode_err("&#;");
        decode_err("foo&");
        decode_err("&bar");
        decode_err("&foo;");
        decode_err("&foobar;");
        decode_err("cbdtéd&ampü");
    }

    /// Asserts that `input` decodes successfully to `expected`.
    fn decode_ok(input: &str, expected: &'static str) {
        assert_eq!(super::decode(input).unwrap(), expected, "{input:?}");
    }

    /// Asserts that decoding `input` fails.
    fn decode_err(input: &str) {
        assert!(super::decode(input).is_err(), "{input:?}");
    }
}