1extern crate alloc;
2
3use alloc::borrow::Cow;
4use alloc::collections::BTreeMap;
5use alloc::string::String;
6use alloc::vec::Vec;
7use core::fmt;
8
9use facet_format::{
10 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
11 ProbeStream, ScalarValue,
12};
13use quick_xml::NsReader;
14use quick_xml::escape::resolve_xml_entity;
15use quick_xml::events::Event;
16use quick_xml::name::ResolveResult;
17use std::io::Cursor;
18
/// A qualified XML name: a local name plus the namespace URI it resolved to, if any.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
struct QName {
    /// Namespace URI, or `None` when the name is not in any namespace.
    namespace: Option<String>,
    /// The name without any prefix.
    local_name: String,
}

#[allow(dead_code)]
impl QName {
    /// Builds a name that carries no namespace.
    fn local(name: impl Into<String>) -> Self {
        let local_name = name.into();
        QName {
            namespace: None,
            local_name,
        }
    }

    /// Builds a name bound to the given namespace URI.
    fn with_ns(namespace: impl Into<String>, local_name: impl Into<String>) -> Self {
        let namespace = Some(namespace.into());
        let local_name = local_name.into();
        QName {
            namespace,
            local_name,
        }
    }

    /// Returns `true` when the local name equals `local_name` and the namespace
    /// is acceptable.
    ///
    /// With `expected_ns == None` any namespace (including none) is accepted;
    /// with `Some(ns)` the stored namespace must be exactly `ns`.
    fn matches(&self, local_name: &str, expected_ns: Option<&str>) -> bool {
        let namespace_ok =
            expected_ns.map_or(true, |wanted| self.namespace.as_deref() == Some(wanted));
        self.local_name == local_name && namespace_ok
    }
}

/// Renders the name as `{namespace}local` when a namespace is present and as
/// the bare local name otherwise.
impl fmt::Display for QName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(uri) = &self.namespace {
            write!(f, "{{{}}}", uri)?;
        }
        f.write_str(&self.local_name)
    }
}
79
80pub struct XmlParser<'de> {
81 events: Vec<ParseEvent<'de>>,
82 idx: usize,
83 pending_error: Option<XmlError>,
84}
85
86impl<'de> XmlParser<'de> {
87 pub fn new(input: &'de [u8]) -> Self {
88 match build_events(input) {
89 Ok(events) => Self {
90 events,
91 idx: 0,
92 pending_error: None,
93 },
94 Err(err) => Self {
95 events: Vec::new(),
96 idx: 0,
97 pending_error: Some(err),
98 },
99 }
100 }
101}
102
/// Errors reported by the XML parser.
#[derive(Debug, Clone)]
pub enum XmlError {
    /// A failure described by a free-form message.
    ParseError(String),
    /// The input or the event stream ended where more was required.
    UnexpectedEof,
    /// Opening and closing tags do not pair up.
    UnbalancedTags,
    /// A byte sequence was not valid UTF-8.
    InvalidUtf8,
    /// More than one top-level element was found.
    MultipleRoots,
}

impl fmt::Display for XmlError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only `ParseError` carries data; every other variant maps to a fixed message.
        let message = match self {
            XmlError::ParseError(msg) => return write!(f, "XML parse error: {}", msg),
            XmlError::UnexpectedEof => "Unexpected end of XML",
            XmlError::UnbalancedTags => "Unbalanced XML tags",
            XmlError::InvalidUtf8 => "Invalid UTF-8 in XML",
            XmlError::MultipleRoots => "XML document has multiple root elements",
        };
        f.write_str(message)
    }
}
123
124impl<'de> FormatParser<'de> for XmlParser<'de> {
125 type Error = XmlError;
126 type Probe<'a>
127 = XmlProbe<'de>
128 where
129 Self: 'a;
130
131 fn next_event(&mut self) -> Result<ParseEvent<'de>, Self::Error> {
132 if let Some(err) = &self.pending_error {
133 return Err(err.clone());
134 }
135 if self.idx >= self.events.len() {
136 return Err(XmlError::UnexpectedEof);
137 }
138 let event = self.events[self.idx].clone();
139 self.idx += 1;
140 Ok(event)
141 }
142
143 fn peek_event(&mut self) -> Result<ParseEvent<'de>, Self::Error> {
144 if let Some(err) = &self.pending_error {
145 return Err(err.clone());
146 }
147 self.events
148 .get(self.idx)
149 .cloned()
150 .ok_or(XmlError::UnexpectedEof)
151 }
152
153 fn skip_value(&mut self) -> Result<(), Self::Error> {
154 let mut depth = 0usize;
155 loop {
156 let event = self.next_event()?;
157 match event {
158 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
159 depth += 1;
160 }
161 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
162 if depth == 0 {
163 break;
164 } else {
165 depth -= 1;
166 }
167 }
168 ParseEvent::Scalar(_) | ParseEvent::VariantTag(_) => {
169 if depth == 0 {
170 break;
171 }
172 }
173 ParseEvent::FieldKey(_) => {
174 depth += 1;
176 }
177 }
178 }
179 Ok(())
180 }
181
182 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
183 let evidence = self.build_probe();
185 Ok(XmlProbe { evidence, idx: 0 })
186 }
187}
188
189impl<'de> XmlParser<'de> {
190 fn build_probe(&self) -> Vec<FieldEvidence<'de>> {
192 let mut evidence = Vec::new();
193
194 if self.idx >= self.events.len() {
196 return evidence;
197 }
198
199 if !matches!(
200 self.events.get(self.idx),
201 Some(ParseEvent::StructStart(ContainerKind::Element))
202 ) {
203 return evidence;
204 }
205
206 let mut i = self.idx + 1;
208 let mut depth = 0usize;
209
210 while i < self.events.len() {
211 match &self.events[i] {
212 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
213 depth += 1;
214 i += 1;
215 }
216 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
217 if depth == 0 {
218 break;
220 }
221 depth -= 1;
222 i += 1;
223 }
224 ParseEvent::FieldKey(key) if depth == 0 => {
225 let scalar_value = if let Some(next_event) = self.events.get(i + 1) {
228 match next_event {
229 ParseEvent::Scalar(sv) => Some(sv.clone()),
230 _ => None,
231 }
232 } else {
233 None
234 };
235
236 if let Some(sv) = scalar_value {
237 evidence.push(FieldEvidence::with_scalar_value(
238 key.name.clone(),
239 key.location,
240 None,
241 sv,
242 key.namespace.clone(),
243 ));
244 } else {
245 evidence.push(FieldEvidence::new(
246 key.name.clone(),
247 key.location,
248 None,
249 key.namespace.clone(),
250 ));
251 }
252 i += 1;
253 }
254 _ => {
255 i += 1;
256 }
257 }
258 }
259
260 evidence
261 }
262}
263
264pub struct XmlProbe<'de> {
265 evidence: Vec<FieldEvidence<'de>>,
266 idx: usize,
267}
268
269impl<'de> ProbeStream<'de> for XmlProbe<'de> {
270 type Error = XmlError;
271
272 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
273 if self.idx >= self.evidence.len() {
274 Ok(None)
275 } else {
276 let ev = self.evidence[self.idx].clone();
277 self.idx += 1;
278 Ok(Some(ev))
279 }
280 }
281}
282
283fn resolve_namespace(resolve: ResolveResult<'_>) -> Result<Option<String>, XmlError> {
285 match resolve {
286 ResolveResult::Bound(ns) => Ok(Some(String::from_utf8_lossy(ns.as_ref()).into_owned())),
287 ResolveResult::Unbound => Ok(None),
288 ResolveResult::Unknown(_) => {
289 Ok(None)
291 }
292 }
293}
294
295fn resolve_entity(raw: &str) -> Result<String, XmlError> {
298 if let Some(resolved) = resolve_xml_entity(raw) {
300 return Ok(resolved.into());
301 }
302
303 if let Some(rest) = raw.strip_prefix('#') {
305 let code = if let Some(hex) = rest.strip_prefix('x').or_else(|| rest.strip_prefix('X')) {
306 u32::from_str_radix(hex, 16).map_err(|_| {
308 XmlError::ParseError(format!("Invalid hex numeric entity: #{}", rest))
309 })?
310 } else {
311 rest.parse::<u32>().map_err(|_| {
313 XmlError::ParseError(format!("Invalid decimal numeric entity: #{}", rest))
314 })?
315 };
316
317 let ch = char::from_u32(code)
318 .ok_or_else(|| XmlError::ParseError(format!("Invalid Unicode code point: {}", code)))?;
319 return Ok(ch.to_string());
320 }
321
322 Ok(format!("&{};", raw))
324}
325
326#[derive(Debug, Clone)]
327struct Element {
328 name: QName,
329 attributes: Vec<(QName, String)>,
330 children: Vec<Element>,
331 text: String,
332}
333
334impl Element {
335 fn new(name: QName, attributes: Vec<(QName, String)>) -> Self {
336 Self {
337 name,
338 attributes,
339 children: Vec::new(),
340 text: String::new(),
341 }
342 }
343
344 fn push_text(&mut self, text: &str) {
345 self.push_text_impl(text, true);
346 }
347
348 fn push_text_raw(&mut self, text: &str) {
349 self.push_text_impl(text, false);
350 }
351
352 fn push_text_impl(&mut self, text: &str, should_trim: bool) {
353 let content = if should_trim { text.trim() } else { text };
354 if content.is_empty() {
355 return;
356 }
357 self.text.push_str(content);
358 }
359}
360
361fn build_events<'de>(input: &'de [u8]) -> Result<Vec<ParseEvent<'de>>, XmlError> {
362 let mut reader = NsReader::from_reader(Cursor::new(input));
363 reader.config_mut().trim_text(true);
364
365 let mut buf = Vec::new();
366 let mut stack: Vec<Element> = Vec::new();
367 let mut root: Option<Element> = None;
368
369 loop {
370 buf.clear();
371 let (resolve, event) = reader
372 .read_resolved_event_into(&mut buf)
373 .map_err(|e| XmlError::ParseError(e.to_string()))?;
374
375 match event {
376 Event::Start(ref e) | Event::Empty(ref e) => {
377 let ns = resolve_namespace(resolve)?;
379 let local = core::str::from_utf8(e.local_name().as_ref())
380 .map_err(|_| XmlError::InvalidUtf8)?
381 .to_string();
382 let name = match ns {
383 Some(uri) => QName::with_ns(uri, local),
384 None => QName::local(local),
385 };
386
387 let mut attributes = Vec::new();
389 for attr in e.attributes() {
390 let attr = attr.map_err(|e| XmlError::ParseError(e.to_string()))?;
391
392 let key = attr.key;
394 if key.as_ref() == b"xmlns" {
395 continue; }
397 if let Some(prefix) = key.prefix()
398 && prefix.as_ref() == b"xmlns"
399 {
400 continue; }
402
403 let (attr_resolve, _) = reader.resolve_attribute(key);
404 let attr_ns = resolve_namespace(attr_resolve)?;
405 let attr_local = core::str::from_utf8(key.local_name().as_ref())
406 .map_err(|_| XmlError::InvalidUtf8)?
407 .to_string();
408 let attr_qname = match attr_ns {
409 Some(uri) => QName::with_ns(uri, attr_local),
410 None => QName::local(attr_local),
411 };
412 let value = attr
413 .unescape_value()
414 .map_err(|e| XmlError::ParseError(e.to_string()))?
415 .into_owned();
416 attributes.push((attr_qname, value));
417 }
418
419 let elem = Element::new(name, attributes);
420
421 if matches!(event, Event::Start(_)) {
422 stack.push(elem);
423 } else {
424 attach_element(stack.as_mut_slice(), elem, &mut root)?;
426 }
427 }
428 Event::End(_) => {
429 let elem = stack.pop().ok_or(XmlError::UnbalancedTags)?;
430 attach_element(stack.as_mut_slice(), elem, &mut root)?;
431 }
432 Event::Text(e) => {
433 if let Some(current) = stack.last_mut() {
434 let text = e
435 .decode()
436 .map_err(|err| XmlError::ParseError(err.to_string()))?;
437 current.push_text(text.as_ref());
438 }
439 }
440 Event::CData(e) => {
441 if let Some(current) = stack.last_mut() {
442 let text =
443 core::str::from_utf8(e.as_ref()).map_err(|_| XmlError::InvalidUtf8)?;
444 current.push_text(text);
445 }
446 }
447 Event::GeneralRef(e) => {
448 if let Some(current) = stack.last_mut() {
451 let raw = e
452 .decode()
453 .map_err(|err| XmlError::ParseError(err.to_string()))?;
454 let resolved = resolve_entity(&raw)?;
455 current.push_text_raw(&resolved);
457 }
458 }
459 Event::Decl(_) | Event::Comment(_) | Event::PI(_) | Event::DocType(_) => {}
460 Event::Eof => break,
461 }
462 }
463
464 if !stack.is_empty() {
465 return Err(XmlError::UnbalancedTags);
466 }
467
468 let root = root.ok_or(XmlError::UnexpectedEof)?;
469 let mut events = Vec::new();
470 emit_element_events(&root, &mut events);
471 Ok(events)
472}
473
474fn attach_element(
475 stack: &mut [Element],
476 elem: Element,
477 root: &mut Option<Element>,
478) -> Result<(), XmlError> {
479 if let Some(parent) = stack.last_mut() {
480 parent.children.push(elem);
481 } else if root.is_none() {
482 *root = Some(elem);
483 } else {
484 return Err(XmlError::MultipleRoots);
485 }
486 Ok(())
487}
488
489fn emit_element_events<'de>(elem: &Element, events: &mut Vec<ParseEvent<'de>>) {
491 let text = elem.text.trim();
492 let has_attrs = !elem.attributes.is_empty();
493 let has_children = !elem.children.is_empty();
494
495 if !has_attrs && !has_children {
497 if text.is_empty() {
498 events.push(ParseEvent::StructStart(ContainerKind::Element));
500 events.push(ParseEvent::StructEnd);
501 } else {
502 emit_scalar_from_text(text, events);
503 }
504 return;
505 }
506
507 if !has_attrs && has_children && text.is_empty() && elem.children.len() > 1 {
509 let first = &elem.children[0].name;
510 if elem.children.iter().all(|child| &child.name == first) {
511 events.push(ParseEvent::SequenceStart(ContainerKind::Element));
512 for child in &elem.children {
513 emit_element_events(child, events);
514 }
515 events.push(ParseEvent::SequenceEnd);
516 return;
517 }
518 }
519
520 events.push(ParseEvent::StructStart(ContainerKind::Element));
522
523 for (qname, value) in &elem.attributes {
525 let mut key = FieldKey::new(
526 Cow::Owned(qname.local_name.clone()),
527 FieldLocationHint::Attribute,
528 );
529 if let Some(ns) = &qname.namespace {
530 key = key.with_namespace(Cow::Owned(ns.clone()));
531 }
532 events.push(ParseEvent::FieldKey(key));
533 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
535 value.clone(),
536 ))));
537 }
538
539 let mut grouped: BTreeMap<(&str, Option<&str>), Vec<&Element>> = BTreeMap::new();
541 for child in &elem.children {
542 let key = (
543 child.name.local_name.as_str(),
544 child.name.namespace.as_deref(),
545 );
546 grouped.entry(key).or_default().push(child);
547 }
548
549 for ((local_name, namespace), children) in grouped {
551 let mut key = FieldKey::new(Cow::Owned(local_name.to_string()), FieldLocationHint::Child);
552 if let Some(ns) = namespace {
553 key = key.with_namespace(Cow::Owned(ns.to_string()));
554 }
555 events.push(ParseEvent::FieldKey(key));
556
557 if children.len() == 1 {
558 emit_element_events(children[0], events);
559 } else {
560 events.push(ParseEvent::SequenceStart(ContainerKind::Element));
562 for child in children {
563 emit_element_events(child, events);
564 }
565 events.push(ParseEvent::SequenceEnd);
566 }
567 }
568
569 if !text.is_empty() {
571 let key = FieldKey::new(Cow::Borrowed("_text"), FieldLocationHint::Text);
572 events.push(ParseEvent::FieldKey(key));
573 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
574 text.to_string(),
575 ))));
576 }
577
578 events.push(ParseEvent::StructEnd);
579}
580
581fn emit_scalar_from_text<'de>(text: &str, events: &mut Vec<ParseEvent<'de>>) {
583 if text.eq_ignore_ascii_case("null") {
584 events.push(ParseEvent::Scalar(ScalarValue::Null));
585 return;
586 }
587 if let Ok(b) = text.parse::<bool>() {
588 events.push(ParseEvent::Scalar(ScalarValue::Bool(b)));
589 return;
590 }
591 if let Ok(i) = text.parse::<i64>() {
592 events.push(ParseEvent::Scalar(ScalarValue::I64(i)));
593 return;
594 }
595 if let Ok(u) = text.parse::<u64>() {
596 events.push(ParseEvent::Scalar(ScalarValue::U64(u)));
597 return;
598 }
599 if text.parse::<i128>().is_ok() || text.parse::<u128>().is_ok() {
602 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
603 text.to_string(),
604 ))));
605 return;
606 }
607 if let Ok(f) = text.parse::<f64>() {
608 events.push(ParseEvent::Scalar(ScalarValue::F64(f)));
609 return;
610 }
611 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
612 text.to_string(),
613 ))));
614}