1extern crate alloc;
2
3use alloc::borrow::Cow;
4use alloc::collections::BTreeMap;
5use alloc::string::String;
6use alloc::vec::Vec;
7use core::fmt;
8
9use facet_format::{
10 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
11 ProbeStream, ScalarValue,
12};
13use quick_xml::NsReader;
14use quick_xml::escape::resolve_xml_entity;
15use quick_xml::events::Event;
16use quick_xml::name::ResolveResult;
17use std::io::Cursor;
18
/// An XML name split into an optional namespace URI and a local part.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
struct QName {
    /// Namespace URI the name is bound to, or `None` when it has no namespace.
    namespace: Option<String>,
    /// The name without any prefix.
    local_name: String,
}

#[allow(dead_code)]
impl QName {
    /// Builds a name that carries no namespace.
    fn local(name: impl Into<String>) -> Self {
        let local_name = name.into();
        Self {
            namespace: None,
            local_name,
        }
    }

    /// Builds a name bound to the namespace URI `namespace`.
    fn with_ns(namespace: impl Into<String>, local_name: impl Into<String>) -> Self {
        let namespace = Some(namespace.into());
        let local_name = local_name.into();
        Self {
            namespace,
            local_name,
        }
    }

    /// Returns `true` when the local part equals `local_name` and, if
    /// `expected_ns` is `Some`, the namespace equals it as well.
    ///
    /// Passing `None` for `expected_ns` accepts any namespace (including none).
    fn matches(&self, local_name: &str, expected_ns: Option<&str>) -> bool {
        let same_local = self.local_name == local_name;
        same_local && expected_ns.map_or(true, |ns| self.namespace.as_deref() == Some(ns))
    }
}

impl fmt::Display for QName {
    /// Writes `{namespace}local` for namespaced names and just `local` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(ns) = &self.namespace {
            write!(f, "{{{}}}{}", ns, self.local_name)
        } else {
            write!(f, "{}", self.local_name)
        }
    }
}
79
80pub struct XmlParser<'de> {
81 events: Vec<ParseEvent<'de>>,
82 idx: usize,
83 pending_error: Option<XmlError>,
84}
85
86impl<'de> XmlParser<'de> {
87 pub fn new(input: &'de [u8]) -> Self {
88 match build_events(input) {
89 Ok(events) => Self {
90 events,
91 idx: 0,
92 pending_error: None,
93 },
94 Err(err) => Self {
95 events: Vec::new(),
96 idx: 0,
97 pending_error: Some(err),
98 },
99 }
100 }
101}
102
/// Errors reported while turning XML input into parse events.
#[derive(Debug, Clone)]
pub enum XmlError {
    /// A failure described by a free-form message.
    ParseError(String),
    /// The input (or the event stream) ended before a value was complete.
    UnexpectedEof,
    /// An end tag had no matching start tag, or a start tag was never closed.
    UnbalancedTags,
    /// A name or CDATA section was not valid UTF-8.
    InvalidUtf8,
    /// More than one top-level element was found.
    MultipleRoots,
}

impl fmt::Display for XmlError {
    /// Writes the human-readable message for each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let fixed_message = match self {
            // The only variant with a payload is formatted on the spot.
            XmlError::ParseError(msg) => return write!(f, "XML parse error: {}", msg),
            XmlError::UnexpectedEof => "Unexpected end of XML",
            XmlError::UnbalancedTags => "Unbalanced XML tags",
            XmlError::InvalidUtf8 => "Invalid UTF-8 in XML",
            XmlError::MultipleRoots => "XML document has multiple root elements",
        };
        f.write_str(fixed_message)
    }
}
123
124impl<'de> FormatParser<'de> for XmlParser<'de> {
125 type Error = XmlError;
126 type Probe<'a>
127 = XmlProbe<'de>
128 where
129 Self: 'a;
130
131 fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
132 if let Some(err) = &self.pending_error {
133 return Err(err.clone());
134 }
135 if self.idx >= self.events.len() {
136 return Ok(None);
137 }
138 let event = self.events[self.idx].clone();
139 self.idx += 1;
140 Ok(Some(event))
141 }
142
143 fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
144 if let Some(err) = &self.pending_error {
145 return Err(err.clone());
146 }
147 Ok(self.events.get(self.idx).cloned())
148 }
149
150 fn skip_value(&mut self) -> Result<(), Self::Error> {
151 let mut depth = 0usize;
152 loop {
153 let event = self.next_event()?.ok_or(XmlError::UnexpectedEof)?;
154 match event {
155 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
156 depth += 1;
157 }
158 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
159 if depth == 0 {
160 break;
161 } else {
162 depth -= 1;
163 }
164 }
165 ParseEvent::Scalar(_) | ParseEvent::VariantTag(_) => {
166 if depth == 0 {
167 break;
168 }
169 }
170 ParseEvent::FieldKey(_) | ParseEvent::OrderedField => {
171 depth += 1;
173 }
174 }
175 }
176 Ok(())
177 }
178
179 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
180 let evidence = self.build_probe();
182 Ok(XmlProbe { evidence, idx: 0 })
183 }
184}
185
186impl<'de> XmlParser<'de> {
187 fn build_probe(&self) -> Vec<FieldEvidence<'de>> {
189 let mut evidence = Vec::new();
190
191 if self.idx >= self.events.len() {
193 return evidence;
194 }
195
196 if !matches!(
197 self.events.get(self.idx),
198 Some(ParseEvent::StructStart(ContainerKind::Element))
199 ) {
200 return evidence;
201 }
202
203 let mut i = self.idx + 1;
205 let mut depth = 0usize;
206
207 while i < self.events.len() {
208 match &self.events[i] {
209 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
210 depth += 1;
211 i += 1;
212 }
213 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
214 if depth == 0 {
215 break;
217 }
218 depth -= 1;
219 i += 1;
220 }
221 ParseEvent::FieldKey(key) if depth == 0 => {
222 let scalar_value = if let Some(next_event) = self.events.get(i + 1) {
225 match next_event {
226 ParseEvent::Scalar(sv) => Some(sv.clone()),
227 _ => None,
228 }
229 } else {
230 None
231 };
232
233 if let Some(sv) = scalar_value {
234 evidence.push(FieldEvidence::with_scalar_value(
235 key.name.clone(),
236 key.location,
237 None,
238 sv,
239 key.namespace.clone(),
240 ));
241 } else {
242 evidence.push(FieldEvidence::new(
243 key.name.clone(),
244 key.location,
245 None,
246 key.namespace.clone(),
247 ));
248 }
249 i += 1;
250 }
251 _ => {
252 i += 1;
253 }
254 }
255 }
256
257 evidence
258 }
259}
260
261pub struct XmlProbe<'de> {
262 evidence: Vec<FieldEvidence<'de>>,
263 idx: usize,
264}
265
266impl<'de> ProbeStream<'de> for XmlProbe<'de> {
267 type Error = XmlError;
268
269 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
270 if self.idx >= self.evidence.len() {
271 Ok(None)
272 } else {
273 let ev = self.evidence[self.idx].clone();
274 self.idx += 1;
275 Ok(Some(ev))
276 }
277 }
278}
279
280fn resolve_namespace(resolve: ResolveResult<'_>) -> Result<Option<String>, XmlError> {
282 match resolve {
283 ResolveResult::Bound(ns) => Ok(Some(String::from_utf8_lossy(ns.as_ref()).into_owned())),
284 ResolveResult::Unbound => Ok(None),
285 ResolveResult::Unknown(_) => {
286 Ok(None)
288 }
289 }
290}
291
292fn resolve_entity(raw: &str) -> Result<String, XmlError> {
295 if let Some(resolved) = resolve_xml_entity(raw) {
297 return Ok(resolved.into());
298 }
299
300 if let Some(rest) = raw.strip_prefix('#') {
302 let code = if let Some(hex) = rest.strip_prefix('x').or_else(|| rest.strip_prefix('X')) {
303 u32::from_str_radix(hex, 16).map_err(|_| {
305 XmlError::ParseError(format!("Invalid hex numeric entity: #{}", rest))
306 })?
307 } else {
308 rest.parse::<u32>().map_err(|_| {
310 XmlError::ParseError(format!("Invalid decimal numeric entity: #{}", rest))
311 })?
312 };
313
314 let ch = char::from_u32(code)
315 .ok_or_else(|| XmlError::ParseError(format!("Invalid Unicode code point: {}", code)))?;
316 return Ok(ch.to_string());
317 }
318
319 Ok(format!("&{};", raw))
321}
322
323#[derive(Debug, Clone)]
324struct Element {
325 name: QName,
326 attributes: Vec<(QName, String)>,
327 children: Vec<Element>,
328 text: String,
329}
330
331impl Element {
332 fn new(name: QName, attributes: Vec<(QName, String)>) -> Self {
333 Self {
334 name,
335 attributes,
336 children: Vec::new(),
337 text: String::new(),
338 }
339 }
340
341 fn push_text(&mut self, text: &str) {
342 self.push_text_impl(text, true);
343 }
344
345 fn push_text_raw(&mut self, text: &str) {
346 self.push_text_impl(text, false);
347 }
348
349 fn push_text_impl(&mut self, text: &str, should_trim: bool) {
350 let content = if should_trim { text.trim() } else { text };
351 if content.is_empty() {
352 return;
353 }
354 self.text.push_str(content);
355 }
356}
357
358fn build_events<'de>(input: &'de [u8]) -> Result<Vec<ParseEvent<'de>>, XmlError> {
359 let mut reader = NsReader::from_reader(Cursor::new(input));
360 reader.config_mut().trim_text(true);
361
362 let mut buf = Vec::new();
363 let mut stack: Vec<Element> = Vec::new();
364 let mut root: Option<Element> = None;
365
366 loop {
367 buf.clear();
368 let (resolve, event) = reader
369 .read_resolved_event_into(&mut buf)
370 .map_err(|e| XmlError::ParseError(e.to_string()))?;
371
372 match event {
373 Event::Start(ref e) | Event::Empty(ref e) => {
374 let ns = resolve_namespace(resolve)?;
376 let local = core::str::from_utf8(e.local_name().as_ref())
377 .map_err(|_| XmlError::InvalidUtf8)?
378 .to_string();
379 let name = match ns {
380 Some(uri) => QName::with_ns(uri, local),
381 None => QName::local(local),
382 };
383
384 let mut attributes = Vec::new();
386 for attr in e.attributes() {
387 let attr = attr.map_err(|e| XmlError::ParseError(e.to_string()))?;
388
389 let key = attr.key;
391 if key.as_ref() == b"xmlns" {
392 continue; }
394 if let Some(prefix) = key.prefix()
395 && prefix.as_ref() == b"xmlns"
396 {
397 continue; }
399
400 let (attr_resolve, _) = reader.resolve_attribute(key);
401 let attr_ns = resolve_namespace(attr_resolve)?;
402 let attr_local = core::str::from_utf8(key.local_name().as_ref())
403 .map_err(|_| XmlError::InvalidUtf8)?
404 .to_string();
405 let attr_qname = match attr_ns {
406 Some(uri) => QName::with_ns(uri, attr_local),
407 None => QName::local(attr_local),
408 };
409 let value = attr
410 .unescape_value()
411 .map_err(|e| XmlError::ParseError(e.to_string()))?
412 .into_owned();
413 attributes.push((attr_qname, value));
414 }
415
416 let elem = Element::new(name, attributes);
417
418 if matches!(event, Event::Start(_)) {
419 stack.push(elem);
420 } else {
421 attach_element(stack.as_mut_slice(), elem, &mut root)?;
423 }
424 }
425 Event::End(_) => {
426 let elem = stack.pop().ok_or(XmlError::UnbalancedTags)?;
427 attach_element(stack.as_mut_slice(), elem, &mut root)?;
428 }
429 Event::Text(e) => {
430 if let Some(current) = stack.last_mut() {
431 let text = e
432 .decode()
433 .map_err(|err| XmlError::ParseError(err.to_string()))?;
434 current.push_text(text.as_ref());
435 }
436 }
437 Event::CData(e) => {
438 if let Some(current) = stack.last_mut() {
439 let text =
440 core::str::from_utf8(e.as_ref()).map_err(|_| XmlError::InvalidUtf8)?;
441 current.push_text(text);
442 }
443 }
444 Event::GeneralRef(e) => {
445 if let Some(current) = stack.last_mut() {
448 let raw = e
449 .decode()
450 .map_err(|err| XmlError::ParseError(err.to_string()))?;
451 let resolved = resolve_entity(&raw)?;
452 current.push_text_raw(&resolved);
454 }
455 }
456 Event::Decl(_) | Event::Comment(_) | Event::PI(_) | Event::DocType(_) => {}
457 Event::Eof => break,
458 }
459 }
460
461 if !stack.is_empty() {
462 return Err(XmlError::UnbalancedTags);
463 }
464
465 let root = root.ok_or(XmlError::UnexpectedEof)?;
466 let mut events = Vec::new();
467 emit_element_events(&root, &mut events);
468 Ok(events)
469}
470
471fn attach_element(
472 stack: &mut [Element],
473 elem: Element,
474 root: &mut Option<Element>,
475) -> Result<(), XmlError> {
476 if let Some(parent) = stack.last_mut() {
477 parent.children.push(elem);
478 } else if root.is_none() {
479 *root = Some(elem);
480 } else {
481 return Err(XmlError::MultipleRoots);
482 }
483 Ok(())
484}
485
486fn emit_element_events<'de>(elem: &Element, events: &mut Vec<ParseEvent<'de>>) {
488 let text = elem.text.trim();
489 let has_attrs = !elem.attributes.is_empty();
490 let has_children = !elem.children.is_empty();
491
492 if !has_attrs && !has_children {
494 if text.is_empty() {
495 events.push(ParseEvent::StructStart(ContainerKind::Element));
497 events.push(ParseEvent::StructEnd);
498 } else {
499 emit_scalar_from_text(text, events);
500 }
501 return;
502 }
503
504 if !has_attrs && has_children && text.is_empty() && elem.children.len() > 1 {
506 let first = &elem.children[0].name;
507 if elem.children.iter().all(|child| &child.name == first) {
508 events.push(ParseEvent::SequenceStart(ContainerKind::Element));
509 for child in &elem.children {
510 emit_element_events(child, events);
511 }
512 events.push(ParseEvent::SequenceEnd);
513 return;
514 }
515 }
516
517 events.push(ParseEvent::StructStart(ContainerKind::Element));
519
520 for (qname, value) in &elem.attributes {
522 let mut key = FieldKey::new(
523 Cow::Owned(qname.local_name.clone()),
524 FieldLocationHint::Attribute,
525 );
526 if let Some(ns) = &qname.namespace {
527 key = key.with_namespace(Cow::Owned(ns.clone()));
528 }
529 events.push(ParseEvent::FieldKey(key));
530 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
532 value.clone(),
533 ))));
534 }
535
536 let mut grouped: BTreeMap<(&str, Option<&str>), Vec<&Element>> = BTreeMap::new();
538 for child in &elem.children {
539 let key = (
540 child.name.local_name.as_str(),
541 child.name.namespace.as_deref(),
542 );
543 grouped.entry(key).or_default().push(child);
544 }
545
546 for ((local_name, namespace), children) in grouped {
548 let mut key = FieldKey::new(Cow::Owned(local_name.to_string()), FieldLocationHint::Child);
549 if let Some(ns) = namespace {
550 key = key.with_namespace(Cow::Owned(ns.to_string()));
551 }
552 events.push(ParseEvent::FieldKey(key));
553
554 if children.len() == 1 {
555 emit_element_events(children[0], events);
556 } else {
557 events.push(ParseEvent::SequenceStart(ContainerKind::Element));
559 for child in children {
560 emit_element_events(child, events);
561 }
562 events.push(ParseEvent::SequenceEnd);
563 }
564 }
565
566 if !text.is_empty() {
568 let key = FieldKey::new(Cow::Borrowed("_text"), FieldLocationHint::Text);
569 events.push(ParseEvent::FieldKey(key));
570 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
571 text.to_string(),
572 ))));
573 }
574
575 events.push(ParseEvent::StructEnd);
576}
577
578fn emit_scalar_from_text<'de>(text: &str, events: &mut Vec<ParseEvent<'de>>) {
580 if text.eq_ignore_ascii_case("null") {
581 events.push(ParseEvent::Scalar(ScalarValue::Null));
582 return;
583 }
584 if let Ok(b) = text.parse::<bool>() {
585 events.push(ParseEvent::Scalar(ScalarValue::Bool(b)));
586 return;
587 }
588 if let Ok(i) = text.parse::<i64>() {
589 events.push(ParseEvent::Scalar(ScalarValue::I64(i)));
590 return;
591 }
592 if let Ok(u) = text.parse::<u64>() {
593 events.push(ParseEvent::Scalar(ScalarValue::U64(u)));
594 return;
595 }
596 if text.parse::<i128>().is_ok() || text.parse::<u128>().is_ok() {
599 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
600 text.to_string(),
601 ))));
602 return;
603 }
604 if let Ok(f) = text.parse::<f64>() {
605 events.push(ParseEvent::Scalar(ScalarValue::F64(f)));
606 return;
607 }
608 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
609 text.to_string(),
610 ))));
611}