1extern crate alloc;
2
3use alloc::borrow::Cow;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::fmt;
7
8use facet_format::{
9 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
10 ProbeStream, ScalarValue,
11};
12use quick_xml::NsReader;
13use quick_xml::escape::resolve_xml_entity;
14use quick_xml::events::Event;
15use quick_xml::name::ResolveResult;
16use std::io::Cursor;
17
/// An XML name split into an optional namespace URI and a local part.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
struct QName {
    /// Namespace URI, or `None` when the name carries no namespace.
    namespace: Option<String>,
    /// Local part of the name.
    local_name: String,
}

#[allow(dead_code)]
impl QName {
    /// Builds a name without a namespace.
    fn local(name: impl Into<String>) -> Self {
        let local_name = name.into();
        Self {
            namespace: None,
            local_name,
        }
    }

    /// Builds a name bound to the given namespace URI.
    fn with_ns(namespace: impl Into<String>, local_name: impl Into<String>) -> Self {
        let namespace = Some(namespace.into());
        let local_name = local_name.into();
        Self {
            namespace,
            local_name,
        }
    }

    /// Tests this name against `local_name` and, optionally, a namespace.
    ///
    /// With `expected_ns == None` only the local part is compared, so any
    /// namespace (or none) is accepted. With `Some(ns)` the namespace must be
    /// present and equal to `ns`.
    fn matches(&self, local_name: &str, expected_ns: Option<&str>) -> bool {
        let same_local = self.local_name == local_name;
        let ns_acceptable = expected_ns.is_none() || self.namespace.as_deref() == expected_ns;
        same_local && ns_acceptable
    }
}

/// Renders as `{namespace}local` when a namespace is set, otherwise as the
/// bare local name.
impl fmt::Display for QName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let local = &self.local_name;
        if let Some(ns) = self.namespace.as_ref() {
            write!(f, "{{{}}}{}", ns, local)
        } else {
            write!(f, "{}", local)
        }
    }
}
78
79pub struct XmlParser<'de> {
80 events: Vec<ParseEvent<'de>>,
81 idx: usize,
82 pending_error: Option<XmlError>,
83}
84
85impl<'de> XmlParser<'de> {
86 pub fn new(input: &'de [u8]) -> Self {
87 match build_events(input) {
88 Ok(events) => Self {
89 events,
90 idx: 0,
91 pending_error: None,
92 },
93 Err(err) => Self {
94 events: Vec::new(),
95 idx: 0,
96 pending_error: Some(err),
97 },
98 }
99 }
100}
101
/// Errors reported while turning XML input into parse events.
#[derive(Debug, Clone)]
pub enum XmlError {
    /// A failure described by a free-form message.
    ParseError(alloc::string::String),
    /// Input ended before an expected event or root element was found.
    UnexpectedEof,
    /// Opening and closing tags do not pair up.
    UnbalancedTags,
    /// Bytes that had to be valid UTF-8 were not.
    InvalidUtf8(core::str::Utf8Error),
    /// More than one top-level element was found.
    MultipleRoots,
}

impl fmt::Display for XmlError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ParseError(msg) => write!(f, "XML parse error: {}", msg),
            Self::InvalidUtf8(e) => write!(f, "Invalid UTF-8 in XML: {}", e),
            Self::UnexpectedEof => f.write_str("Unexpected end of XML"),
            Self::UnbalancedTags => f.write_str("Unbalanced XML tags"),
            Self::MultipleRoots => f.write_str("XML document has multiple root elements"),
        }
    }
}
122
123impl<'de> FormatParser<'de> for XmlParser<'de> {
124 type Error = XmlError;
125 type Probe<'a>
126 = XmlProbe<'de>
127 where
128 Self: 'a;
129
130 fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
131 if let Some(err) = &self.pending_error {
132 return Err(err.clone());
133 }
134 if self.idx >= self.events.len() {
135 return Ok(None);
136 }
137 let event = self.events[self.idx].clone();
138 self.idx += 1;
139 Ok(Some(event))
140 }
141
142 fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
143 if let Some(err) = &self.pending_error {
144 return Err(err.clone());
145 }
146 Ok(self.events.get(self.idx).cloned())
147 }
148
149 fn skip_value(&mut self) -> Result<(), Self::Error> {
150 let mut struct_depth = 0usize;
154 let mut pending_field_value = false;
155
156 loop {
157 let event = self.next_event()?.ok_or(XmlError::UnexpectedEof)?;
158 match event {
159 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
160 pending_field_value = false;
162 struct_depth += 1;
163 }
164 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
165 if struct_depth == 0 {
166 break;
168 } else {
169 struct_depth -= 1;
170 if struct_depth == 0 && !pending_field_value {
172 break;
173 }
174 }
175 }
176 ParseEvent::Scalar(_) | ParseEvent::VariantTag(_) => {
177 if struct_depth == 0 && !pending_field_value {
178 break;
180 }
181 pending_field_value = false;
183 }
184 ParseEvent::FieldKey(_) | ParseEvent::OrderedField => {
185 pending_field_value = true;
187 }
188 }
189 }
190 Ok(())
191 }
192
193 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
194 let evidence = self.build_probe();
196 Ok(XmlProbe { evidence, idx: 0 })
197 }
198}
199
200impl<'de> XmlParser<'de> {
201 fn build_probe(&self) -> Vec<FieldEvidence<'de>> {
203 let mut evidence = Vec::new();
204
205 if self.idx >= self.events.len() {
207 return evidence;
208 }
209
210 if !matches!(
211 self.events.get(self.idx),
212 Some(ParseEvent::StructStart(ContainerKind::Element))
213 ) {
214 return evidence;
215 }
216
217 let mut i = self.idx + 1;
219 let mut depth = 0usize;
220
221 while i < self.events.len() {
222 match &self.events[i] {
223 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
224 depth += 1;
225 i += 1;
226 }
227 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
228 if depth == 0 {
229 break;
231 }
232 depth -= 1;
233 i += 1;
234 }
235 ParseEvent::FieldKey(key) if depth == 0 => {
236 let scalar_value = if let Some(next_event) = self.events.get(i + 1) {
239 match next_event {
240 ParseEvent::Scalar(sv) => Some(sv.clone()),
241 _ => None,
242 }
243 } else {
244 None
245 };
246
247 if let Some(sv) = scalar_value {
248 evidence.push(FieldEvidence::with_scalar_value(
249 key.name.clone(),
250 key.location,
251 None,
252 sv,
253 key.namespace.clone(),
254 ));
255 } else {
256 evidence.push(FieldEvidence::new(
257 key.name.clone(),
258 key.location,
259 None,
260 key.namespace.clone(),
261 ));
262 }
263 i += 1;
264 }
265 _ => {
266 i += 1;
267 }
268 }
269 }
270
271 evidence
272 }
273}
274
275pub struct XmlProbe<'de> {
276 evidence: Vec<FieldEvidence<'de>>,
277 idx: usize,
278}
279
280impl<'de> ProbeStream<'de> for XmlProbe<'de> {
281 type Error = XmlError;
282
283 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
284 if self.idx >= self.evidence.len() {
285 Ok(None)
286 } else {
287 let ev = self.evidence[self.idx].clone();
288 self.idx += 1;
289 Ok(Some(ev))
290 }
291 }
292}
293
294fn resolve_namespace(resolve: ResolveResult<'_>) -> Result<Option<String>, XmlError> {
296 match resolve {
297 ResolveResult::Bound(ns) => Ok(Some(String::from_utf8_lossy(ns.as_ref()).into_owned())),
298 ResolveResult::Unbound => Ok(None),
299 ResolveResult::Unknown(_) => {
300 Ok(None)
302 }
303 }
304}
305
306fn resolve_entity(raw: &str) -> Result<String, XmlError> {
309 if let Some(resolved) = resolve_xml_entity(raw) {
311 return Ok(resolved.into());
312 }
313
314 if let Some(rest) = raw.strip_prefix('#') {
316 let code = if let Some(hex) = rest.strip_prefix('x').or_else(|| rest.strip_prefix('X')) {
317 u32::from_str_radix(hex, 16).map_err(|_| {
319 XmlError::ParseError(format!("Invalid hex numeric entity: #{}", rest))
320 })?
321 } else {
322 rest.parse::<u32>().map_err(|_| {
324 XmlError::ParseError(format!("Invalid decimal numeric entity: #{}", rest))
325 })?
326 };
327
328 let ch = char::from_u32(code)
329 .ok_or_else(|| XmlError::ParseError(format!("Invalid Unicode code point: {}", code)))?;
330 return Ok(ch.to_string());
331 }
332
333 Ok(format!("&{};", raw))
335}
336
337#[derive(Debug, Clone)]
338struct Element {
339 name: QName,
340 attributes: Vec<(QName, String)>,
341 children: Vec<Element>,
342 text: String,
343}
344
345impl Element {
346 fn new(name: QName, attributes: Vec<(QName, String)>) -> Self {
347 Self {
348 name,
349 attributes,
350 children: Vec::new(),
351 text: String::new(),
352 }
353 }
354
355 fn push_text(&mut self, text: &str) {
356 self.push_text_impl(text, true);
357 }
358
359 fn push_text_raw(&mut self, text: &str) {
360 self.push_text_impl(text, false);
361 }
362
363 fn push_text_impl(&mut self, text: &str, should_trim: bool) {
364 let content = if should_trim { text.trim() } else { text };
365 if content.is_empty() {
366 return;
367 }
368 self.text.push_str(content);
369 }
370}
371
372fn build_events<'de>(input: &'de [u8]) -> Result<Vec<ParseEvent<'de>>, XmlError> {
373 let mut reader = NsReader::from_reader(Cursor::new(input));
374 reader.config_mut().trim_text(true);
375
376 let mut buf = Vec::new();
377 let mut stack: Vec<Element> = Vec::new();
378 let mut root: Option<Element> = None;
379
380 loop {
381 buf.clear();
382 let (resolve, event) = reader
383 .read_resolved_event_into(&mut buf)
384 .map_err(|e| XmlError::ParseError(e.to_string()))?;
385
386 match event {
387 Event::Start(ref e) | Event::Empty(ref e) => {
388 let ns = resolve_namespace(resolve)?;
390 let local = core::str::from_utf8(e.local_name().as_ref())
391 .map_err(XmlError::InvalidUtf8)?
392 .to_string();
393 let name = match ns {
394 Some(uri) => QName::with_ns(uri, local),
395 None => QName::local(local),
396 };
397
398 let mut attributes = Vec::new();
400 for attr in e.attributes() {
401 let attr = attr.map_err(|e| XmlError::ParseError(e.to_string()))?;
402
403 let key = attr.key;
405 if key.as_ref() == b"xmlns" {
406 continue; }
408 if let Some(prefix) = key.prefix()
409 && prefix.as_ref() == b"xmlns"
410 {
411 continue; }
413
414 let (attr_resolve, _) = reader.resolve_attribute(key);
415 let attr_ns = resolve_namespace(attr_resolve)?;
416 let attr_local = core::str::from_utf8(key.local_name().as_ref())
417 .map_err(XmlError::InvalidUtf8)?
418 .to_string();
419 let attr_qname = match attr_ns {
420 Some(uri) => QName::with_ns(uri, attr_local),
421 None => QName::local(attr_local),
422 };
423 let value = attr
424 .unescape_value()
425 .map_err(|e| XmlError::ParseError(e.to_string()))?
426 .into_owned();
427 attributes.push((attr_qname, value));
428 }
429
430 let elem = Element::new(name, attributes);
431
432 if matches!(event, Event::Start(_)) {
433 stack.push(elem);
434 } else {
435 attach_element(stack.as_mut_slice(), elem, &mut root)?;
437 }
438 }
439 Event::End(_) => {
440 let elem = stack.pop().ok_or(XmlError::UnbalancedTags)?;
441 attach_element(stack.as_mut_slice(), elem, &mut root)?;
442 }
443 Event::Text(e) => {
444 if let Some(current) = stack.last_mut() {
445 let text = e
446 .decode()
447 .map_err(|err| XmlError::ParseError(err.to_string()))?;
448 current.push_text(text.as_ref());
449 }
450 }
451 Event::CData(e) => {
452 if let Some(current) = stack.last_mut() {
453 let text = core::str::from_utf8(e.as_ref()).map_err(XmlError::InvalidUtf8)?;
454 current.push_text(text);
455 }
456 }
457 Event::GeneralRef(e) => {
458 if let Some(current) = stack.last_mut() {
461 let raw = e
462 .decode()
463 .map_err(|err| XmlError::ParseError(err.to_string()))?;
464 let resolved = resolve_entity(&raw)?;
465 current.push_text_raw(&resolved);
467 }
468 }
469 Event::Decl(_) | Event::Comment(_) | Event::PI(_) | Event::DocType(_) => {}
470 Event::Eof => break,
471 }
472 }
473
474 if !stack.is_empty() {
475 return Err(XmlError::UnbalancedTags);
476 }
477
478 let root = root.ok_or(XmlError::UnexpectedEof)?;
479 let mut events = Vec::new();
480 emit_element_events(&root, &mut events);
481 Ok(events)
482}
483
484fn attach_element(
485 stack: &mut [Element],
486 elem: Element,
487 root: &mut Option<Element>,
488) -> Result<(), XmlError> {
489 if let Some(parent) = stack.last_mut() {
490 parent.children.push(elem);
491 } else if root.is_none() {
492 *root = Some(elem);
493 } else {
494 return Err(XmlError::MultipleRoots);
495 }
496 Ok(())
497}
498
499fn emit_element_events<'de>(elem: &Element, events: &mut Vec<ParseEvent<'de>>) {
501 let text = elem.text.trim();
502 let has_attrs = !elem.attributes.is_empty();
503 let has_children = !elem.children.is_empty();
504
505 if !has_attrs && !has_children {
507 if text.is_empty() {
508 events.push(ParseEvent::StructStart(ContainerKind::Element));
510 events.push(ParseEvent::StructEnd);
511 } else {
512 emit_scalar_from_text(text, events);
513 }
514 return;
515 }
516
517 if !has_attrs && has_children && text.is_empty() && elem.children.len() > 1 {
519 let first = &elem.children[0].name;
520 if elem.children.iter().all(|child| &child.name == first) {
521 events.push(ParseEvent::SequenceStart(ContainerKind::Element));
522 for child in &elem.children {
523 emit_element_events(child, events);
524 }
525 events.push(ParseEvent::SequenceEnd);
526 return;
527 }
528 }
529
530 events.push(ParseEvent::StructStart(ContainerKind::Element));
532
533 for (qname, value) in &elem.attributes {
535 let mut key = FieldKey::new(
536 Cow::Owned(qname.local_name.clone()),
537 FieldLocationHint::Attribute,
538 );
539 if let Some(ns) = &qname.namespace {
540 key = key.with_namespace(Cow::Owned(ns.clone()));
541 }
542 events.push(ParseEvent::FieldKey(key));
543 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
545 value.clone(),
546 ))));
547 }
548
549 for child in &elem.children {
553 let mut key = FieldKey::new(
554 Cow::Owned(child.name.local_name.clone()),
555 FieldLocationHint::Child,
556 );
557 if let Some(ns) = &child.name.namespace {
558 key = key.with_namespace(Cow::Owned(ns.clone()));
559 }
560 events.push(ParseEvent::FieldKey(key));
561 emit_element_events(child, events);
562 }
563
564 if !text.is_empty() {
566 let key = FieldKey::new(Cow::Borrowed("_text"), FieldLocationHint::Text);
567 events.push(ParseEvent::FieldKey(key));
568 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
569 text.to_string(),
570 ))));
571 }
572
573 events.push(ParseEvent::StructEnd);
574}
575
576fn emit_scalar_from_text<'de>(text: &str, events: &mut Vec<ParseEvent<'de>>) {
578 if text.eq_ignore_ascii_case("null") {
579 events.push(ParseEvent::Scalar(ScalarValue::Null));
580 return;
581 }
582 if let Ok(b) = text.parse::<bool>() {
583 events.push(ParseEvent::Scalar(ScalarValue::Bool(b)));
584 return;
585 }
586 if let Ok(i) = text.parse::<i64>() {
587 events.push(ParseEvent::Scalar(ScalarValue::I64(i)));
588 return;
589 }
590 if let Ok(u) = text.parse::<u64>() {
591 events.push(ParseEvent::Scalar(ScalarValue::U64(u)));
592 return;
593 }
594 if text.parse::<i128>().is_ok() || text.parse::<u128>().is_ok() {
597 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
598 text.to_string(),
599 ))));
600 return;
601 }
602 if let Ok(f) = text.parse::<f64>() {
603 events.push(ParseEvent::Scalar(ScalarValue::F64(f)));
604 return;
605 }
606 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
607 text.to_string(),
608 ))));
609}