Skip to main content

xml_3dm/xml/
parser.rs

1//! XML parser that builds node trees.
2//!
3//! This parser uses quick-xml's streaming API to build node trees matching
4//! the Java implementation's behavior.
5
6use std::collections::HashMap;
7use std::fs::File;
8use std::io::{BufReader, Read};
9use std::path::Path;
10
11use quick_xml::escape::unescape;
12use quick_xml::events::{BytesStart, Event};
13use quick_xml::Reader;
14
15use super::NodeFactory;
16use crate::error::{Error, Result};
17use crate::node::{
18    is_xmlns_attr, split_qname, ExpandedName, NamespaceContext, NodeInner, NodeRef, XmlComment,
19    XmlContent, XmlElement, XmlProcessingInstruction, XmlText,
20};
21
/// How whitespace in text content is treated while the tree is being built.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
enum WhitespaceMode {
    /// Collapse runs of whitespace and trim the text; this is the default mode.
    #[default]
    Normalize,
    /// Keep every whitespace character exactly as it appears in the source.
    Preserve,
}
31
32/// XML parser that builds node trees.
33pub struct XmlParser<F: NodeFactory> {
34    factory: F,
35}
36
37impl<F: NodeFactory> XmlParser<F> {
38    /// Creates a new parser with the given node factory.
39    pub fn new(factory: F) -> Self {
40        XmlParser { factory }
41    }
42
43    /// Parses XML from a string.
44    pub fn parse_str(&self, xml: &str) -> Result<NodeRef> {
45        let mut reader = Reader::from_str(xml);
46        // Don't trim text - we handle whitespace normalization ourselves
47        reader.config_mut().trim_text_start = false;
48        reader.config_mut().trim_text_end = false;
49        self.parse_reader(&mut reader)
50    }
51
52    /// Parses XML from a file.
53    pub fn parse_file<P: AsRef<Path>>(&self, path: P) -> Result<NodeRef> {
54        let file = File::open(path)?;
55        let buf_reader = BufReader::new(file);
56        let mut reader = Reader::from_reader(buf_reader);
57        // Don't trim text - we handle whitespace normalization ourselves
58        reader.config_mut().trim_text_start = false;
59        reader.config_mut().trim_text_end = false;
60        self.parse_reader(&mut reader)
61    }
62
63    /// Parses XML from a quick-xml Reader.
64    fn parse_reader<R: Read + std::io::BufRead>(&self, reader: &mut Reader<R>) -> Result<NodeRef> {
65        // Create the synthetic $ROOT$ element (matches Java's startDocument)
66        let root = self.factory.make_node(XmlContent::Element(XmlElement::new(
67            "$ROOT$".to_string(),
68            HashMap::new(),
69        )));
70
71        let mut node_stack: Vec<NodeRef> = vec![root.clone()];
72        let mut ws_mode_stack: Vec<WhitespaceMode> = vec![WhitespaceMode::Normalize];
73        let mut ns_context = NamespaceContext::new();
74        let mut current_text: Option<String> = None;
75        let mut buf = Vec::new();
76
77        loop {
78            match reader.read_event_into(&mut buf) {
79                Ok(Event::Start(ref e)) => {
80                    // Get current whitespace mode
81                    let current_ws_mode =
82                        *ws_mode_stack.last().unwrap_or(&WhitespaceMode::Normalize);
83
84                    // Flush any accumulated text
85                    if let Some(text) = current_text.take() {
86                        let text_to_store = if current_ws_mode == WhitespaceMode::Preserve {
87                            text // Keep as-is
88                        } else {
89                            text.trim().to_string() // Trim
90                        };
91                        if !text_to_store.is_empty() || current_ws_mode == WhitespaceMode::Preserve
92                        {
93                            let text_node = self
94                                .factory
95                                .make_node(XmlContent::Text(XmlText::new(&text_to_store)));
96                            if let Some(parent) = node_stack.last() {
97                                NodeInner::add_child_to_ref(parent, text_node);
98                            }
99                        }
100                    }
101
102                    // Push namespace scope BEFORE parsing element
103                    ns_context.push_scope();
104
105                    // Create the element node with namespace awareness
106                    let (element, ws_mode_override) =
107                        self.parse_element_with_ns(e, reader, &mut ns_context)?;
108                    let node = self.factory.make_node(XmlContent::Element(element));
109
110                    // Push whitespace mode (inherit or override)
111                    let new_ws_mode = ws_mode_override.unwrap_or(current_ws_mode);
112                    ws_mode_stack.push(new_ws_mode);
113
114                    // Add to parent and push onto stack
115                    if let Some(parent) = node_stack.last() {
116                        NodeInner::add_child_to_ref(parent, node.clone());
117                    }
118                    node_stack.push(node);
119                }
120                Ok(Event::End(_)) => {
121                    let current_ws_mode =
122                        *ws_mode_stack.last().unwrap_or(&WhitespaceMode::Normalize);
123
124                    // Flush any accumulated text
125                    if let Some(text) = current_text.take() {
126                        let text_to_store = if current_ws_mode == WhitespaceMode::Preserve {
127                            text
128                        } else {
129                            text.trim().to_string()
130                        };
131                        if !text_to_store.is_empty() || current_ws_mode == WhitespaceMode::Preserve
132                        {
133                            let text_node = self
134                                .factory
135                                .make_node(XmlContent::Text(XmlText::new(&text_to_store)));
136                            if let Some(parent) = node_stack.last() {
137                                NodeInner::add_child_to_ref(parent, text_node);
138                            }
139                        }
140                    }
141
142                    // Pop from stacks
143                    node_stack.pop();
144                    ws_mode_stack.pop();
145                    ns_context.pop_scope();
146                }
147                Ok(Event::Empty(ref e)) => {
148                    // Self-closing tag - handle like Start + End (no children, so no scope push)
149                    let current_ws_mode =
150                        *ws_mode_stack.last().unwrap_or(&WhitespaceMode::Normalize);
151
152                    // Flush any accumulated text
153                    if let Some(text) = current_text.take() {
154                        let text_to_store = if current_ws_mode == WhitespaceMode::Preserve {
155                            text
156                        } else {
157                            text.trim().to_string()
158                        };
159                        if !text_to_store.is_empty() {
160                            let text_node = self
161                                .factory
162                                .make_node(XmlContent::Text(XmlText::new(&text_to_store)));
163                            if let Some(parent) = node_stack.last() {
164                                NodeInner::add_child_to_ref(parent, text_node);
165                            }
166                        }
167                    }
168
169                    // Push/pop namespace scope for empty elements (may have xmlns declarations)
170                    ns_context.push_scope();
171                    let (element, _ws_mode_override) =
172                        self.parse_element_with_ns(e, reader, &mut ns_context)?;
173                    ns_context.pop_scope();
174                    let node = self.factory.make_node(XmlContent::Element(element));
175
176                    if let Some(parent) = node_stack.last() {
177                        NodeInner::add_child_to_ref(parent, node);
178                    }
179                }
180                Ok(Event::Text(e)) => {
181                    let current_ws_mode =
182                        *ws_mode_stack.last().unwrap_or(&WhitespaceMode::Normalize);
183                    let raw =
184                        std::str::from_utf8(e.as_ref()).map_err(|e| Error::Parse(e.to_string()))?;
185                    let text = unescape(raw).map_err(|e| Error::Parse(e.to_string()))?;
186
187                    if current_ws_mode == WhitespaceMode::Preserve {
188                        // Preserve mode: keep text exactly as-is
189                        current_text = Some(match current_text {
190                            Some(mut existing) => {
191                                existing.push_str(&text);
192                                existing
193                            }
194                            None => text.to_string(),
195                        });
196                    } else {
197                        // Normalize mode: use existing normalization
198                        let normalized = self.normalize_whitespace(&text, current_text.as_deref());
199                        if let Some(normalized) = normalized {
200                            current_text = Some(match current_text {
201                                Some(mut existing) => {
202                                    existing.push_str(&normalized);
203                                    existing
204                                }
205                                None => normalized,
206                            });
207                        }
208                    }
209                }
210                Ok(Event::CData(ref e)) => {
211                    // CDATA sections preserve whitespace by their nature
212                    let text = String::from_utf8_lossy(e.as_ref());
213                    current_text = Some(match current_text {
214                        Some(mut existing) => {
215                            existing.push_str(&text);
216                            existing
217                        }
218                        None => text.to_string(),
219                    });
220                }
221                Ok(Event::Eof) => break,
222                Ok(Event::Comment(ref e)) => {
223                    let current_ws_mode =
224                        *ws_mode_stack.last().unwrap_or(&WhitespaceMode::Normalize);
225
226                    // Flush any accumulated text first
227                    if let Some(text) = current_text.take() {
228                        let text_to_store = if current_ws_mode == WhitespaceMode::Preserve {
229                            text
230                        } else {
231                            text.trim().to_string()
232                        };
233                        if !text_to_store.is_empty() {
234                            let text_node = self
235                                .factory
236                                .make_node(XmlContent::Text(XmlText::new(&text_to_store)));
237                            if let Some(parent) = node_stack.last() {
238                                NodeInner::add_child_to_ref(parent, text_node);
239                            }
240                        }
241                    }
242
243                    // Capture comments as nodes
244                    let comment_text = String::from_utf8_lossy(e.as_ref()).to_string();
245                    let comment_node = self
246                        .factory
247                        .make_node(XmlContent::Comment(XmlComment::new(&comment_text)));
248                    if let Some(parent) = node_stack.last() {
249                        NodeInner::add_child_to_ref(parent, comment_node);
250                    }
251                }
252                Ok(Event::PI(ref e)) => {
253                    let current_ws_mode =
254                        *ws_mode_stack.last().unwrap_or(&WhitespaceMode::Normalize);
255
256                    // Flush any accumulated text first (like Event::Start does)
257                    if let Some(text) = current_text.take() {
258                        let text_to_store = if current_ws_mode == WhitespaceMode::Preserve {
259                            text
260                        } else {
261                            text.trim().to_string()
262                        };
263                        if !text_to_store.is_empty() || current_ws_mode == WhitespaceMode::Preserve
264                        {
265                            let text_node = self
266                                .factory
267                                .make_node(XmlContent::Text(XmlText::new(&text_to_store)));
268                            if let Some(parent) = node_stack.last() {
269                                NodeInner::add_child_to_ref(parent, text_node);
270                            }
271                        }
272                    }
273
274                    // Parse PI: <?target content?>
275                    let pi_data = String::from_utf8_lossy(e.as_ref()).to_string();
276                    // Use char_indices() for UTF-8 safety
277                    let (target, content) = match pi_data
278                        .char_indices()
279                        .find(|(_, c)| c.is_whitespace())
280                        .map(|(i, _)| i)
281                    {
282                        Some(pos) => (
283                            pi_data[..pos].to_string(),
284                            pi_data[pos..].trim().to_string(),
285                        ),
286                        None => (pi_data, String::new()),
287                    };
288
289                    let pi_node = self.factory.make_node(XmlContent::ProcessingInstruction(
290                        XmlProcessingInstruction::new(&target, &content),
291                    ));
292                    if let Some(parent) = node_stack.last() {
293                        NodeInner::add_child_to_ref(parent, pi_node);
294                    }
295                }
296                Ok(Event::Decl(_)) => {
297                    // Still ignore XML declaration
298                }
299                Ok(Event::DocType(_)) => {
300                    // Ignore DOCTYPE
301                }
302                Ok(Event::GeneralRef(_)) => {
303                    // Ignore general entity references
304                }
305                Err(e) => return Err(Error::Parse(format!("XML parse error: {}", e))),
306            }
307            buf.clear();
308        }
309
310        Ok(root)
311    }
312
313    /// Parses an element's name and attributes with namespace awareness.
314    ///
315    /// Returns the element and an optional whitespace mode override if `xml:space` is present.
316    fn parse_element_with_ns<R: Read + std::io::BufRead>(
317        &self,
318        e: &BytesStart,
319        reader: &Reader<R>,
320        ns_context: &mut NamespaceContext,
321    ) -> Result<(XmlElement, Option<WhitespaceMode>)> {
322        let qname = reader
323            .decoder()
324            .decode(e.name().as_ref())
325            .map_err(|e| Error::Parse(e.to_string()))?
326            .to_string();
327
328        let mut attributes = HashMap::new();
329        let mut namespace_decls = HashMap::new();
330        let mut ws_mode_override = None;
331
332        for attr_result in e.attributes() {
333            let attr = attr_result.map_err(|e| Error::Parse(format!("Attribute error: {}", e)))?;
334            let key = reader
335                .decoder()
336                .decode(attr.key.as_ref())
337                .map_err(|e| Error::Parse(e.to_string()))?
338                .to_string();
339            let value = attr
340                .unescape_value()
341                .map_err(|e| Error::Parse(e.to_string()))?
342                .to_string();
343
344            // Check for xml:space attribute
345            if key == "xml:space" {
346                ws_mode_override = Some(match value.as_str() {
347                    "preserve" => WhitespaceMode::Preserve,
348                    _ => WhitespaceMode::Normalize,
349                });
350            }
351
352            if is_xmlns_attr(&key) {
353                // Namespace declaration
354                let prefix = if key == "xmlns" {
355                    String::new()
356                } else {
357                    key[6..].to_string() // "xmlns:prefix" -> "prefix"
358                };
359                ns_context.bind(&prefix, &value);
360                namespace_decls.insert(prefix, value);
361            } else {
362                attributes.insert(key, value);
363            }
364        }
365
366        // Resolve element name to expanded name
367        let (prefix, local_name) = split_qname(&qname);
368        let expanded_name = match prefix {
369            Some(p) => ns_context
370                .resolve(p)
371                .map(|uri| ExpandedName::new(uri, local_name.to_string())),
372            None => {
373                if let Some(uri) = ns_context.default_namespace() {
374                    if !uri.is_empty() {
375                        Some(ExpandedName::new(uri, local_name.to_string()))
376                    } else {
377                        Some(ExpandedName::no_namespace(local_name.to_string()))
378                    }
379                } else {
380                    Some(ExpandedName::no_namespace(local_name.to_string()))
381                }
382            }
383        };
384
385        Ok((
386            XmlElement::new_with_namespace(qname, expanded_name, namespace_decls, attributes),
387            ws_mode_override,
388        ))
389    }
390
391    /// Normalizes whitespace in text content, matching Java's behavior.
392    ///
393    /// The Java implementation:
394    /// - Collapses consecutive whitespace to a single space
395    /// - Tracks whether the previous text ended with whitespace
396    /// - Only returns Some if there's non-whitespace content
397    fn normalize_whitespace(&self, text: &str, previous: Option<&str>) -> Option<String> {
398        let last_is_ws = previous.is_none_or(|p| p.ends_with(' '));
399        let mut last_was_ws = last_is_ws;
400        let mut has_non_ws = false;
401        let mut result = String::new();
402
403        for c in text.chars() {
404            if c.is_whitespace() {
405                if !last_was_ws {
406                    result.push(' ');
407                    last_was_ws = true;
408                }
409                // Skip additional whitespace
410            } else {
411                result.push(c);
412                last_was_ws = false;
413                has_non_ws = true;
414            }
415        }
416
417        if has_non_ws {
418            Some(result)
419        } else {
420            None
421        }
422    }
423}
424
425/// Parses XML from a file using a base node factory.
426pub fn parse_file<P: AsRef<Path>>(path: P) -> Result<NodeRef> {
427    let parser = XmlParser::new(super::BaseNodeFactory);
428    parser.parse_file(path)
429}
430
431/// Parses XML from a string using a base node factory.
432pub fn parse_str(xml: &str) -> Result<NodeRef> {
433    let parser = XmlParser::new(super::BaseNodeFactory);
434    parser.parse_str(xml)
435}
436
#[cfg(test)]
mod tests {
    use super::*;
    use crate::xml::BaseNodeFactory;

    /// Parses `xml` with a `BaseNodeFactory`-backed parser, panicking if
    /// parsing fails.
    fn parse(xml: &str) -> NodeRef {
        XmlParser::new(BaseNodeFactory).parse_str(xml).unwrap()
    }

    /// Returns the child of `node` at position `index`.
    fn child(node: &NodeRef, index: usize) -> NodeRef {
        node.borrow().children()[index].clone()
    }

    /// Returns the qualified name of an element node; panics for any other
    /// kind of node.
    fn element_qname(node: &NodeRef) -> String {
        match node.borrow().content() {
            Some(XmlContent::Element(e)) => e.qname().to_string(),
            _ => panic!("Expected element"),
        }
    }

    /// Returns the text of a text node; panics for any other kind of node.
    fn text_content(node: &NodeRef) -> String {
        match node.borrow().content() {
            Some(XmlContent::Text(t)) => t.text().iter().collect(),
            _ => panic!("Expected text node"),
        }
    }

    #[test]
    fn test_parse_simple_xml() {
        let tree = parse(r#"<root><child>text</child></root>"#);

        // The synthetic $ROOT$ wraps exactly one child: the document element.
        assert_eq!(tree.borrow().child_count(), 1);
        match tree.borrow().content().unwrap() {
            XmlContent::Element(e) => assert_eq!(e.qname(), "$ROOT$"),
            _ => panic!("Expected element"),
        }

        // That single child is <root>.
        assert_eq!(element_qname(&child(&tree, 0)), "root");
    }

    #[test]
    fn test_parse_with_attributes() {
        let tree = parse(r#"<root id="foo" class="bar">content</root>"#);
        let root_elem = child(&tree, 0);
        let root_elem_ref = root_elem.borrow();

        let Some(XmlContent::Element(e)) = root_elem_ref.content() else {
            panic!("Expected element");
        };
        assert_eq!(e.qname(), "root");
        assert_eq!(e.attributes().get("id"), Some(&"foo".to_string()));
        assert_eq!(e.attributes().get("class"), Some(&"bar".to_string()));
    }

    #[test]
    fn test_whitespace_normalization() {
        let tree = parse(r#"<root>  hello   world  </root>"#);
        let root_elem = child(&tree, 0);

        // A single text child whose whitespace has been collapsed and trimmed.
        assert_eq!(root_elem.borrow().child_count(), 1);
        assert_eq!(text_content(&child(&root_elem, 0)), "hello world");
    }

    #[test]
    fn test_empty_element() {
        let tree = parse(r#"<root><empty /></root>"#);
        let root_elem = child(&tree, 0);
        assert_eq!(root_elem.borrow().child_count(), 1);

        let empty_elem = child(&root_elem, 0);
        assert_eq!(element_qname(&empty_elem), "empty");
        assert_eq!(empty_elem.borrow().child_count(), 0);
    }

    #[test]
    fn test_nested_elements() {
        let tree = parse(r#"<a><b><c>deep</c></b></a>"#);

        // Walk down the first-child chain: $ROOT$ -> a -> b -> c -> text
        let leaf = (0..4).fold(tree, |node, _| child(&node, 0));
        assert_eq!(text_content(&leaf), "deep");
    }

    #[test]
    fn test_whitespace_preservation() {
        // xml:space="preserve" on the element that owns the text
        let tree = parse(r#"<root xml:space="preserve">  hello   world  </root>"#);
        let root_elem = child(&tree, 0);

        // A single text child with its whitespace left exactly as written.
        assert_eq!(root_elem.borrow().child_count(), 1);
        assert_eq!(text_content(&child(&root_elem, 0)), "  hello   world  ");
    }

    #[test]
    fn test_whitespace_preservation_inheritance() {
        // xml:space="preserve" must carry over to descendant elements
        let tree = parse(r#"<root xml:space="preserve"><child>  text  </child></root>"#);
        let child_elem = child(&child(&tree, 0), 0);

        assert_eq!(child_elem.borrow().child_count(), 1);
        assert_eq!(text_content(&child(&child_elem, 0)), "  text  ");
    }

    #[test]
    fn test_whitespace_preservation_override() {
        // xml:space="default" must switch an inherited preserve mode back off
        let tree = parse(
            r#"<root xml:space="preserve"><child xml:space="default">  text  </child></root>"#,
        );
        let child_elem = child(&child(&tree, 0), 0);

        assert_eq!(child_elem.borrow().child_count(), 1);
        assert_eq!(text_content(&child(&child_elem, 0)), "text");
    }

    #[test]
    fn test_namespace_parsing() {
        let tree = parse(
            r#"<root xmlns="http://example.com" xmlns:ns="http://ns.example.com"><ns:child /></root>"#,
        );
        let root_elem = child(&tree, 0);

        {
            let root_elem_ref = root_elem.borrow();
            let Some(XmlContent::Element(e)) = root_elem_ref.content() else {
                panic!("Expected element");
            };
            assert_eq!(e.qname(), "root");
            // Namespace declarations are kept apart from ordinary attributes.
            assert_eq!(
                e.namespace_decls().get(""),
                Some(&"http://example.com".to_string())
            );
            assert_eq!(
                e.namespace_decls().get("ns"),
                Some(&"http://ns.example.com".to_string())
            );
            assert!(e.attributes().is_empty());
            // The unprefixed name resolves through the default namespace.
            let expanded = e.expanded_name().expect("should have expanded name");
            assert_eq!(expanded.namespace_uri.as_ref(), "http://example.com");
            assert_eq!(expanded.local_name, "root");
        }

        // The prefixed child resolves through the `ns` binding.
        let ns_child = child(&root_elem, 0);
        let ns_child_ref = ns_child.borrow();
        let Some(XmlContent::Element(e)) = ns_child_ref.content() else {
            panic!("Expected element");
        };
        assert_eq!(e.qname(), "ns:child");
        let expanded = e.expanded_name().expect("should have expanded name");
        assert_eq!(expanded.namespace_uri.as_ref(), "http://ns.example.com");
        assert_eq!(expanded.local_name, "child");
    }

    #[test]
    fn test_comment_flushes_text() {
        // Text preceding a comment must become its own node ahead of the comment
        let tree = parse(r#"<root>hello<!-- comment -->world</root>"#);
        let root_elem = child(&tree, 0);

        // Expected children: text("hello"), comment, text("world")
        assert_eq!(root_elem.borrow().child_count(), 3);

        let first = child(&root_elem, 0);
        let first_ref = first.borrow();
        match first_ref.content() {
            Some(XmlContent::Text(t)) => {
                let text: String = t.text().iter().collect();
                assert_eq!(text, "hello");
            }
            other => panic!("Expected text node, got {:?}", other),
        }

        let second = child(&root_elem, 1);
        assert!(matches!(
            second.borrow().content(),
            Some(XmlContent::Comment(_))
        ));

        assert_eq!(text_content(&child(&root_elem, 2)), "world");
    }
}