diffx_core/parser/
xml.rs

1use anyhow::{anyhow, Result};
2use quick_xml::events::Event;
3use quick_xml::Reader;
4use serde_json::Value;
5
6pub fn parse_xml(content: &str) -> Result<Value> {
7    let mut reader = Reader::from_str(content);
8    reader.trim_text(true);
9
10    // Stack-based parsing for nested structures
11    let mut stack: Vec<(String, serde_json::Map<String, Value>)> = Vec::new();
12    let mut root: Option<(String, serde_json::Map<String, Value>)> = None;
13    let mut current_text = String::new();
14
15    loop {
16        match reader.read_event() {
17            Ok(Event::Start(ref e)) => {
18                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
19                let mut element = serde_json::Map::new();
20
21                // Parse attributes
22                for attr in e.attributes().flatten() {
23                    let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
24                    let value = String::from_utf8_lossy(&attr.value).to_string();
25                    element.insert(key, Value::String(value));
26                }
27
28                // If we have text content to add to parent
29                if !current_text.trim().is_empty() && !stack.is_empty() {
30                    let (_, parent) = stack.last_mut().unwrap();
31                    parent.insert(
32                        "text".to_string(),
33                        Value::String(current_text.trim().to_string()),
34                    );
35                }
36                current_text.clear();
37
38                // Push new element to stack
39                stack.push((tag_name, element));
40            }
41            Ok(Event::Text(e)) => {
42                let text = e.unescape().unwrap_or_default().to_string();
43                if !text.trim().is_empty() {
44                    current_text.push_str(&text);
45                }
46            }
47            Ok(Event::CData(e)) => {
48                // Handle CDATA sections
49                let cdata_text = String::from_utf8_lossy(&e).to_string();
50                current_text.push_str(&cdata_text);
51            }
52            Ok(Event::End(ref e)) => {
53                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
54
55                if let Some((name, mut element)) = stack.pop() {
56                    if name == tag_name {
57                        // Add any remaining text content
58                        if !current_text.trim().is_empty() {
59                            // If element only has text (no attributes or children), make it a simple string
60                            if element.is_empty() {
61                                let text_value = Value::String(current_text.trim().to_string());
62                                current_text.clear();
63
64                                if let Some((_, parent)) = stack.last_mut() {
65                                    // Add to parent
66                                    add_to_parent(parent, &name, text_value);
67                                } else {
68                                    // This is the root element
69                                    root = Some((
70                                        name.clone(),
71                                        serde_json::Map::from_iter(vec![(name, text_value)]),
72                                    ));
73                                }
74                                continue;
75                            } else {
76                                element.insert(
77                                    "text".to_string(),
78                                    Value::String(current_text.trim().to_string()),
79                                );
80                            }
81                        }
82                        current_text.clear();
83
84                        // Convert element to Value
85                        let element_value = if element.is_empty() {
86                            Value::Object(serde_json::Map::new())
87                        } else if element.len() == 1 && element.contains_key("text") {
88                            element.get("text").unwrap().clone()
89                        } else {
90                            Value::Object(element)
91                        };
92
93                        if let Some((_, parent)) = stack.last_mut() {
94                            // Add to parent
95                            add_to_parent(parent, &name, element_value);
96                        } else {
97                            // This is the root element
98                            let mut root_map = serde_json::Map::new();
99                            root_map.insert(name.clone(), element_value);
100                            root = Some((name.clone(), root_map));
101                        }
102                    }
103                }
104            }
105            Ok(Event::Empty(ref e)) => {
106                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
107                let mut element = serde_json::Map::new();
108
109                // Parse attributes
110                for attr in e.attributes().flatten() {
111                    let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
112                    let value = String::from_utf8_lossy(&attr.value).to_string();
113                    element.insert(key, Value::String(value));
114                }
115
116                let element_value = Value::Object(element);
117
118                if let Some((_, parent)) = stack.last_mut() {
119                    // Add to parent
120                    add_to_parent(parent, &tag_name, element_value);
121                } else {
122                    // This is a root-level empty element
123                    let mut root_map = serde_json::Map::new();
124                    root_map.insert(tag_name.clone(), element_value);
125                    root = Some((tag_name.clone(), root_map));
126                }
127            }
128            Ok(Event::Eof) => break,
129            Err(e) => return Err(anyhow!("XML parsing error: {e}")),
130            _ => {}
131        }
132    }
133
134    // Return the root element
135    if let Some((_, root_map)) = root {
136        Ok(Value::Object(root_map))
137    } else {
138        Ok(Value::Object(serde_json::Map::new()))
139    }
140}
141
142// Helper function to add a child element to a parent
143fn add_to_parent(parent: &mut serde_json::Map<String, Value>, key: &str, value: Value) {
144    if let Some(existing) = parent.get_mut(key) {
145        match existing {
146            Value::Array(arr) => {
147                arr.push(value);
148            }
149            other => {
150                let _ = std::mem::replace(other, Value::Array(vec![other.clone(), value]));
151            }
152        }
153    } else {
154        parent.insert(key.to_string(), value);
155    }
156}