Skip to main content

faucet_source_xml/
convert.rs

//! XML to JSON conversion.
//!
//! Converts XML documents to `serde_json::Value`, preserving the element
//! hierarchy. Attributes are prefixed with `@`; text content uses `#text`.
5
6use faucet_core::FaucetError;
7use quick_xml::events::Event;
8use quick_xml::reader::Reader;
9use serde_json::{Map, Value, json};
10
11/// Convert an XML string to a JSON value.
12///
13/// Elements become objects, repeated elements become arrays, attributes
14/// are stored with `@` prefix, and text content uses `#text`.
15pub fn xml_to_json(xml: &str) -> Result<Value, FaucetError> {
16    let mut reader = Reader::from_str(xml);
17    let mut stack: Vec<(String, Map<String, Value>)> = vec![("$root".into(), Map::new())];
18
19    loop {
20        match reader.read_event() {
21            Ok(Event::Start(e)) => {
22                let name = String::from_utf8_lossy(e.name().as_ref()).into_owned();
23                let mut obj = Map::new();
24
25                // Collect attributes.
26                for attr in e.attributes().flatten() {
27                    let key = format!("@{}", String::from_utf8_lossy(attr.key.as_ref()));
28                    let val = String::from_utf8_lossy(&attr.value).into_owned();
29                    obj.insert(key, Value::String(val));
30                }
31
32                stack.push((name, obj));
33            }
34            Ok(Event::End(_)) => {
35                let (name, obj) = stack.pop().ok_or_else(|| {
36                    FaucetError::Transform("malformed XML: unexpected end tag".into())
37                })?;
38
39                let value = if obj.len() == 1 && obj.contains_key("#text") {
40                    // Simplify: element with only text becomes a string.
41                    obj.into_iter().next().unwrap().1
42                } else {
43                    Value::Object(obj)
44                };
45
46                let parent = stack.last_mut().ok_or_else(|| {
47                    FaucetError::Transform("malformed XML: no parent element".into())
48                })?;
49
50                // If the key already exists, convert to array.
51                match parent.1.get_mut(&name) {
52                    Some(Value::Array(arr)) => arr.push(value),
53                    Some(existing) => {
54                        let prev = existing.clone();
55                        *existing = Value::Array(vec![prev, value]);
56                    }
57                    None => {
58                        parent.1.insert(name, value);
59                    }
60                }
61            }
62            Ok(Event::Text(e)) => {
63                let text = e
64                    .unescape()
65                    .map_err(|err| FaucetError::Transform(format!("XML decode error: {err}")))?
66                    .trim()
67                    .to_string();
68
69                if !text.is_empty()
70                    && let Some(current) = stack.last_mut()
71                {
72                    match current.1.get_mut("#text") {
73                        Some(Value::String(s)) => {
74                            s.push(' ');
75                            s.push_str(&text);
76                        }
77                        _ => {
78                            current.1.insert("#text".into(), Value::String(text));
79                        }
80                    }
81                }
82            }
83            Ok(Event::Empty(e)) => {
84                let name = String::from_utf8_lossy(e.name().as_ref()).into_owned();
85                let mut obj = Map::new();
86                for attr in e.attributes().flatten() {
87                    let key = format!("@{}", String::from_utf8_lossy(attr.key.as_ref()));
88                    let val = String::from_utf8_lossy(&attr.value).into_owned();
89                    obj.insert(key, Value::String(val));
90                }
91                let value = if obj.is_empty() {
92                    json!(null)
93                } else {
94                    Value::Object(obj)
95                };
96
97                if let Some(parent) = stack.last_mut() {
98                    match parent.1.get_mut(&name) {
99                        Some(Value::Array(arr)) => arr.push(value),
100                        Some(existing) => {
101                            let prev = existing.clone();
102                            *existing = Value::Array(vec![prev, value]);
103                        }
104                        None => {
105                            parent.1.insert(name, value);
106                        }
107                    }
108                }
109            }
110            Ok(Event::Eof) => break,
111            Ok(_) => {} // Skip comments, processing instructions, etc.
112            Err(e) => {
113                return Err(FaucetError::Transform(format!("XML parse error: {e}")));
114            }
115        }
116    }
117
118    let (_, root) = stack
119        .pop()
120        .ok_or_else(|| FaucetError::Transform("empty XML document".into()))?;
121
122    Ok(Value::Object(root))
123}
124
125/// Navigate into a JSON value using a dot-separated path and extract
126/// matching records. If the final element is an array, its items are
127/// returned individually.
128pub fn extract_at_path(value: &Value, path: &str) -> Vec<Value> {
129    let segments: Vec<&str> = path.split('.').collect();
130    let mut current = value.clone();
131
132    for seg in &segments {
133        current = match current {
134            Value::Object(ref map) => match map.get(*seg) {
135                Some(v) => v.clone(),
136                None => return vec![],
137            },
138            _ => return vec![],
139        };
140    }
141
142    match current {
143        Value::Array(arr) => arr,
144        other => vec![other],
145    }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `xml`, panicking if the conversion fails.
    fn parse(xml: &str) -> Value {
        xml_to_json(xml).unwrap()
    }

    /// Text-only children collapse to plain strings.
    #[test]
    fn simple_xml_to_json() {
        let doc = parse(r#"<root><name>Alice</name><age>30</age></root>"#);
        let root = &doc["root"];
        assert_eq!(root["name"], "Alice");
        assert_eq!(root["age"], "30");
    }

    /// Siblings sharing a name are gathered into one array, in order.
    #[test]
    fn repeated_elements_become_array() {
        let doc = parse(r#"<root><item>a</item><item>b</item><item>c</item></root>"#);
        let entries = doc["root"]["item"].as_array().unwrap();
        assert_eq!(entries.len(), 3);
        assert_eq!(entries[0], "a");
        assert_eq!(entries[1], "b");
    }

    /// Attributes appear next to child elements under an `@` prefix.
    #[test]
    fn attributes_prefixed() {
        let doc = parse(r#"<user id="42"><name>Bob</name></user>"#);
        let user = &doc["user"];
        assert_eq!(user["@id"], "42");
        assert_eq!(user["name"], "Bob");
    }

    /// Nesting depth is preserved.
    #[test]
    fn nested_elements() {
        let doc = parse(r#"<root><user><address><city>NYC</city></address></user></root>"#);
        assert_eq!(doc["root"]["user"]["address"]["city"], "NYC");
    }

    /// A self-closing element without attributes maps to `null`.
    #[test]
    fn empty_elements() {
        let doc = parse(r#"<root><flag/></root>"#);
        assert!(doc["root"]["flag"].is_null());
    }

    /// A self-closing element keeps its attributes.
    #[test]
    fn empty_element_with_attr() {
        let doc = parse(r#"<root><flag enabled="true"/></root>"#);
        assert_eq!(doc["root"]["flag"]["@enabled"], "true");
    }

    /// An array at the end of the path is flattened into individual records.
    #[test]
    fn extract_at_path_nested() {
        let input = json!({"root": {"users": {"user": [{"id": 1}, {"id": 2}]}}});
        let found = extract_at_path(&input, "root.users.user");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0]["id"], 1);
    }

    /// A non-array value at the end of the path yields exactly one record.
    #[test]
    fn extract_at_path_single_element() {
        let input = json!({"root": {"user": {"id": 1}}});
        let found = extract_at_path(&input, "root.user");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0]["id"], 1);
    }

    /// A missing segment yields no records.
    #[test]
    fn extract_at_path_missing() {
        let input = json!({"root": {}});
        let found = extract_at_path(&input, "root.users.user");
        assert!(found.is_empty());
    }

    /// Namespaced element names are usable as path segments.
    #[test]
    fn soap_envelope() {
        let envelope = r#"
        <soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
            <soap:Body>
                <GetUsersResponse>
                    <User><Name>Alice</Name></User>
                    <User><Name>Bob</Name></User>
                </GetUsersResponse>
            </soap:Body>
        </soap:Envelope>"#;
        let doc = parse(envelope);
        let found = extract_at_path(&doc, "soap:Envelope.soap:Body.GetUsersResponse.User");
        assert_eq!(found.len(), 2);
    }
}