Skip to main content

xml_disassembler/builders/
build_xml_string.rs

1//! Build XML string from XmlElement structure.
2
3use quick_xml::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
4use quick_xml::Writer;
5use serde_json::{Map, Value};
6
7use crate::types::XmlElement;
8
9fn value_to_string(v: &Value) -> String {
10    match v {
11        Value::String(s) => s.clone(),
12        Value::Number(n) => n.to_string(),
13        Value::Bool(b) => b.to_string(),
14        Value::Null => String::new(),
15        _ => serde_json::to_string(v).unwrap_or_default(),
16    }
17}
18
19fn write_element<W: std::io::Write>(
20    writer: &mut Writer<W>,
21    name: &str,
22    content: &Value,
23    indent_level: usize,
24) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
25    let indent = "    ".repeat(indent_level);
26    let child_indent = "    ".repeat(indent_level + 1);
27
28    match content {
29        Value::Object(obj) => {
30            let (attrs, children): (Vec<_>, Vec<_>) =
31                obj.iter().partition(|(k, _)| k.starts_with('@'));
32
33            let attr_name = |k: &str| k.trim_start_matches('@').to_string();
34
35            let mut text_content = String::new();
36            let mut comment_content = String::new();
37            let mut text_tail_content = String::new();
38            let mut cdata_content = String::new();
39            let child_elements: Vec<(&String, &Value)> = children
40                .iter()
41                .filter_map(|(k, v)| {
42                    if *k == "#text" {
43                        text_content = value_to_string(v);
44                        None
45                    } else if *k == "#comment" {
46                        comment_content = value_to_string(v);
47                        None
48                    } else if *k == "#text-tail" {
49                        text_tail_content = value_to_string(v);
50                        None
51                    } else if *k == "#cdata" {
52                        cdata_content = value_to_string(v);
53                        None
54                    } else {
55                        Some((*k, *v))
56                    }
57                })
58                .collect();
59
60            let has_children = child_elements.iter().any(|(_, v)| {
61                v.is_object()
62                    || (v.is_array() && v.as_array().map(|a| !a.is_empty()).unwrap_or(false))
63            });
64
65            let attrs: Vec<(String, String)> = attrs
66                .iter()
67                .map(|(k, v)| (attr_name(k), value_to_string(v)))
68                .collect();
69
70            let mut start = BytesStart::new(name);
71            for (k, v) in &attrs {
72                start.push_attribute((k.as_str(), v.as_str()));
73            }
74            writer.write_event(Event::Start(start))?;
75
76            if has_children || !child_elements.is_empty() {
77                writer.write_event(Event::Text(BytesText::new(
78                    format!("\n{}", child_indent).as_str(),
79                )))?;
80
81                let child_count = child_elements.len();
82                for (idx, (child_name, child_value)) in child_elements.iter().enumerate() {
83                    let is_last = idx == child_count - 1;
84                    match child_value {
85                        Value::Array(arr) => {
86                            let arr_len = arr.len();
87                            for (i, item) in arr.iter().enumerate() {
88                                let arr_last = i == arr_len - 1;
89                                write_element(writer, child_name, item, indent_level + 1)?;
90                                if !arr_last {
91                                    writer.write_event(Event::Text(BytesText::new(
92                                        format!("\n{}", child_indent).as_str(),
93                                    )))?;
94                                }
95                            }
96                            if !is_last {
97                                writer.write_event(Event::Text(BytesText::new(
98                                    format!("\n{}", child_indent).as_str(),
99                                )))?;
100                            }
101                        }
102                        Value::Object(_) => {
103                            write_element(writer, child_name, child_value, indent_level + 1)?;
104                            if !is_last {
105                                writer.write_event(Event::Text(BytesText::new(
106                                    format!("\n{}", child_indent).as_str(),
107                                )))?;
108                            }
109                        }
110                        _ => {
111                            writer
112                                .write_event(Event::Start(BytesStart::new(child_name.as_str())))?;
113                            // BytesText::new() expects unescaped content; the writer escapes when writing
114                            writer.write_event(Event::Text(BytesText::new(
115                                value_to_string(child_value).as_str(),
116                            )))?;
117                            writer.write_event(Event::End(BytesEnd::new(child_name.as_str())))?;
118                            if !is_last {
119                                writer.write_event(Event::Text(BytesText::new(
120                                    format!("\n{}", child_indent).as_str(),
121                                )))?;
122                            }
123                        }
124                    }
125                }
126
127                writer.write_event(Event::Text(BytesText::new(
128                    format!("\n{}", indent).as_str(),
129                )))?;
130            } else if !cdata_content.is_empty()
131                || !text_content.is_empty()
132                || !comment_content.is_empty()
133                || !text_tail_content.is_empty()
134            {
135                // Add newline+indent before content when no leading text (keeps CDATA/comment on separate line)
136                if text_content.is_empty() && comment_content.is_empty() {
137                    writer.write_event(Event::Text(BytesText::new(
138                        format!("\n{}", child_indent).as_str(),
139                    )))?;
140                }
141                // Output in order: #text, #comment, #text-tail, #cdata
142                if !text_content.is_empty() {
143                    writer.write_event(Event::Text(BytesText::new(text_content.as_str())))?;
144                }
145                if !comment_content.is_empty() {
146                    writer.write_event(Event::Comment(BytesText::new(comment_content.as_str())))?;
147                }
148                if !text_tail_content.is_empty() {
149                    writer.write_event(Event::Text(BytesText::new(text_tail_content.as_str())))?;
150                }
151                if !cdata_content.is_empty() {
152                    writer.write_event(Event::CData(BytesCData::new(cdata_content.as_str())))?;
153                }
154                // Add newline+indent before closing tag only for CDATA (keeps compact for text-only)
155                if !cdata_content.is_empty() {
156                    writer.write_event(Event::Text(BytesText::new(
157                        format!("\n{}", indent).as_str(),
158                    )))?;
159                }
160            }
161
162            writer.write_event(Event::End(BytesEnd::new(name)))?;
163        }
164        Value::Array(arr) => {
165            for item in arr {
166                write_element(writer, name, item, indent_level)?;
167            }
168        }
169        _ => {
170            writer.write_event(Event::Start(BytesStart::new(name)))?;
171            // BytesText::new() expects unescaped content; the writer escapes when writing
172            writer.write_event(Event::Text(BytesText::new(
173                value_to_string(content).as_str(),
174            )))?;
175            writer.write_event(Event::End(BytesEnd::new(name)))?;
176        }
177    }
178
179    Ok(())
180}
181
182fn build_xml_from_object(
183    element: &Map<String, Value>,
184) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
185    // Use Writer::new (no indent) so leaf elements stay compact and match fixture format
186    let mut writer = Writer::new(Vec::new());
187
188    let (declaration, root_key, root_value) = if let Some(decl) = element.get("?xml") {
189        let root_key = element
190            .keys()
191            .find(|k| *k != "?xml")
192            .cloned()
193            .unwrap_or_else(|| "root".to_string());
194        let root_value = element
195            .get(&root_key)
196            .cloned()
197            .unwrap_or_else(|| Value::Object(Map::new()));
198        (Some(decl), root_key, root_value)
199    } else {
200        let root_key = element
201            .keys()
202            .next()
203            .cloned()
204            .unwrap_or_else(|| "root".to_string());
205        let root_value = element
206            .get(&root_key)
207            .cloned()
208            .unwrap_or_else(|| Value::Object(Map::new()));
209        (None, root_key, root_value)
210    };
211
212    if let Some(obj) = declaration.and_then(|d| d.as_object()) {
213        let version = obj
214            .get("@version")
215            .and_then(|v| v.as_str())
216            .unwrap_or("1.0");
217        let encoding = obj.get("@encoding").and_then(|v| v.as_str());
218        let standalone = obj.get("@standalone").and_then(|v| v.as_str());
219        writer.write_event(Event::Decl(BytesDecl::new(version, encoding, standalone)))?;
220        writer.write_event(Event::Text(BytesText::new("\n")))?;
221    }
222
223    write_element(&mut writer, &root_key, &root_value, 0)?;
224
225    let result = String::from_utf8(writer.into_inner())?;
226    Ok(result.trim_end().to_string())
227}
228
229/// Build XML string from XmlElement.
230pub fn build_xml_string(element: &XmlElement) -> String {
231    match element {
232        Value::Object(obj) => build_xml_from_object(obj).unwrap_or_default(),
233        _ => String::new(),
234    }
235}
236
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Asserts that `xml` contains every fragment, reporting the full output on failure.
    fn assert_contains_all(xml: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                xml.contains(fragment),
                "expected fragment {:?} in output {:?}",
                fragment,
                xml
            );
        }
    }

    #[test]
    fn build_xml_string_non_object_returns_empty() {
        assert!(build_xml_string(&Value::Array(vec![])).is_empty());
        assert!(build_xml_string(&Value::Null).is_empty());
    }

    #[test]
    fn build_xml_string_simple_root() {
        let el = json!({
            "?xml": { "@version": "1.0", "@encoding": "UTF-8" },
            "root": { "child": "value" }
        });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<?xml", "<root>", "<child>value</child>", "</root>"]);
        // Children sit on their own line, indented four spaces below the root.
        assert_contains_all(&out, &["\n    <child>value</child>\n"]);
    }

    #[test]
    fn build_xml_string_with_attributes() {
        let el = json!({
            "root": { "@xmlns": "http://example.com", "a": "b" }
        });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["xmlns", "http://example.com", "<a>b</a>"]);
    }

    #[test]
    fn build_xml_string_with_array() {
        let el = json!({
            "root": { "item": [ { "x": "1" }, { "x": "2" } ] }
        });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<item>", "<x>1</x>", "<x>2</x>"]);
    }

    #[test]
    fn build_xml_string_without_declaration() {
        let el = json!({ "root": { "a": "b" } });
        let out = build_xml_string(&el);
        assert!(!out.contains("<?xml"), "unexpected declaration in {:?}", out);
        assert_contains_all(&out, &["<root>"]);
    }

    #[test]
    fn build_xml_string_with_text_comment_cdata() {
        let root = json!({
            "#text": "text",
            "#comment": " a comment ",
            "#cdata": "<cdata>"
        });
        let el = json!({
            "?xml": { "@version": "1.0" },
            "root": root
        });
        let out = build_xml_string(&el);
        assert_contains_all(
            &out,
            &["text", "<!--", " a comment ", "<![CDATA[", "<cdata>"],
        );
    }

    #[test]
    fn build_xml_string_with_declaration_encoding_standalone() {
        let el = json!({
            "?xml": { "@version": "1.0", "@encoding": "UTF-8", "@standalone": "yes" },
            "root": { "a": "b" }
        });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<?xml", "UTF-8", "standalone", "<root>"]);
    }

    #[test]
    fn build_xml_string_primitive_sibling_children() {
        // Root with multiple children: one object plus primitives (number and bool).
        let el = json!({
            "root": { "obj": { "x": "1" }, "num": 42, "flag": true }
        });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<obj>", "<num>42</num>", "<flag>true</flag>"]);
    }

    #[test]
    fn build_xml_string_null_child_value() {
        let el = json!({
            "root": { "empty": null }
        });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<empty>", "</empty>"]);
    }

    #[test]
    fn build_xml_string_cdata_only_no_text_or_comment() {
        let root = json!({ "#cdata": "only cdata content" });
        let el = json!({ "?xml": { "@version": "1.0" }, "root": root });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<![CDATA[", "only cdata content"]);
    }

    #[test]
    fn build_xml_string_declaration_only_defaults_root_key() {
        let el = json!({ "?xml": { "@version": "1.0", "@encoding": "UTF-8" } });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<?xml", "<root>"]);
    }

    #[test]
    fn build_xml_string_declaration_non_object_skips_decl_write() {
        let el = json!({ "?xml": "not-an-object", "root": { "a": "b" } });
        let out = build_xml_string(&el);
        assert!(!out.contains("<?xml"), "unexpected declaration in {:?}", out);
        assert_contains_all(&out, &["<root>"]);
    }

    #[test]
    fn build_xml_string_root_value_array_sibling_elements() {
        // Root value is an array: each item is written as its own `root` element.
        let el = json!({
            "root": [ { "a": "1" }, { "b": "2" } ]
        });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<root>", "<a>1</a>", "<b>2</b>", "</root>"]);
    }

    #[test]
    fn build_xml_string_root_value_primitive() {
        // Root value is a primitive: written as a single compact element.
        let el = json!({ "root": 42 });
        let out = build_xml_string(&el);
        assert_contains_all(&out, &["<root>42</root>"]);
    }

    #[test]
    fn build_xml_string_attribute_value_object_uses_serde_fallback() {
        // An object-valued attribute is serialized to JSON text by value_to_string.
        let el = json!({
            "root": { "@complex": { "nested": true }, "child": "v" }
        });
        let out = build_xml_string(&el);
        // The attribute itself must be present and carry the serialized object.
        assert_contains_all(&out, &["complex=", "nested", "true"]);
        assert_contains_all(&out, &["<child>v</child>"]);
    }
}