Skip to main content

xml_disassembler/builders/
build_xml_string.rs

//! Build an XML string from an XmlElement structure.
2
3use quick_xml::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
4use quick_xml::Writer;
5use serde_json::{Map, Value};
6
7use crate::types::XmlElement;
8
9fn value_to_string(v: &Value) -> String {
10    match v {
11        Value::String(s) => s.clone(),
12        Value::Number(n) => n.to_string(),
13        Value::Bool(b) => b.to_string(),
14        Value::Null => String::new(),
15        _ => serde_json::to_string(v).unwrap_or_default(),
16    }
17}
18
19fn write_element<W: std::io::Write>(
20    writer: &mut Writer<W>,
21    name: &str,
22    content: &Value,
23    indent_level: usize,
24) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
25    let indent = "    ".repeat(indent_level);
26    let child_indent = "    ".repeat(indent_level + 1);
27
28    match content {
29        Value::Object(obj) => {
30            let (attrs, children): (Vec<_>, Vec<_>) =
31                obj.iter().partition(|(k, _)| k.starts_with('@'));
32
33            let attr_name = |k: &str| k.trim_start_matches('@').to_string();
34
35            let mut text_content = String::new();
36            let mut comment_content = String::new();
37            let mut text_tail_content = String::new();
38            let mut cdata_content = String::new();
39            let child_elements: Vec<(&String, &Value)> = children
40                .iter()
41                .filter_map(|(k, v)| {
42                    if *k == "#text" {
43                        text_content = value_to_string(v);
44                        None
45                    } else if *k == "#comment" {
46                        comment_content = value_to_string(v);
47                        None
48                    } else if *k == "#text-tail" {
49                        text_tail_content = value_to_string(v);
50                        None
51                    } else if *k == "#cdata" {
52                        cdata_content = value_to_string(v);
53                        None
54                    } else {
55                        Some((*k, *v))
56                    }
57                })
58                .collect();
59
60            let has_children = child_elements.iter().any(|(_, v)| {
61                v.is_object()
62                    || (v.is_array() && v.as_array().map(|a| !a.is_empty()).unwrap_or(false))
63            });
64
65            let attrs: Vec<(String, String)> = attrs
66                .iter()
67                .map(|(k, v)| (attr_name(k), value_to_string(v)))
68                .collect();
69
70            let mut start = BytesStart::new(name);
71            for (k, v) in &attrs {
72                start.push_attribute((k.as_str(), v.as_str()));
73            }
74            writer.write_event(Event::Start(start))?;
75
76            if has_children || !child_elements.is_empty() {
77                writer.write_event(Event::Text(BytesText::new(
78                    format!("\n{}", child_indent).as_str(),
79                )))?;
80
81                let child_count = child_elements.len();
82                for (idx, (child_name, child_value)) in child_elements.iter().enumerate() {
83                    let is_last = idx == child_count - 1;
84                    match child_value {
85                        Value::Array(arr) => {
86                            let arr_len = arr.len();
87                            for (i, item) in arr.iter().enumerate() {
88                                let arr_last = i == arr_len - 1;
89                                write_element(writer, child_name, item, indent_level + 1)?;
90                                if !arr_last {
91                                    writer.write_event(Event::Text(BytesText::new(
92                                        format!("\n{}", child_indent).as_str(),
93                                    )))?;
94                                }
95                            }
96                            if !is_last {
97                                writer.write_event(Event::Text(BytesText::new(
98                                    format!("\n{}", child_indent).as_str(),
99                                )))?;
100                            }
101                        }
102                        Value::Object(_) => {
103                            write_element(writer, child_name, child_value, indent_level + 1)?;
104                            if !is_last {
105                                writer.write_event(Event::Text(BytesText::new(
106                                    format!("\n{}", child_indent).as_str(),
107                                )))?;
108                            }
109                        }
110                        _ => {
111                            writer
112                                .write_event(Event::Start(BytesStart::new(child_name.as_str())))?;
113                            // BytesText::new() expects unescaped content; the writer escapes when writing
114                            writer.write_event(Event::Text(BytesText::new(
115                                value_to_string(child_value).as_str(),
116                            )))?;
117                            writer.write_event(Event::End(BytesEnd::new(child_name.as_str())))?;
118                            if !is_last {
119                                writer.write_event(Event::Text(BytesText::new(
120                                    format!("\n{}", child_indent).as_str(),
121                                )))?;
122                            }
123                        }
124                    }
125                }
126
127                writer.write_event(Event::Text(BytesText::new(
128                    format!("\n{}", indent).as_str(),
129                )))?;
130            } else if !cdata_content.is_empty()
131                || !text_content.is_empty()
132                || !comment_content.is_empty()
133                || !text_tail_content.is_empty()
134            {
135                // Add newline+indent before content when no leading text (keeps CDATA/comment on separate line)
136                if text_content.is_empty() && comment_content.is_empty() {
137                    writer.write_event(Event::Text(BytesText::new(
138                        format!("\n{}", child_indent).as_str(),
139                    )))?;
140                }
141                // Output in order: #text, #comment, #text-tail, #cdata
142                if !text_content.is_empty() {
143                    writer.write_event(Event::Text(BytesText::new(text_content.as_str())))?;
144                }
145                if !comment_content.is_empty() {
146                    writer.write_event(Event::Comment(BytesText::new(comment_content.as_str())))?;
147                }
148                if !text_tail_content.is_empty() {
149                    writer.write_event(Event::Text(BytesText::new(text_tail_content.as_str())))?;
150                }
151                if !cdata_content.is_empty() {
152                    writer.write_event(Event::CData(BytesCData::new(cdata_content.as_str())))?;
153                }
154                // Add newline+indent before closing tag only for CDATA (keeps compact for text-only)
155                if !cdata_content.is_empty() {
156                    writer.write_event(Event::Text(BytesText::new(
157                        format!("\n{}", indent).as_str(),
158                    )))?;
159                }
160            }
161
162            writer.write_event(Event::End(BytesEnd::new(name)))?;
163        }
164        Value::Array(arr) => {
165            for item in arr {
166                write_element(writer, name, item, indent_level)?;
167            }
168        }
169        _ => {
170            writer.write_event(Event::Start(BytesStart::new(name)))?;
171            // BytesText::new() expects unescaped content; the writer escapes when writing
172            writer.write_event(Event::Text(BytesText::new(
173                value_to_string(content).as_str(),
174            )))?;
175            writer.write_event(Event::End(BytesEnd::new(name)))?;
176        }
177    }
178
179    Ok(())
180}
181
182fn build_xml_from_object(
183    element: &Map<String, Value>,
184) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
185    // Use Writer::new (no indent) so leaf elements stay compact and match fixture format
186    let mut writer = Writer::new(Vec::new());
187
188    let (declaration, root_key, root_value) = if let Some(decl) = element.get("?xml") {
189        let root_key = element
190            .keys()
191            .find(|k| *k != "?xml")
192            .cloned()
193            .unwrap_or_else(|| "root".to_string());
194        let root_value = element
195            .get(&root_key)
196            .cloned()
197            .unwrap_or_else(|| Value::Object(Map::new()));
198        (Some(decl), root_key, root_value)
199    } else {
200        let root_key = element
201            .keys()
202            .next()
203            .cloned()
204            .unwrap_or_else(|| "root".to_string());
205        let root_value = element
206            .get(&root_key)
207            .cloned()
208            .unwrap_or_else(|| Value::Object(Map::new()));
209        (None, root_key, root_value)
210    };
211
212    if declaration.is_some() {
213        if let Some(decl) = declaration {
214            if let Some(obj) = decl.as_object() {
215                let version = obj
216                    .get("@version")
217                    .and_then(|v| v.as_str())
218                    .unwrap_or("1.0");
219                let encoding = obj.get("@encoding").and_then(|v| v.as_str());
220                let standalone = obj.get("@standalone").and_then(|v| v.as_str());
221                writer.write_event(Event::Decl(BytesDecl::new(version, encoding, standalone)))?;
222                writer.write_event(Event::Text(BytesText::new("\n")))?;
223            }
224        }
225    }
226
227    write_element(&mut writer, &root_key, &root_value, 0)?;
228
229    let result = String::from_utf8(writer.into_inner())?;
230    Ok(result.trim_end().to_string())
231}
232
233/// Build XML string from XmlElement.
234pub fn build_xml_string(element: &XmlElement) -> String {
235    match element {
236        Value::Object(obj) => build_xml_from_object(obj).unwrap_or_default(),
237        _ => String::new(),
238    }
239}
240
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Assert that every fragment in `fragments` occurs somewhere in `xml`.
    fn assert_contains_all(xml: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                xml.contains(fragment),
                "expected {:?} in {:?}",
                fragment,
                xml
            );
        }
    }

    #[test]
    fn build_xml_string_non_object_returns_empty() {
        for input in [Value::Array(vec![]), Value::Null] {
            assert!(build_xml_string(&input).is_empty());
        }
    }

    #[test]
    fn build_xml_string_simple_root() {
        let xml = build_xml_string(&json!({
            "?xml": { "@version": "1.0", "@encoding": "UTF-8" },
            "root": { "child": "value" }
        }));
        assert_contains_all(&xml, &["<?xml", "<root>", "<child>value</child>", "</root>"]);
    }

    #[test]
    fn build_xml_string_with_attributes() {
        let xml = build_xml_string(&json!({
            "root": { "@xmlns": "http://example.com", "a": "b" }
        }));
        assert_contains_all(&xml, &["xmlns", "http://example.com", "<a>b</a>"]);
    }

    #[test]
    fn build_xml_string_with_array() {
        let xml = build_xml_string(&json!({
            "root": { "item": [ { "x": "1" }, { "x": "2" } ] }
        }));
        assert_contains_all(&xml, &["<item>", "<x>1</x>", "<x>2</x>"]);
    }

    #[test]
    fn build_xml_string_without_declaration() {
        let xml = build_xml_string(&json!({ "root": { "a": "b" } }));
        assert!(!xml.contains("<?xml"));
        assert!(xml.contains("<root>"));
    }

    #[test]
    fn build_xml_string_with_text_comment_cdata() {
        let root = json!({
            "#text": "text",
            "#comment": " a comment ",
            "#cdata": "<cdata>"
        });
        let xml = build_xml_string(&json!({
            "?xml": { "@version": "1.0" },
            "root": root
        }));
        assert_contains_all(
            &xml,
            &["text", "<!--", " a comment ", "<![CDATA[", "<cdata>"],
        );
    }

    #[test]
    fn build_xml_string_with_declaration_encoding_standalone() {
        let xml = build_xml_string(&json!({
            "?xml": { "@version": "1.0", "@encoding": "UTF-8", "@standalone": "yes" },
            "root": { "a": "b" }
        }));
        assert_contains_all(&xml, &["<?xml", "UTF-8", "standalone", "<root>"]);
    }

    #[test]
    fn build_xml_string_primitive_sibling_children() {
        // Mixed siblings: one object child plus scalar children.
        let xml = build_xml_string(&json!({
            "root": { "obj": { "x": "1" }, "num": 42, "flag": true }
        }));
        assert_contains_all(&xml, &["<obj>", "<num>42</num>", "<flag>true</flag>"]);
    }

    #[test]
    fn build_xml_string_null_child_value() {
        let xml = build_xml_string(&json!({
            "root": { "empty": null }
        }));
        assert_contains_all(&xml, &["<empty>", "</empty>"]);
    }

    #[test]
    fn build_xml_string_cdata_only_no_text_or_comment() {
        let root = json!({ "#cdata": "only cdata content" });
        let xml = build_xml_string(&json!({ "?xml": { "@version": "1.0" }, "root": root }));
        assert_contains_all(&xml, &["<![CDATA[", "only cdata content"]);
    }

    #[test]
    fn build_xml_string_declaration_only_defaults_root_key() {
        let xml = build_xml_string(&json!({ "?xml": { "@version": "1.0", "@encoding": "UTF-8" } }));
        assert_contains_all(&xml, &["<?xml", "<root>"]);
    }

    #[test]
    fn build_xml_string_declaration_non_object_skips_decl_write() {
        let xml = build_xml_string(&json!({ "?xml": "not-an-object", "root": { "a": "b" } }));
        assert!(!xml.contains("<?xml"));
        assert!(xml.contains("<root>"));
    }

    #[test]
    fn build_xml_string_root_value_array_sibling_elements() {
        // The root value itself is an array, so each item is written as a `root` element.
        let xml = build_xml_string(&json!({
            "root": [ { "a": "1" }, { "b": "2" } ]
        }));
        assert_contains_all(&xml, &["<root>", "<a>1</a>", "<b>2</b>", "</root>"]);
    }

    #[test]
    fn build_xml_string_root_value_primitive() {
        // A scalar root value is written directly as the element text.
        let xml = build_xml_string(&json!({ "root": 42 }));
        assert!(xml.contains("<root>42</root>"));
    }

    #[test]
    fn build_xml_string_attribute_value_object_uses_serde_fallback() {
        // An object-valued attribute goes through the JSON-serialization fallback.
        let xml = build_xml_string(&json!({
            "root": { "@complex": { "nested": true }, "child": "v" }
        }));
        assert_contains_all(&xml, &["child", "v"]);
    }
}