Skip to main content

xml_disassembler/builders/
build_disassembled_files.rs

//! Build disassembled files from a source XML file.
2
3use crate::builders::{build_disassembled_file, extract_root_attributes};
4use crate::parsers::{
5    extract_xml_declaration_from_raw, extract_xmlns_from_raw, parse_element_unified,
6};
7use crate::types::{BuildDisassembledFilesOptions, XmlElementArrayMap, XmlElementParams};
8use serde_json::{Map, Value};
9use tokio::fs;
10
/// Maximum number of elements taken per chunk when walking the elements
/// stored under a single root key.
const BATCH_SIZE: usize = 20;
12
13fn get_root_info(parsed_xml: &Value) -> Option<(String, Value, Option<Value>)> {
14    let obj = parsed_xml.as_object()?;
15    let xml_declaration = obj.get("?xml").cloned();
16    let root_element_name = obj.keys().find(|k| *k != "?xml")?.clone();
17    let root_element = obj.get(&root_element_name)?.clone();
18    Some((root_element_name, root_element, xml_declaration))
19}
20
21fn order_xml_element_keys(content: &Map<String, Value>, key_order: &[String]) -> Value {
22    let mut ordered = Map::new();
23    for key in key_order {
24        if let Some(v) = content.get(key) {
25            ordered.insert(key.clone(), v.clone());
26        }
27    }
28    Value::Object(ordered)
29}
30
31#[allow(clippy::too_many_arguments)]
32async fn disassemble_element_keys(
33    root_element: &Value,
34    key_order: &[String],
35    disassembled_path: &str,
36    root_element_name: &str,
37    root_attributes: &Value,
38    xml_declaration: Option<&Value>,
39    unique_id_elements: Option<&str>,
40    strategy: &str,
41    format: &str,
42) -> (Map<String, Value>, XmlElementArrayMap, usize, bool) {
43    let mut leaf_content = Map::new();
44    let mut nested_groups = XmlElementArrayMap::new();
45    let mut leaf_count = 0usize;
46    let mut has_nested_elements = false;
47
48    let empty_map = Map::new();
49    let root_obj = root_element.as_object().unwrap_or(&empty_map);
50
51    for key in key_order {
52        let elements = if let Some(val) = root_obj.get(key) {
53            if val.is_array() {
54                val.as_array().unwrap().clone()
55            } else {
56                vec![val.clone()]
57            }
58        } else {
59            continue;
60        };
61
62        for chunk in elements.chunks(BATCH_SIZE) {
63            for element in chunk {
64                let result = parse_element_unified(XmlElementParams {
65                    element: element.clone(),
66                    disassembled_path,
67                    unique_id_elements,
68                    root_element_name,
69                    root_attributes: root_attributes.clone(),
70                    key,
71                    leaf_content: Value::Object(Map::new()),
72                    leaf_count,
73                    has_nested_elements,
74                    format,
75                    xml_declaration: xml_declaration.cloned(),
76                    strategy,
77                })
78                .await;
79
80                if let Some(obj) = result.leaf_content.as_object() {
81                    if let Some(arr) = obj.get(key) {
82                        if let Some(existing) = leaf_content.get_mut(key) {
83                            if let Some(existing_arr) = existing.as_array_mut() {
84                                if let Some(new_arr) = arr.as_array() {
85                                    existing_arr.extend(new_arr.iter().cloned());
86                                }
87                            }
88                        } else {
89                            leaf_content.insert(key.clone(), arr.clone());
90                        }
91                    }
92                }
93
94                if strategy == "grouped-by-tag" {
95                    if let Some(groups) = result.nested_groups {
96                        for (tag, arr) in groups {
97                            nested_groups.entry(tag).or_default().extend(arr);
98                        }
99                    }
100                }
101
102                leaf_count = result.leaf_count;
103                has_nested_elements = result.has_nested_elements;
104            }
105        }
106    }
107
108    (leaf_content, nested_groups, leaf_count, has_nested_elements)
109}
110
111async fn write_nested_groups(
112    nested_groups: &XmlElementArrayMap,
113    strategy: &str,
114    options: &WriteNestedOptions<'_>,
115) {
116    if strategy != "grouped-by-tag" {
117        return;
118    }
119    for (tag, arr) in nested_groups {
120        let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
121            content: Value::Array(arr.clone()),
122            disassembled_path: options.disassembled_path,
123            output_file_name: Some(&format!("{}.{}", tag, options.format)),
124            subdirectory: None,
125            wrap_key: Some(tag),
126            is_grouped_array: true,
127            root_element_name: options.root_element_name,
128            root_attributes: options.root_attributes.clone(),
129            format: options.format,
130            xml_declaration: options.xml_declaration.clone(),
131            unique_id_elements: None,
132        })
133        .await;
134    }
135}
136
137struct WriteNestedOptions<'a> {
138    disassembled_path: &'a str,
139    root_element_name: &'a str,
140    root_attributes: Value,
141    xml_declaration: Option<Value>,
142    format: &'a str,
143}
144
145pub async fn build_disassembled_files_unified(
146    options: BuildDisassembledFilesOptions<'_>,
147) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
148    let BuildDisassembledFilesOptions {
149        file_path,
150        disassembled_path,
151        base_name,
152        post_purge,
153        format,
154        unique_id_elements,
155        strategy,
156    } = options;
157
158    let xml_content = match fs::read_to_string(file_path).await {
159        Ok(c) => c,
160        Err(_) => return Ok(()),
161    };
162
163    let parsed_xml = match crate::parsers::parse_xml_from_str(&xml_content, file_path) {
164        Some(p) => p,
165        None => return Ok(()),
166    };
167
168    let (root_element_name, root_element, xml_declaration_from_parse) =
169        match get_root_info(&parsed_xml) {
170            Some(info) => info,
171            None => return Ok(()),
172        };
173    // quickxml_to_serde drops the declaration - extract from raw XML if missing
174    let xml_declaration =
175        xml_declaration_from_parse.or_else(|| extract_xml_declaration_from_raw(&xml_content));
176
177    let mut root_attributes = extract_root_attributes(&root_element);
178    // quickxml_to_serde drops xmlns - extract from raw XML and add if missing
179    if root_attributes.get("@xmlns").is_none() {
180        if let Some(xmlns) = extract_xmlns_from_raw(&xml_content) {
181            if let Some(obj) = root_attributes.as_object_mut() {
182                obj.insert("@xmlns".to_string(), Value::String(xmlns));
183            }
184        }
185    }
186    let key_order: Vec<String> = root_element
187        .as_object()
188        .map(|o| o.keys().filter(|k| !k.starts_with('@')).cloned().collect())
189        .unwrap_or_default();
190
191    let (leaf_content, nested_groups, leaf_count, has_nested_elements) = disassemble_element_keys(
192        &root_element,
193        &key_order,
194        disassembled_path,
195        &root_element_name,
196        &root_attributes,
197        xml_declaration.as_ref(),
198        unique_id_elements,
199        strategy,
200        format,
201    )
202    .await;
203
204    if !has_nested_elements && leaf_count > 0 {
205        log::error!(
206            "The XML file {} only has leaf elements. This file will not be disassembled.",
207            file_path
208        );
209        return Ok(());
210    }
211
212    let write_opts = WriteNestedOptions {
213        disassembled_path,
214        root_element_name: &root_element_name,
215        root_attributes: root_attributes.clone(),
216        xml_declaration: xml_declaration.clone(),
217        format,
218    };
219    write_nested_groups(&nested_groups, strategy, &write_opts).await;
220
221    // Persist root key order so reassembly can match original document order.
222    let key_order_path = std::path::Path::new(disassembled_path).join(".key_order.json");
223    if let Ok(json) = serde_json::to_string(&key_order) {
224        let _ = fs::write(key_order_path, json).await;
225    }
226
227    if leaf_count > 0 {
228        let final_leaf_content = if strategy == "grouped-by-tag" {
229            order_xml_element_keys(&leaf_content, &key_order)
230        } else {
231            Value::Object(leaf_content.clone())
232        };
233
234        let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
235            content: final_leaf_content,
236            disassembled_path,
237            output_file_name: Some(&format!("{}.{}", base_name, format)),
238            subdirectory: None,
239            wrap_key: None,
240            is_grouped_array: false,
241            root_element_name: &root_element_name,
242            root_attributes: root_attributes.clone(),
243            format,
244            xml_declaration: xml_declaration.clone(),
245            unique_id_elements: None,
246        })
247        .await;
248    }
249
250    if post_purge {
251        let _ = fs::remove_file(file_path).await;
252    }
253
254    Ok(())
255}