xml_disassembler/builders/
build_disassembled_files.rs1use crate::builders::{build_disassembled_file, extract_root_attributes};
4use crate::parsers::{
5 extract_xml_declaration_from_raw, extract_xmlns_from_raw, parse_element_unified,
6};
7use crate::types::{BuildDisassembledFilesOptions, XmlElementArrayMap, XmlElementParams};
8use serde_json::{Map, Value};
9use tokio::fs;
10
/// Chunk length used when `disassemble_element_keys` walks the elements found under one key.
const BATCH_SIZE: usize = 20;
12
13fn get_root_info(parsed_xml: &Value) -> Option<(String, Value, Option<Value>)> {
14 let obj = parsed_xml.as_object()?;
15 let xml_declaration = obj.get("?xml").cloned();
16 let root_element_name = obj.keys().find(|k| *k != "?xml")?.clone();
17 let root_element = obj.get(&root_element_name)?.clone();
18 Some((root_element_name, root_element, xml_declaration))
19}
20
21fn order_xml_element_keys(content: &Map<String, Value>, key_order: &[String]) -> Value {
22 let mut ordered = Map::new();
23 for key in key_order {
24 if let Some(v) = content.get(key) {
25 ordered.insert(key.clone(), v.clone());
26 }
27 }
28 Value::Object(ordered)
29}
30
31#[allow(clippy::too_many_arguments)]
32async fn disassemble_element_keys(
33 root_element: &Value,
34 key_order: &[String],
35 disassembled_path: &str,
36 root_element_name: &str,
37 root_attributes: &Value,
38 xml_declaration: Option<&Value>,
39 unique_id_elements: Option<&str>,
40 strategy: &str,
41 format: &str,
42) -> (Map<String, Value>, XmlElementArrayMap, usize, bool) {
43 let mut leaf_content = Map::new();
44 let mut nested_groups = XmlElementArrayMap::new();
45 let mut leaf_count = 0usize;
46 let mut has_nested_elements = false;
47
48 let empty_map = Map::new();
49 let root_obj = root_element.as_object().unwrap_or(&empty_map);
50
51 for key in key_order {
52 let elements = if let Some(val) = root_obj.get(key) {
53 if val.is_array() {
54 val.as_array().unwrap().clone()
55 } else {
56 vec![val.clone()]
57 }
58 } else {
59 continue;
60 };
61
62 for chunk in elements.chunks(BATCH_SIZE) {
63 for element in chunk {
64 let result = parse_element_unified(XmlElementParams {
65 element: element.clone(),
66 disassembled_path,
67 unique_id_elements,
68 root_element_name,
69 root_attributes: root_attributes.clone(),
70 key,
71 leaf_content: Value::Object(Map::new()),
72 leaf_count,
73 has_nested_elements,
74 format,
75 xml_declaration: xml_declaration.cloned(),
76 strategy,
77 })
78 .await;
79
80 if let Some(obj) = result.leaf_content.as_object() {
81 if let Some(arr) = obj.get(key) {
82 if let Some(existing) = leaf_content.get_mut(key) {
83 if let Some(existing_arr) = existing.as_array_mut() {
84 if let Some(new_arr) = arr.as_array() {
85 existing_arr.extend(new_arr.iter().cloned());
86 }
87 }
88 } else {
89 leaf_content.insert(key.clone(), arr.clone());
90 }
91 }
92 }
93
94 if strategy == "grouped-by-tag" {
95 if let Some(groups) = result.nested_groups {
96 for (tag, arr) in groups {
97 nested_groups.entry(tag).or_default().extend(arr);
98 }
99 }
100 }
101
102 leaf_count = result.leaf_count;
103 has_nested_elements = result.has_nested_elements;
104 }
105 }
106 }
107
108 (leaf_content, nested_groups, leaf_count, has_nested_elements)
109}
110
111async fn write_nested_groups(
112 nested_groups: &XmlElementArrayMap,
113 strategy: &str,
114 options: &WriteNestedOptions<'_>,
115) {
116 if strategy != "grouped-by-tag" {
117 return;
118 }
119 for (tag, arr) in nested_groups {
120 let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
121 content: Value::Array(arr.clone()),
122 disassembled_path: options.disassembled_path,
123 output_file_name: Some(&format!("{}.{}", tag, options.format)),
124 subdirectory: None,
125 wrap_key: Some(tag),
126 is_grouped_array: true,
127 root_element_name: options.root_element_name,
128 root_attributes: options.root_attributes.clone(),
129 format: options.format,
130 xml_declaration: options.xml_declaration.clone(),
131 unique_id_elements: None,
132 })
133 .await;
134 }
135}
136
137struct WriteNestedOptions<'a> {
138 disassembled_path: &'a str,
139 root_element_name: &'a str,
140 root_attributes: Value,
141 xml_declaration: Option<Value>,
142 format: &'a str,
143}
144
145pub async fn build_disassembled_files_unified(
146 options: BuildDisassembledFilesOptions<'_>,
147) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
148 let BuildDisassembledFilesOptions {
149 file_path,
150 disassembled_path,
151 base_name,
152 post_purge,
153 format,
154 unique_id_elements,
155 strategy,
156 } = options;
157
158 let xml_content = match fs::read_to_string(file_path).await {
159 Ok(c) => c,
160 Err(_) => return Ok(()),
161 };
162
163 let parsed_xml = match crate::parsers::parse_xml_from_str(&xml_content, file_path) {
164 Some(p) => p,
165 None => return Ok(()),
166 };
167
168 let (root_element_name, root_element, xml_declaration_from_parse) =
169 match get_root_info(&parsed_xml) {
170 Some(info) => info,
171 None => return Ok(()),
172 };
173 let xml_declaration =
175 xml_declaration_from_parse.or_else(|| extract_xml_declaration_from_raw(&xml_content));
176
177 let mut root_attributes = extract_root_attributes(&root_element);
178 if root_attributes.get("@xmlns").is_none() {
180 if let Some(xmlns) = extract_xmlns_from_raw(&xml_content) {
181 if let Some(obj) = root_attributes.as_object_mut() {
182 obj.insert("@xmlns".to_string(), Value::String(xmlns));
183 }
184 }
185 }
186 let key_order: Vec<String> = root_element
187 .as_object()
188 .map(|o| o.keys().filter(|k| !k.starts_with('@')).cloned().collect())
189 .unwrap_or_default();
190
191 let (leaf_content, nested_groups, leaf_count, has_nested_elements) = disassemble_element_keys(
192 &root_element,
193 &key_order,
194 disassembled_path,
195 &root_element_name,
196 &root_attributes,
197 xml_declaration.as_ref(),
198 unique_id_elements,
199 strategy,
200 format,
201 )
202 .await;
203
204 if !has_nested_elements && leaf_count > 0 {
205 log::error!(
206 "The XML file {} only has leaf elements. This file will not be disassembled.",
207 file_path
208 );
209 return Ok(());
210 }
211
212 let write_opts = WriteNestedOptions {
213 disassembled_path,
214 root_element_name: &root_element_name,
215 root_attributes: root_attributes.clone(),
216 xml_declaration: xml_declaration.clone(),
217 format,
218 };
219 write_nested_groups(&nested_groups, strategy, &write_opts).await;
220
221 let key_order_path = std::path::Path::new(disassembled_path).join(".key_order.json");
223 if let Ok(json) = serde_json::to_string(&key_order) {
224 let _ = fs::write(key_order_path, json).await;
225 }
226
227 if leaf_count > 0 {
228 let final_leaf_content = if strategy == "grouped-by-tag" {
229 order_xml_element_keys(&leaf_content, &key_order)
230 } else {
231 Value::Object(leaf_content.clone())
232 };
233
234 let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
235 content: final_leaf_content,
236 disassembled_path,
237 output_file_name: Some(&format!("{}.{}", base_name, format)),
238 subdirectory: None,
239 wrap_key: None,
240 is_grouped_array: false,
241 root_element_name: &root_element_name,
242 root_attributes: root_attributes.clone(),
243 format,
244 xml_declaration: xml_declaration.clone(),
245 unique_id_elements: None,
246 })
247 .await;
248 }
249
250 if post_purge {
251 let _ = fs::remove_file(file_path).await;
252 }
253
254 Ok(())
255}