1use crate::builders::{build_disassembled_file, extract_root_attributes};
4use crate::parsers::{extract_xml_declaration_from_raw, parse_element_unified};
5use crate::types::{
6 BuildDisassembledFilesOptions, DecomposeRule, XmlElementArrayMap, XmlElementParams,
7};
8use crate::utils::normalize_path_unix;
9use serde_json::{Map, Value};
10use std::collections::HashMap;
11use tokio::fs;
12
/// Maximum number of elements per batch when `disassemble_element_keys` walks
/// the elements of a child key (the chunk size handed to `slice::chunks`).
const BATCH_SIZE: usize = 20;
14
15fn get_root_info(parsed_xml: &Value) -> Option<(String, Value)> {
16 let obj = parsed_xml.as_object()?;
17 let root_element_name = obj.keys().find(|k| *k != "?xml")?.clone();
18 let root_element = obj.get(&root_element_name)?.clone();
19 Some((root_element_name, root_element))
20}
21
22fn order_xml_element_keys(content: &Map<String, Value>, key_order: &[String]) -> Value {
23 let mut ordered = Map::new();
24 for key in key_order {
25 if let Some(v) = content.get(key) {
26 ordered.insert(key.clone(), v.clone());
27 }
28 }
29 Value::Object(ordered)
30}
31
32#[allow(clippy::too_many_arguments)]
33async fn disassemble_element_keys(
34 root_element: &Value,
35 key_order: &[String],
36 disassembled_path: &str,
37 root_element_name: &str,
38 root_attributes: &Value,
39 xml_declaration: Option<&Value>,
40 unique_id_elements: Option<&str>,
41 strategy: &str,
42 format: &str,
43) -> (Map<String, Value>, XmlElementArrayMap, usize, bool) {
44 let mut leaf_content = Map::new();
45 let mut nested_groups = XmlElementArrayMap::new();
46 let mut leaf_count = 0usize;
47 let mut has_nested_elements = false;
48
49 let empty_map = Map::new();
50 let root_obj = root_element.as_object().unwrap_or(&empty_map);
51
52 let ordered: Vec<(&String, &Value)> = key_order
55 .iter()
56 .filter_map(|k| root_obj.get_key_value(k))
57 .collect();
58 for (key, val) in ordered {
59 let elements: Vec<Value> = match val.as_array() {
60 Some(arr) => arr.clone(),
61 None => vec![val.clone()],
62 };
63
64 for chunk in elements.chunks(BATCH_SIZE) {
65 for element in chunk {
66 let result = parse_element_unified(XmlElementParams {
67 element: element.clone(),
68 disassembled_path,
69 unique_id_elements,
70 root_element_name,
71 root_attributes: root_attributes.clone(),
72 key,
73 leaf_content: Value::Object(Map::new()),
74 leaf_count,
75 has_nested_elements,
76 format,
77 xml_declaration: xml_declaration.cloned(),
78 strategy,
79 })
80 .await;
81
82 if let Some(arr) = result.leaf_content.as_object().and_then(|o| o.get(key)) {
83 match leaf_content.get_mut(key).and_then(|v| v.as_array_mut()) {
84 Some(existing_arr) => {
85 if let Some(new_arr) = arr.as_array() {
86 existing_arr.extend(new_arr.iter().cloned());
87 }
88 }
89 None => {
90 leaf_content.insert(key.clone(), arr.clone());
91 }
92 }
93 }
94
95 if strategy == "grouped-by-tag" {
96 if let Some(groups) = result.nested_groups {
97 for (tag, arr) in groups {
98 nested_groups.entry(tag).or_default().extend(arr);
99 }
100 }
101 }
102
103 leaf_count = result.leaf_count;
104 has_nested_elements = result.has_nested_elements;
105 }
106 }
107 }
108
109 (leaf_content, nested_groups, leaf_count, has_nested_elements)
110}
111
112fn get_field_value(element: &Value, field: &str) -> Option<String> {
114 let v = element.as_object()?.get(field)?;
115 if let Some(s) = v.as_str() {
116 return Some(s.to_string());
117 }
118 v.as_object()
119 .and_then(|child| child.get("#text"))
120 .and_then(|t| t.as_str())
121 .map(|s| s.to_string())
122}
123
/// Returns the part of `s` before its first `'.'`, or all of `s` when it
/// contains no dot.
fn group_key_from_field_value(s: &str) -> &str {
    match s.split_once('.') {
        Some((prefix, _)) => prefix,
        None => s,
    }
}
128
/// Replaces every character of `s` that is not an ASCII letter, an ASCII digit,
/// `-`, `_` or `.` with a single `_`.
fn sanitize_filename(s: &str) -> String {
    let mut sanitized = String::with_capacity(s.len());
    for ch in s.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.');
        sanitized.push(if keep { ch } else { '_' });
    }
    sanitized
}
141
142async fn write_nested_groups(
143 nested_groups: &XmlElementArrayMap,
144 strategy: &str,
145 options: &WriteNestedOptions<'_>,
146) {
147 if strategy != "grouped-by-tag" {
148 return;
149 }
150 let decompose_by_tag: HashMap<&str, &DecomposeRule> = options
151 .decompose_rules
152 .map(|rules| rules.iter().map(|r| (r.tag.as_str(), r)).collect())
153 .unwrap_or_default();
154
155 for (tag, arr) in nested_groups {
156 let rule = decompose_by_tag.get(tag.as_str());
157 let path_segment = rule
158 .map(|r| {
159 if r.path_segment.is_empty() {
160 &r.tag
161 } else {
162 &r.path_segment
163 }
164 })
165 .unwrap_or(tag);
166
167 if let Some(r) = rule {
168 if r.mode == "split" {
169 for (idx, item) in arr.iter().enumerate() {
170 let name = get_field_value(item, &r.field)
171 .as_deref()
172 .map(sanitize_filename)
173 .filter(|s: &String| !s.is_empty())
174 .unwrap_or_else(|| idx.to_string());
175 let file_name = format!("{}.{}-meta.{}", name, tag, options.format);
176 let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
177 content: item.clone(),
178 disassembled_path: options.disassembled_path,
179 output_file_name: Some(&file_name),
180 subdirectory: Some(path_segment),
181 wrap_key: Some(tag),
182 is_grouped_array: false,
183 root_element_name: options.root_element_name,
184 root_attributes: options.root_attributes.clone(),
185 format: options.format,
186 xml_declaration: options.xml_declaration.clone(),
187 unique_id_elements: None,
188 })
189 .await;
190 }
191 } else if r.mode == "group" {
192 let mut by_key: HashMap<String, Vec<Value>> = HashMap::new();
193 for item in arr {
194 let key = get_field_value(item, &r.field)
195 .as_deref()
196 .map(group_key_from_field_value)
197 .map(sanitize_filename)
198 .filter(|s: &String| !s.is_empty())
199 .unwrap_or_else(|| "unknown".to_string());
200 by_key.entry(key).or_default().push(item.clone());
201 }
202 let mut sorted_keys: Vec<_> = by_key.keys().cloned().collect();
204 sorted_keys.sort();
205 for key in sorted_keys {
206 let group = by_key.remove(&key).unwrap();
207 let file_name = format!("{}.{}-meta.{}", key, tag, options.format);
208 let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
209 content: Value::Array(group),
210 disassembled_path: options.disassembled_path,
211 output_file_name: Some(&file_name),
212 subdirectory: Some(path_segment),
213 wrap_key: Some(tag),
214 is_grouped_array: true,
215 root_element_name: options.root_element_name,
216 root_attributes: options.root_attributes.clone(),
217 format: options.format,
218 xml_declaration: options.xml_declaration.clone(),
219 unique_id_elements: None,
220 })
221 .await;
222 }
223 } else {
224 fallback_write_one_file(tag, arr, path_segment, options).await;
225 }
226 } else {
227 fallback_write_one_file(tag, arr, path_segment, options).await;
228 }
229 }
230}
231
232async fn fallback_write_one_file(
233 tag: &str,
234 arr: &[Value],
235 _path_segment: &str,
236 options: &WriteNestedOptions<'_>,
237) {
238 let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
239 content: Value::Array(arr.to_vec()),
240 disassembled_path: options.disassembled_path,
241 output_file_name: Some(&format!("{}.{}", tag, options.format)),
242 subdirectory: None,
243 wrap_key: Some(tag),
244 is_grouped_array: true,
245 root_element_name: options.root_element_name,
246 root_attributes: options.root_attributes.clone(),
247 format: options.format,
248 xml_declaration: options.xml_declaration.clone(),
249 unique_id_elements: None,
250 })
251 .await;
252}
253
254struct WriteNestedOptions<'a> {
255 disassembled_path: &'a str,
256 root_element_name: &'a str,
257 root_attributes: Value,
258 xml_declaration: Option<Value>,
259 format: &'a str,
260 decompose_rules: Option<&'a [DecomposeRule]>,
261}
262
263pub async fn build_disassembled_files_unified(
264 options: BuildDisassembledFilesOptions<'_>,
265) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
266 let BuildDisassembledFilesOptions {
267 file_path,
268 disassembled_path,
269 base_name,
270 post_purge,
271 format,
272 unique_id_elements,
273 strategy,
274 decompose_rules,
275 } = options;
276
277 let file_path = normalize_path_unix(file_path);
278
279 let xml_content = match fs::read_to_string(&file_path).await {
280 Ok(c) => c,
281 Err(_) => return Ok(()),
282 };
283
284 let parsed_xml = match crate::parsers::parse_xml_from_str(&xml_content, &file_path) {
285 Some(p) => p,
286 None => return Ok(()),
287 };
288
289 let (root_element_name, root_element) = match get_root_info(&parsed_xml) {
290 Some(info) => info,
291 None => return Ok(()),
292 };
293 let xml_declaration = extract_xml_declaration_from_raw(&xml_content);
295
296 let root_attributes = extract_root_attributes(&root_element);
297 let key_order: Vec<String> = root_element
298 .as_object()
299 .map(|o| o.keys().filter(|k| !k.starts_with('@')).cloned().collect())
300 .unwrap_or_default();
301
302 let (leaf_content, nested_groups, leaf_count, has_nested_elements) = disassemble_element_keys(
303 &root_element,
304 &key_order,
305 disassembled_path,
306 &root_element_name,
307 &root_attributes,
308 xml_declaration.as_ref(),
309 unique_id_elements,
310 strategy,
311 format,
312 )
313 .await;
314
315 if !has_nested_elements && leaf_count > 0 {
316 log::error!(
317 "The XML file {} only has leaf elements. This file will not be disassembled.",
318 &file_path
319 );
320 return Ok(());
321 }
322
323 let write_opts = WriteNestedOptions {
324 disassembled_path,
325 root_element_name: &root_element_name,
326 root_attributes: root_attributes.clone(),
327 xml_declaration: xml_declaration.clone(),
328 format,
329 decompose_rules,
330 };
331 write_nested_groups(&nested_groups, strategy, &write_opts).await;
332
333 let key_order_path = std::path::Path::new(disassembled_path).join(".key_order.json");
336 let json = serde_json::to_string(&key_order).unwrap_or_else(|_| "[]".to_string());
337 let _ = fs::write(key_order_path, json).await;
338
339 if leaf_count > 0 {
340 let final_leaf_content = if strategy == "grouped-by-tag" {
341 order_xml_element_keys(&leaf_content, &key_order)
342 } else {
343 Value::Object(leaf_content.clone())
344 };
345
346 let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions {
347 content: final_leaf_content,
348 disassembled_path,
349 output_file_name: Some(&format!("{}.{}", base_name, format)),
350 subdirectory: None,
351 wrap_key: None,
352 is_grouped_array: false,
353 root_element_name: &root_element_name,
354 root_attributes: root_attributes.clone(),
355 format,
356 xml_declaration: xml_declaration.clone(),
357 unique_id_elements: None,
358 })
359 .await;
360 }
361
362 if post_purge {
363 let _ = fs::remove_file(&file_path).await;
365 }
366
367 Ok(())
368}
369
/// Unit tests for the private helpers plus the unreadable-source early return
/// of `build_disassembled_files_unified`.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn get_field_value_returns_direct_string() {
        let element = json!({ "field": "value" });
        let found = get_field_value(&element, "field");
        assert_eq!(found.as_deref(), Some("value"));
    }

    #[test]
    fn get_field_value_returns_nested_text() {
        let element = json!({ "field": { "#text": "value" } });
        let found = get_field_value(&element, "field");
        assert_eq!(found.as_deref(), Some("value"));
    }

    #[test]
    fn get_field_value_returns_none_when_missing_or_non_string() {
        // `#text` one level too deep is not picked up.
        let deeply_nested = json!({ "field": { "nested": { "#text": "x" } } });
        assert_eq!(get_field_value(&deeply_nested, "field"), None);
        assert_eq!(get_field_value(&deeply_nested, "missing"), None);

        let scalar = json!("not-an-object");
        assert_eq!(get_field_value(&scalar, "field"), None);
    }

    #[test]
    fn group_key_from_field_value_takes_prefix_before_dot() {
        let cases = [("Account.Name", "Account"), ("NoDot", "NoDot")];
        for (input, expected) in cases {
            assert_eq!(group_key_from_field_value(input), expected);
        }
    }

    #[test]
    fn sanitize_filename_replaces_disallowed_chars_with_underscore() {
        let cases = [("a/b c:d", "a_b_c_d"), ("ok-name_1.xml", "ok-name_1.xml")];
        for (input, expected) in cases {
            assert_eq!(sanitize_filename(input), expected);
        }
    }

    #[test]
    fn order_xml_element_keys_preserves_order_and_drops_absent() {
        let mut content = Map::new();
        for (name, number) in [("b", 2), ("a", 1)] {
            content.insert(name.to_string(), json!(number));
        }
        let key_order = vec!["a".to_string(), "c".to_string(), "b".to_string()];

        let ordered = order_xml_element_keys(&content, &key_order);

        let keys: Vec<&str> = ordered
            .as_object()
            .unwrap()
            .keys()
            .map(String::as_str)
            .collect();
        assert_eq!(keys, ["a", "b"]);
    }

    #[test]
    fn get_root_info_returns_name_and_element() {
        let parsed = json!({ "?xml": {"@version": "1.0"}, "Root": { "child": 1 } });
        let (name, element) = get_root_info(&parsed).expect("root element should be found");
        assert_eq!(name, "Root");
        let root_obj = element.as_object().expect("root element should be an object");
        assert!(root_obj.contains_key("child"));
    }

    #[test]
    fn get_root_info_returns_none_for_non_object_or_decl_only() {
        let not_an_object = json!("s");
        let declaration_only = json!({ "?xml": {} });
        assert!(get_root_info(&not_an_object).is_none());
        assert!(get_root_info(&declaration_only).is_none());
    }

    #[tokio::test]
    async fn unified_build_returns_ok_when_source_unreadable() {
        let workspace = tempfile::tempdir().unwrap();
        let output_dir = workspace.path().join("out");
        let missing_source = workspace.path().join("does_not_exist.xml");

        let outcome = build_disassembled_files_unified(BuildDisassembledFilesOptions {
            file_path: missing_source.to_str().unwrap(),
            disassembled_path: output_dir.to_str().unwrap(),
            base_name: "does_not_exist",
            post_purge: false,
            format: "xml",
            unique_id_elements: None,
            strategy: "unique-id",
            decompose_rules: None,
        })
        .await;

        outcome.unwrap();
        // Nothing may be written when the source cannot be read.
        assert!(!output_dir.exists());
    }
}