Skip to main content

config_disassembler/xml/
multi_level.rs

1//! Multi-level disassembly: strip a root element and re-disassemble with different unique-id elements.
2
3use serde_json::{Map, Value};
4
5use crate::xml::builders::build_xml_string;
6use crate::xml::types::{MultiLevelConfig, XmlElement};
7
8/// Strip the given element and build a new XML string.
9/// - If it is the root element: its inner content becomes the new document (with ?xml preserved).
10/// - If it is a child of the root (e.g. programProcesses under LoyaltyProgramSetup): unwrap it so
11///   its inner content becomes the direct children of the root; the root element is kept.
12pub fn strip_root_and_build_xml(parsed: &XmlElement, element_to_strip: &str) -> Option<String> {
13    let obj = parsed.as_object()?;
14    let root_key = obj.keys().find(|k| *k != "?xml")?.clone();
15    let root_val = obj.get(&root_key)?.as_object()?;
16    let decl = obj.get("?xml").cloned().unwrap_or_else(|| {
17        let mut d = Map::new();
18        d.insert("@version".to_string(), Value::String("1.0".to_string()));
19        d.insert("@encoding".to_string(), Value::String("UTF-8".to_string()));
20        Value::Object(d)
21    });
22
23    if root_key == element_to_strip {
24        // Strip the root: new doc = ?xml + inner content of root (element keys only, not @attributes)
25        let mut new_obj = Map::new();
26        new_obj.insert("?xml".to_string(), decl);
27        for (k, v) in root_val {
28            if !k.starts_with('@') {
29                new_obj.insert(k.clone(), v.clone());
30            }
31        }
32        return Some(build_xml_string(&Value::Object(new_obj)));
33    }
34
35    // Strip a child of the root: unwrap it so its inner content becomes direct children of the root
36    let inner = root_val.get(element_to_strip)?.as_object()?;
37    let mut new_root_val = Map::new();
38    for (k, v) in root_val {
39        if k != element_to_strip {
40            new_root_val.insert(k.clone(), v.clone());
41        }
42    }
43    for (k, v) in inner {
44        new_root_val.insert(k.clone(), v.clone());
45    }
46    let mut new_obj = Map::new();
47    new_obj.insert("?xml".to_string(), decl);
48    new_obj.insert(root_key, Value::Object(new_root_val));
49    Some(build_xml_string(&Value::Object(new_obj)))
50}
51
52/// Capture xmlns from the root element (e.g. LoyaltyProgramSetup) for later wrap.
53pub fn capture_xmlns_from_root(parsed: &XmlElement) -> Option<String> {
54    let obj = parsed.as_object()?;
55    let root_key = obj.keys().find(|k| *k != "?xml")?.clone();
56    let root_val = obj.get(&root_key)?.as_object()?;
57    let xmlns = root_val.get("@xmlns")?.as_str()?;
58    Some(xmlns.to_string())
59}
60
/// Derive the path segment from a file pattern by keeping everything before the first `-`
/// (e.g. `"programProcesses-meta"` -> `"programProcesses"`).
///
/// A pattern without any `-` is returned unchanged; a pattern that starts with `-` yields an
/// empty string.
pub fn path_segment_from_file_pattern(file_pattern: &str) -> String {
    match file_pattern.split_once('-') {
        Some((segment, _suffix)) => segment.to_owned(),
        None => file_pattern.to_owned(),
    }
}
71
72/// Load multi-level config from a directory (reads .multi_level.json).
73pub async fn load_multi_level_config(dir_path: &std::path::Path) -> Option<MultiLevelConfig> {
74    let path = dir_path.join(".multi_level.json");
75    let content = tokio::fs::read_to_string(&path).await.ok()?;
76    serde_json::from_str(&content).ok()
77}
78
79/// Persist multi-level config to a directory.
80pub async fn save_multi_level_config(
81    dir_path: &std::path::Path,
82    config: &MultiLevelConfig,
83) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
84    let path = dir_path.join(".multi_level.json");
85    let content = serde_json::to_string_pretty(config)?;
86    tokio::fs::write(path, content).await?;
87    Ok(())
88}
89
90/// True when the root element's only non-attribute child has the
91/// inner-wrapper name we're looking for. Pure helper extracted from
92/// `ensure_segment_files_structure` so the
93/// `non_attr_keys.len() == 1 && non_attr_keys[0] == inner_wrapper`
94/// conjunction can be exercised in isolation.
95fn has_single_inner_wrapper(
96    root_val: &serde_json::Map<String, serde_json::Value>,
97    inner_wrapper: &str,
98) -> bool {
99    let non_attr_keys: Vec<&String> = root_val.keys().filter(|k| *k != "@xmlns").collect();
100    non_attr_keys.len() == 1 && non_attr_keys[0].as_str() == inner_wrapper
101}
102
/// Decide whether a segment file's existing inner wrapper must be unwrapped
/// before the file is re-wrapped.
///
/// The answer is `true` only when the file's current root is already
/// `document_root` *and* that root has the inner wrapper as its single child
/// (`single_inner`), i.e. the file is shaped as
/// `<document_root>…<inner_wrapper>X</inner_wrapper></document_root>`.
/// In that case the caller re-wraps `X` itself; re-wrapping the whole root
/// content instead would nest the inner wrapper inside another inner wrapper.
fn should_unwrap_inner_segment(
    current_root_key: &str,
    document_root: &str,
    single_inner: bool,
) -> bool {
    if current_root_key != document_root {
        return false;
    }
    single_inner
}
118
119/// Ensure all XML files in a segment directory have structure:
120/// document_root (with xmlns) > inner_wrapper (no xmlns) > content.
121/// Used after inner-level reassembly for multi-level (e.g. LoyaltyProgramSetup > programProcesses).
122pub async fn ensure_segment_files_structure(
123    dir_path: &std::path::Path,
124    document_root: &str,
125    inner_wrapper: &str,
126    xmlns: &str,
127) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
128    use crate::xml::parsers::parse_xml_from_str;
129    use serde_json::Map;
130
131    let mut entries = Vec::new();
132    let mut read_dir = tokio::fs::read_dir(dir_path).await?;
133    while let Some(entry) = read_dir.next_entry().await? {
134        entries.push(entry);
135    }
136    // Sort for deterministic cross-platform ordering
137    entries.sort_by_key(|e| e.file_name());
138
139    for entry in entries {
140        let path = entry.path();
141        if !path.is_file() {
142            continue;
143        }
144        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
145        if !name.ends_with(".xml") {
146            continue;
147        }
148        let path_str = path.to_string_lossy();
149        // Read errors on a file the walker just reported as present are essentially impossible
150        // (concurrent deletion); treat the content as empty so downstream lookups skip naturally.
151        let content = tokio::fs::read_to_string(&path).await.unwrap_or_default();
152        let Some(parsed) = parse_xml_from_str(&content, &path_str) else {
153            continue;
154        };
155        // parse_xml_from_str always yields a JSON object when it returns Some; fall back to an
156        // empty map for any unexpected shape so subsequent lookups simply produce None.
157        let obj = parsed.as_object().cloned().unwrap_or_default();
158        let Some(current_root_key) = obj.keys().find(|k| *k != "?xml").cloned() else {
159            continue;
160        };
161        let root_val = obj
162            .get(&current_root_key)
163            .and_then(|v| v.as_object())
164            .cloned()
165            .unwrap_or_default();
166
167        let decl = obj.get("?xml").cloned().unwrap_or_else(|| {
168            let mut d = Map::new();
169            d.insert(
170                "@version".to_string(),
171                serde_json::Value::String("1.0".to_string()),
172            );
173            d.insert(
174                "@encoding".to_string(),
175                serde_json::Value::String("UTF-8".to_string()),
176            );
177            serde_json::Value::Object(d)
178        });
179
180        let single_inner = has_single_inner_wrapper(&root_val, inner_wrapper);
181        let inner_content: serde_json::Value =
182            if should_unwrap_inner_segment(&current_root_key, document_root, single_inner) {
183                let inner_obj = root_val
184                    .get(inner_wrapper)
185                    .and_then(|v| v.as_object())
186                    .cloned()
187                    .unwrap_or_else(Map::new);
188                let mut inner_clean = Map::new();
189                for (k, v) in &inner_obj {
190                    if k != "@xmlns" {
191                        inner_clean.insert(k.clone(), v.clone());
192                    }
193                }
194                serde_json::Value::Object(inner_clean)
195            } else {
196                // The inner wrapper must not carry an `xmlns` attribute (only the document
197                // root keeps it). Strip it from the cloned content so nested-rule wrapping
198                // doesn't emit `<inner_wrapper xmlns="...">` siblings.
199                let mut inner_clean = Map::new();
200                for (k, v) in &root_val {
201                    if k != "@xmlns" {
202                        inner_clean.insert(k.clone(), v.clone());
203                    }
204                }
205                serde_json::Value::Object(inner_clean)
206            };
207
208        let already_correct = current_root_key == document_root
209            && root_val.get("@xmlns").is_some()
210            && single_inner
211            && root_val
212                .get(inner_wrapper)
213                .and_then(|v| v.as_object())
214                .map(|o| !o.contains_key("@xmlns"))
215                .unwrap_or(true);
216        if already_correct {
217            continue;
218        }
219
220        // Build document_root (with @xmlns only on root) > inner_wrapper (no xmlns) > content
221        let mut root_val_new = Map::new();
222        if !xmlns.is_empty() {
223            root_val_new.insert(
224                "@xmlns".to_string(),
225                serde_json::Value::String(xmlns.to_string()),
226            );
227        }
228        root_val_new.insert(inner_wrapper.to_string(), inner_content);
229
230        let mut top = Map::new();
231        top.insert("?xml".to_string(), decl);
232        top.insert(
233            document_root.to_string(),
234            serde_json::Value::Object(root_val_new),
235        );
236        let wrapped = serde_json::Value::Object(top);
237        let xml_string = build_xml_string(&wrapped);
238        tokio::fs::write(&path, xml_string).await?;
239    }
240    Ok(())
241}
242
/// Unit tests for the multi-level helpers: pure functions are checked directly, file-based
/// functions are exercised against temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn path_segment_from_file_pattern_strips_suffix() {
        assert_eq!(
            path_segment_from_file_pattern("programProcesses-meta"),
            "programProcesses"
        );
    }

    #[test]
    fn path_segment_from_file_pattern_no_dash() {
        assert_eq!(path_segment_from_file_pattern("foo"), "foo");
    }

    #[test]
    fn strip_root_and_build_xml_strips_child_not_root() {
        let parsed = json!({
            "?xml": { "@version": "1.0" },
            "Root": {
                "programProcesses": { "a": "1", "b": "2" },
                "label": "x"
            }
        });
        let out = strip_root_and_build_xml(&parsed, "programProcesses").unwrap();
        assert!(out.contains("<Root>"));
        assert!(out.contains("<a>1</a>"));
        assert!(out.contains("<b>2</b>"));
        assert!(out.contains("<label>x</label>"));
        // The wrapper itself must be gone, otherwise nothing was actually stripped.
        assert!(!out.contains("programProcesses"));
    }

    #[test]
    fn strip_root_and_build_xml_strips_root_excludes_attributes() {
        let parsed = json!({
            "?xml": { "@version": "1.0" },
            "LoyaltyProgramSetup": {
                "@xmlns": "http://example.com",
                "programProcesses": { "x": "1" }
            }
        });
        let out = strip_root_and_build_xml(&parsed, "LoyaltyProgramSetup").unwrap();
        assert!(!out.contains("@xmlns"));
        assert!(out.contains("programProcesses"));
    }

    #[test]
    fn capture_xmlns_from_root_returns_some() {
        let parsed = json!({
            "Root": { "@xmlns": "http://ns.example.com" }
        });
        assert_eq!(
            capture_xmlns_from_root(&parsed),
            Some("http://ns.example.com".to_string())
        );
    }

    #[test]
    fn capture_xmlns_from_root_returns_none_when_absent() {
        let parsed = json!({ "Root": { "child": "x" } });
        assert!(capture_xmlns_from_root(&parsed).is_none());
    }

    #[tokio::test]
    async fn save_and_load_multi_level_config() {
        let dir = tempfile::tempdir().unwrap();
        let config = MultiLevelConfig {
            rules: vec![crate::xml::types::MultiLevelRule {
                file_pattern: "test-meta".to_string(),
                root_to_strip: "Root".to_string(),
                unique_id_elements: "id".to_string(),
                path_segment: "test".to_string(),
                wrap_root_element: "Root".to_string(),
                wrap_xmlns: "http://example.com".to_string(),
            }],
        };
        save_multi_level_config(dir.path(), &config).await.unwrap();
        let loaded = load_multi_level_config(dir.path()).await.unwrap();
        assert_eq!(loaded.rules.len(), 1);
        assert_eq!(loaded.rules[0].path_segment, "test");
    }

    #[tokio::test]
    async fn load_multi_level_config_missing_file_returns_none() {
        let dir = tempfile::tempdir().unwrap();
        assert!(load_multi_level_config(dir.path()).await.is_none());
    }

    #[tokio::test]
    async fn ensure_segment_files_structure_adds_xmlns_and_rewrites() {
        let dir = tempfile::tempdir().unwrap();
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<Root>
  <programProcesses><x>1</x></programProcesses>
</Root>"#;
        let path = dir.path().join("segment.xml");
        tokio::fs::write(&path, xml).await.unwrap();
        ensure_segment_files_structure(
            dir.path(),
            "Root",
            "programProcesses",
            "http://example.com",
        )
        .await
        .unwrap();
        let out = tokio::fs::read_to_string(&path).await.unwrap();
        assert!(out.contains("http://example.com"));
        assert!(out.contains("<programProcesses>"));
        assert!(out.contains("<x>1</x>"));
    }

    #[tokio::test]
    async fn ensure_segment_files_structure_skips_already_correct_files() {
        // Root wraps inner_wrapper and has xmlns; inner has no xmlns -> no rewrite.
        let dir = tempfile::tempdir().unwrap();
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<Root xmlns="http://example.com"><programProcesses><x>1</x></programProcesses></Root>"#;
        let path = dir.path().join("ok.xml");
        tokio::fs::write(&path, xml).await.unwrap();
        let before = tokio::fs::metadata(&path).await.unwrap().modified().ok();
        ensure_segment_files_structure(
            dir.path(),
            "Root",
            "programProcesses",
            "http://example.com",
        )
        .await
        .unwrap();
        let after = tokio::fs::metadata(&path).await.unwrap().modified().ok();
        assert_eq!(before, after, "already-correct files must be left as-is");
        // Timestamps can be too coarse to reveal a rewrite, so also compare the bytes on disk.
        let content_after = tokio::fs::read_to_string(&path).await.unwrap();
        assert_eq!(
            content_after, xml,
            "already-correct files must keep their exact content"
        );
    }

    #[tokio::test]
    async fn ensure_segment_files_structure_skips_non_xml_and_subdirs() {
        let dir = tempfile::tempdir().unwrap();
        tokio::fs::create_dir(dir.path().join("nested"))
            .await
            .unwrap();
        tokio::fs::write(dir.path().join("notes.txt"), "hello")
            .await
            .unwrap();
        tokio::fs::write(dir.path().join("broken.xml"), "<<not xml>")
            .await
            .unwrap();
        // No XML payload that matches; should succeed without writing anything.
        ensure_segment_files_structure(
            dir.path(),
            "Root",
            "programProcesses",
            "http://example.com",
        )
        .await
        .unwrap();
        // broken.xml remains unchanged
        let raw = tokio::fs::read_to_string(dir.path().join("broken.xml"))
            .await
            .unwrap();
        assert_eq!(raw, "<<not xml>");
    }

    #[tokio::test]
    async fn ensure_segment_files_structure_skips_xml_missing_root() {
        // Empty file: there is no root element to restructure, the call must still succeed.
        let dir = tempfile::tempdir().unwrap();
        tokio::fs::write(dir.path().join("empty.xml"), "")
            .await
            .unwrap();
        ensure_segment_files_structure(dir.path(), "Root", "programProcesses", "")
            .await
            .unwrap();
    }

    /// Build a JSON map from `(key, value)` pairs, inserting them in the given order.
    fn map_from(pairs: &[(&str, serde_json::Value)]) -> serde_json::Map<String, serde_json::Value> {
        let mut m = serde_json::Map::new();
        for (k, v) in pairs {
            m.insert((*k).to_string(), v.clone());
        }
        m
    }

    #[test]
    fn has_single_inner_wrapper_true_for_single_matching_child() {
        let m = map_from(&[("inner", json!({"a": 1}))]);
        assert!(has_single_inner_wrapper(&m, "inner"));
    }

    #[test]
    fn has_single_inner_wrapper_true_when_only_attribute_is_xmlns_sibling() {
        // The `@xmlns` attribute must not be counted as a child, so an
        // xmlns-carrying root still counts as a "thin" wrapper when its
        // single non-attribute child matches.
        let m = map_from(&[
            ("@xmlns", json!("http://example.com")),
            ("inner", json!({"a": 1})),
        ]);
        assert!(has_single_inner_wrapper(&m, "inner"));
    }

    #[test]
    fn has_single_inner_wrapper_false_when_multiple_non_attribute_children() {
        let m = map_from(&[("inner", json!({})), ("other", json!({}))]);
        assert!(!has_single_inner_wrapper(&m, "inner"));
    }

    #[test]
    fn has_single_inner_wrapper_false_when_only_child_name_differs() {
        let m = map_from(&[("notInner", json!({"a": 1}))]);
        assert!(!has_single_inner_wrapper(&m, "inner"));
    }

    #[test]
    fn has_single_inner_wrapper_false_when_empty() {
        let m = serde_json::Map::new();
        assert!(!has_single_inner_wrapper(&m, "inner"));
    }

    #[test]
    fn should_unwrap_inner_segment_true_when_root_matches_and_single_inner() {
        // Document root matches and the file already has the thin
        // `<doc_root>…<inner_wrapper>…</inner_wrapper></doc_root>` shape.
        // Returning true triggers the inner-content unwrap so we don't
        // emit a double-wrapped file on the next write.
        assert!(should_unwrap_inner_segment("Doc", "Doc", true));
    }

    #[test]
    fn should_unwrap_inner_segment_false_when_current_root_differs() {
        // A nested segment file whose current root is the inner
        // wrapper itself (not the document root) must NOT be unwrapped —
        // its existing content already lives one level below the inner
        // wrapper that we'll re-add.
        assert!(!should_unwrap_inner_segment("Other", "Doc", true));
    }

    #[test]
    fn should_unwrap_inner_segment_false_when_not_single_inner() {
        // Even when the document root matches, a file with multiple
        // non-attribute children is not the thin-wrapper case.
        assert!(!should_unwrap_inner_segment("Doc", "Doc", false));
    }
}