Skip to main content

config_disassembler/xml/handlers/
reassemble.rs

1//! Reassemble XML from disassembled directory.
2
3use crate::xml::builders::{build_xml_string, merge_xml_elements, reorder_root_keys};
4use crate::xml::multi_level::{ensure_segment_files_structure, load_multi_level_config};
5use crate::xml::parsers::parse_to_xml_object;
6use crate::xml::types::{MultiLevelRule, SidecarSpec, XmlElement};
7use crate::xml::utils::normalize_path_unix;
8use serde_json::Value;
9use std::collections::HashSet;
10use std::ffi::OsString;
11use std::future::Future;
12use std::path::{Path, PathBuf};
13use std::pin::Pin;
14use tokio::fs;
15
16/// Read a `.key_order.json` file (if present) and parse it as a list of root key names.
17async fn read_key_order(path: &Path) -> Option<Vec<String>> {
18    let bytes = fs::read(path).await.ok()?;
19    serde_json::from_slice::<Vec<String>>(&bytes).ok()
20}
21
22/// Remove @xmlns from an object so the reassembled segment wrapper (e.g. programProcesses) has no xmlns.
23fn strip_xmlns_from_value(v: Value) -> Value {
24    match v {
25        Value::Object(obj) => {
26            Value::Object(obj.into_iter().filter(|(k, _)| k != "@xmlns").collect())
27        }
28        other => other,
29    }
30}
31
32/// When recursing into a nested multi-level rule's `path_segment`, the
33/// deeper-level recursion needs the *sibling* rules — every rule
34/// except the one we just matched — so a sub-directory that happens
35/// to share its parent's `path_segment` doesn't re-enter the same
36/// rule. Returns the cloned slice with the matched segment filtered
37/// out. Pure helper extracted from
38/// `reassemble_multi_level_segment_inner`.
39fn deeper_candidate_rules(
40    all_rules: &[MultiLevelRule],
41    exclude_path_segment: &str,
42) -> Vec<MultiLevelRule> {
43    all_rules
44        .iter()
45        .filter(|r| r.path_segment != exclude_path_segment)
46        .cloned()
47        .collect()
48}
49
/// Report whether `dir_path` is the disassembly root of any supplied rule.
///
/// Each tuple's first field is the base path its rule was disassembled
/// from; the comparison is an exact string match against `dir_path`. The
/// remaining tuple fields are ignored here. An empty slice yields `false`.
fn is_at_base_path(dir_path: &str, base_segments: &[(String, String, bool)]) -> bool {
    for (base, _segment_name, _extract_inner) in base_segments {
        if base.as_str() == dir_path {
            return true;
        }
    }
    false
}
59
60type ProcessDirFuture<'a> = Pin<
61    Box<
62        dyn Future<Output = Result<Vec<XmlElement>, Box<dyn std::error::Error + Send + Sync>>>
63            + Send
64            + 'a,
65    >,
66>;
67
68type SegmentFuture<'a> =
69    Pin<Box<dyn Future<Output = Result<(), Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>;
70
/// Stateless handler that rebuilds one XML file from a disassembled
/// directory tree.
///
/// The type has no fields, so it is free to copy and can be formatted with
/// `{:?}` for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct ReassembleXmlFileHandler;
72
73impl ReassembleXmlFileHandler {
74    pub fn new() -> Self {
75        Self
76    }
77
78    pub async fn reassemble(
79        &self,
80        file_path: &str,
81        file_extension: Option<&str>,
82        post_purge: bool,
83        sidecar_specs: Option<&[SidecarSpec]>,
84    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
85        let file_path = normalize_path_unix(file_path);
86        if !self.validate_directory(&file_path).await? {
87            return Ok(());
88        }
89
90        let path = Path::new(&file_path);
91        let config = load_multi_level_config(path).await;
92        if let Some(ref config) = config {
93            // Process each rule whose path_segment exists as a directory at the
94            // disassembly root. Inner-only rules (whose segment lives nested under another
95            // rule's item dir) are handled dynamically when the parent rule walks its
96            // items; we hand them in as `nested_rules` candidates here.
97            for (i, rule) in config.rules.iter().enumerate() {
98                let segment_path = path.join(&rule.path_segment);
99                if !segment_path.is_dir() {
100                    continue;
101                }
102                let nested: Vec<MultiLevelRule> = config
103                    .rules
104                    .iter()
105                    .enumerate()
106                    .filter(|(j, _)| *j != i)
107                    .map(|(_, r)| r.clone())
108                    .collect();
109                self.reassemble_multi_level_segment(&segment_path, rule, &nested)
110                    .await?;
111            }
112        }
113
114        // Build one base-segment entry per multi-level rule so the recursive walker can
115        // recognize each rule's path_segment under the disassembly root.
116        let base_segments: Vec<(String, String, bool)> = config
117            .as_ref()
118            .map(|c| {
119                c.rules
120                    .iter()
121                    .map(|r| (file_path.clone(), r.path_segment.clone(), true))
122                    .collect()
123            })
124            .unwrap_or_default();
125        // When multi-level reassembly is done, purge the entire disassembled directory
126        let post_purge_final = post_purge || config.is_some();
127        self.reassemble_plain(
128            &file_path,
129            file_extension,
130            post_purge_final,
131            &base_segments,
132            sidecar_specs,
133        )
134        .await
135    }
136
137    /// Reassemble a single multi-level segment directory.
138    ///
139    /// For each item directory under `segment_path` (e.g. each `<dialog>/` under
140    /// `botDialogs/`):
141    ///
142    /// 1. **Phase 1 — nested rules first.** For every immediate sub-directory whose name
143    ///    matches a `nested_rules` candidate's `path_segment`, recursively reassemble
144    ///    that sub-directory as its own segment. This wraps each per-step file in
145    ///    `<wrap_root_element><inner_segment>...</inner_segment></wrap_root_element>` *before*
146    ///    the outer-level merge sees it, so multiple inner items survive as siblings
147    ///    rather than collapsing into a single bag of leaves.
148    ///
149    /// 2. **Phase 2 — flat sub-directories.** Any remaining sub-directory (anything not
150    ///    consumed by phase 1) is collapsed into a per-item `.xml` at the parent level
151    ///    via [`Self::reassemble_plain`], the original behaviour for things like
152    ///    decompose-rule outputs.
153    ///
154    /// 3. **Phase 3 — merge item.** Everything in the item directory (the `.xml` files
155    ///    written by phases 1 and 2 plus any leaf `.xml` already there) is merged into
156    ///    a single `.xml` at the parent level.
157    ///
158    /// Finally, [`ensure_segment_files_structure`] wraps every `.xml` in `segment_path`
159    /// in `<wrap_root_element><path_segment>...</path_segment></wrap_root_element>` so
160    /// the parent reassembly sees correctly-wrapped siblings.
161    fn reassemble_multi_level_segment<'a>(
162        &'a self,
163        segment_path: &'a Path,
164        rule: &'a MultiLevelRule,
165        nested_rules: &'a [MultiLevelRule],
166    ) -> SegmentFuture<'a> {
167        let segment_path = segment_path.to_path_buf();
168        let rule = rule.clone();
169        let nested_rules = nested_rules.to_vec();
170        Box::pin(async move {
171            self.reassemble_multi_level_segment_inner(&segment_path, &rule, &nested_rules)
172                .await
173        })
174    }
175
176    async fn reassemble_multi_level_segment_inner(
177        &self,
178        segment_path: &Path,
179        rule: &MultiLevelRule,
180        nested_rules: &[MultiLevelRule],
181    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
182        if !segment_path.is_dir() {
183            return Ok(());
184        }
185        let mut entries = Vec::new();
186        let mut read_dir = fs::read_dir(segment_path).await?;
187        while let Some(entry) = read_dir.next_entry().await? {
188            entries.push(entry);
189        }
190        entries.sort_by_key(|e| e.file_name());
191        for entry in entries {
192            let process_path = entry.path();
193            if !process_path.is_dir() {
194                continue;
195            }
196            let process_path_str = normalize_path_unix(&process_path.to_string_lossy());
197            let mut sub_entries = Vec::new();
198            let mut sub_read = fs::read_dir(&process_path).await?;
199            while let Some(e) = sub_read.next_entry().await? {
200                sub_entries.push(e);
201            }
202            sub_entries.sort_by_key(|e| e.file_name());
203
204            // Phase 1: drain any sub-directory that matches a nested rule's
205            // `path_segment` so it is re-wrapped before the outer merge runs.
206            let mut handled: HashSet<OsString> = HashSet::new();
207            for sub_entry in &sub_entries {
208                let sub_path: PathBuf = sub_entry.path();
209                if !sub_path.is_dir() {
210                    continue;
211                }
212                let sub_name = sub_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
213                let Some(nested_rule) = nested_rules.iter().find(|r| r.path_segment == sub_name)
214                else {
215                    continue;
216                };
217                // Pass everything *except* the rule we just matched as deeper candidates.
218                // Sibling rules remain candidates further down the tree without re-entering
219                // the same rule on a sub-dir that happens to share its name.
220                let deeper = deeper_candidate_rules(nested_rules, &nested_rule.path_segment);
221                self.reassemble_multi_level_segment(&sub_path, nested_rule, &deeper)
222                    .await?;
223                handled.insert(sub_entry.file_name());
224            }
225
226            // Phase 2: collapse remaining sub-directories into per-item .xml files at
227            // the parent level (preserves existing behaviour for non-nested-rule subdirs).
228            for sub_entry in &sub_entries {
229                let sub_path = sub_entry.path();
230                if !sub_path.is_dir() {
231                    continue;
232                }
233                if handled.contains(&sub_entry.file_name()) {
234                    continue;
235                }
236                let sub_path_str = normalize_path_unix(&sub_path.to_string_lossy());
237                self.reassemble_plain(&sub_path_str, Some("xml"), true, &[], None)
238                    .await?;
239            }
240
241            // Phase 3: merge everything in the item dir into a single .xml at the parent.
242            self.reassemble_plain(&process_path_str, Some("xml"), true, &[], None)
243                .await?;
244        }
245        ensure_segment_files_structure(
246            segment_path,
247            &rule.wrap_root_element,
248            &rule.path_segment,
249            &rule.wrap_xmlns,
250        )
251        .await?;
252        Ok(())
253    }
254
255    /// Merge and write reassembled XML (no multi-level pre-step). Used internally.
256    /// `base_segments` carries one tuple `(base_path, segment_name, extract_inner)` per
257    /// multi-level rule. When the recursive walker reaches `base_path` and finds a subdir
258    /// whose name matches one of the segment_names, that subdir's XML files are folded
259    /// into a single array under the segment_name key. When extract_inner is true, each
260    /// file's structure is `document_root > segment_name > content` and only the content
261    /// is collected; otherwise the whole root is kept.
262    async fn reassemble_plain(
263        &self,
264        file_path: &str,
265        file_extension: Option<&str>,
266        post_purge: bool,
267        base_segments: &[(String, String, bool)],
268        sidecar_specs: Option<&[SidecarSpec]>,
269    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
270        let file_path = normalize_path_unix(file_path);
271        log::debug!("Parsing directory to reassemble: {}", file_path);
272        let parsed_objects = self
273            .process_files_in_directory(file_path.to_string(), base_segments.to_vec())
274            .await?;
275
276        if parsed_objects.is_empty() {
277            log::error!(
278                "No files under {} were parsed successfully. A reassembled XML file was not created.",
279                file_path
280            );
281            return Ok(());
282        }
283
284        // merge_xml_elements only returns None when every parsed element is empty or
285        // declaration-only (no usable root). Treat that the same as "nothing parsed"
286        // rather than emitting an `<root></root>` stub.
287        let Some(mut merged) = merge_xml_elements(&parsed_objects) else {
288            log::error!(
289                "No usable root element found while merging files under {}. A reassembled XML file was not created.",
290                file_path
291            );
292            return Ok(());
293        };
294
295        // Resolve sidecar specs: use the caller-supplied slice when non-empty,
296        // otherwise auto-detect from .sidecars.json written by disassembly.
297        let auto_specs: Vec<crate::xml::types::SidecarSpec>;
298        let effective_specs: Option<&[crate::xml::types::SidecarSpec]> =
299            if sidecar_specs.is_some_and(|s| !s.is_empty()) {
300                sidecar_specs
301            } else {
302                let meta_path = Path::new(&file_path).join(".sidecars.json");
303                if let Ok(content) = fs::read_to_string(&meta_path).await {
304                    if let Ok(parsed) =
305                        serde_json::from_str::<Vec<crate::xml::types::SidecarSpec>>(&content)
306                    {
307                        auto_specs = parsed;
308                        Some(auto_specs.as_slice())
309                    } else {
310                        None
311                    }
312                } else {
313                    None
314                }
315            };
316
317        // Inject sidecar element content before key reordering so the element
318        // lands at its original position rather than being appended at the end.
319        if let Some(specs) = effective_specs {
320            inject_sidecar_elements(&file_path, &mut merged, specs).await?;
321        }
322
323        // Apply stored key order so reassembled XML matches original document order.
324        let key_order_path = Path::new(&file_path).join(".key_order.json");
325        if let Some(reordered) = read_key_order(&key_order_path)
326            .await
327            .and_then(|order| reorder_root_keys(&merged, &order))
328        {
329            merged = reordered;
330        }
331
332        let final_xml = build_xml_string(&merged);
333        let output_path = self.get_output_path(&file_path, file_extension);
334
335        fs::write(&output_path, &final_xml).await?;
336
337        // Remove sidecar files and metadata after successful injection. They are
338        // decomposition artifacts: the content is now inside the reassembled XML.
339        // Only done when post_purge is requested.
340        if post_purge {
341            if let Some(specs) = effective_specs {
342                let path = Path::new(&file_path);
343                let base = path
344                    .file_name()
345                    .and_then(|n| n.to_str())
346                    .unwrap_or("output");
347                for spec in specs {
348                    let sidecar = path.join(format!("{}.{}", base, spec.extension));
349                    fs::remove_file(&sidecar).await.ok();
350                }
351            }
352            fs::remove_dir_all(file_path).await.ok();
353        }
354
355        Ok(())
356    }
357
358    fn process_files_in_directory<'a>(
359        &'a self,
360        dir_path: String,
361        base_segments: Vec<(String, String, bool)>,
362    ) -> ProcessDirFuture<'a> {
363        Box::pin(async move {
364            let mut parsed = Vec::new();
365            let mut entries = Vec::new();
366            let mut read_dir = fs::read_dir(&dir_path).await?;
367            while let Some(entry) = read_dir.next_entry().await? {
368                entries.push(entry);
369            }
370            // Sort by full filename for deterministic cross-platform ordering
371            entries.sort_by(|a, b| {
372                let a_name = a.file_name().to_string_lossy().to_string();
373                let b_name = b.file_name().to_string_lossy().to_string();
374                a_name.cmp(&b_name)
375            });
376
377            // We are at the disassembly root for a given rule when our dir_path matches
378            // the base_path stored on that rule. Each rule shares the same base_path in
379            // the current implementation, but tracking them per-entry keeps the door open
380            // for future per-rule base_paths without another signature change.
381            let is_base = is_at_base_path(&dir_path, &base_segments);
382
383            for entry in entries {
384                let path = entry.path();
385                let file_path = normalize_path_unix(&path.to_string_lossy()).to_string();
386
387                if path.is_file() {
388                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
389                    if !name.starts_with('.') && self.is_parsable_file(name) {
390                        if let Some(parsed_obj) = parse_to_xml_object(&file_path).await {
391                            parsed.push(parsed_obj);
392                        }
393                    }
394                } else {
395                    // Anything not a regular file is treated as a directory; symlinks and
396                    // other exotic entries simply recurse via read_dir below.
397                    let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
398                    let matched_segment = if is_base {
399                        base_segments
400                            .iter()
401                            .find(|(_, seg_name, _)| seg_name == dir_name)
402                            .cloned()
403                    } else {
404                        None
405                    };
406                    if let Some((_, segment_name, extract_inner)) = matched_segment {
407                        let segment_element = self
408                            .collect_segment_as_array(&file_path, &segment_name, extract_inner)
409                            .await?;
410                        if let Some(el) = segment_element {
411                            parsed.push(el);
412                        }
413                    } else {
414                        let sub_parsed = self
415                            .process_files_in_directory(file_path, base_segments.clone())
416                            .await?;
417                        parsed.extend(sub_parsed);
418                    }
419                }
420            }
421
422            Ok(parsed)
423        })
424    }
425
426    /// Collect all .xml files in a directory, parse each, and build one element with
427    /// root_key and single key segment_name whose value is array of each file's content.
428    /// When extract_inner is true, each file has root > segment_name > content; we push that content.
429    async fn collect_segment_as_array(
430        &self,
431        segment_dir: &str,
432        segment_name: &str,
433        extract_inner: bool,
434    ) -> Result<Option<XmlElement>, Box<dyn std::error::Error + Send + Sync>> {
435        let mut xml_files = Vec::new();
436        let mut read_dir = fs::read_dir(segment_dir).await?;
437        while let Some(entry) = read_dir.next_entry().await? {
438            let path = entry.path();
439            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
440            if path.is_file() && !name.starts_with('.') && self.is_parsable_file(name) {
441                xml_files.push(normalize_path_unix(&path.to_string_lossy()));
442            }
443        }
444        xml_files.sort();
445
446        let mut root_contents = Vec::new();
447        let mut first_xml: Option<(String, Option<Value>)> = None;
448        for file_path in &xml_files {
449            // parse_to_xml_object always yields a JSON object on success; treat any other
450            // shape (including parse failure) as a skip without branching explicitly.
451            let Some(parsed) = parse_to_xml_object(file_path).await else {
452                continue;
453            };
454            let obj_owned = parsed.as_object().cloned().unwrap_or_default();
455            let obj = &obj_owned;
456            let Some(root_key) = obj.keys().find(|k| *k != "?xml").cloned() else {
457                continue;
458            };
459            let root_val = obj
460                .get(&root_key)
461                .cloned()
462                .unwrap_or(Value::Object(serde_json::Map::new()));
463            let mut content = if extract_inner {
464                root_val
465                    .get(segment_name)
466                    .cloned()
467                    .unwrap_or_else(|| Value::Object(serde_json::Map::new()))
468            } else {
469                root_val
470            };
471            // Inner segment element (e.g. programProcesses) should not have xmlns in output
472            if extract_inner {
473                content = strip_xmlns_from_value(content);
474            }
475            root_contents.push(content);
476            if first_xml.is_none() {
477                first_xml = Some((root_key, obj.get("?xml").cloned()));
478            }
479        }
480        if root_contents.is_empty() {
481            return Ok(None);
482        }
483        let (root_key, decl_opt) = first_xml.unwrap();
484        let mut content = serde_json::Map::new();
485        content.insert(segment_name.to_string(), Value::Array(root_contents));
486        let mut top = serde_json::Map::new();
487        if let Some(decl) = decl_opt {
488            top.insert("?xml".to_string(), decl);
489        } else {
490            let mut d = serde_json::Map::new();
491            d.insert("@version".to_string(), Value::String("1.0".to_string()));
492            d.insert("@encoding".to_string(), Value::String("UTF-8".to_string()));
493            top.insert("?xml".to_string(), Value::Object(d));
494        }
495        top.insert(root_key, Value::Object(content));
496        Ok(Some(Value::Object(top)))
497    }
498
499    fn is_parsable_file(&self, file_name: &str) -> bool {
500        let lower = file_name.to_lowercase();
501        lower.ends_with(".xml")
502            || lower.ends_with(".json")
503            || lower.ends_with(".json5")
504            || lower.ends_with(".yaml")
505            || lower.ends_with(".yml")
506    }
507
508    async fn validate_directory(
509        &self,
510        path: &str,
511    ) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
512        let meta = fs::metadata(path).await?;
513        if !meta.is_dir() {
514            log::error!(
515                "The provided path to reassemble is not a directory: {}",
516                path
517            );
518            return Ok(false);
519        }
520        Ok(true)
521    }
522
523    fn get_output_path(&self, dir_path: &str, extension: Option<&str>) -> String {
524        let path = Path::new(dir_path);
525        let parent = path.parent().unwrap_or(Path::new("."));
526        let base_name = path
527            .file_name()
528            .and_then(|n| n.to_str())
529            .unwrap_or("output");
530        let ext = extension.unwrap_or("xml");
531        parent
532            .join(format!("{}.{}", base_name, ext))
533            .to_string_lossy()
534            .to_string()
535    }
536}
537
538impl Default for ReassembleXmlFileHandler {
539    fn default() -> Self {
540        Self::new()
541    }
542}
543
544/// Convert `content` (in whatever format the sidecar file uses) to `target_format`.
545///
546/// - `"json"` → parse via serde_yaml (superset of JSON) then emit as pretty JSON
547/// - `"yaml"` / `"yml"` → only convert when content is strict JSON; YAML passes through
548/// - anything else → pass through unchanged
549///
550/// Falls back to raw content with a warning when conversion fails.
551fn convert_to_format(content: &str, target_format: &str) -> String {
552    match target_format.to_ascii_lowercase().as_str() {
553        "json" => {
554            match serde_yaml::from_str::<serde_yaml::Value>(content) {
555                Ok(val) => match serde_json::to_string_pretty(&val) {
556                    Ok(json) => json,
557                    Err(e) => {
558                        log::warn!("sidecar reassemble: JSON serialization failed ({e}); using raw content");
559                        content.to_string()
560                    }
561                },
562                Err(e) => {
563                    log::warn!("sidecar reassemble: could not parse content for JSON conversion ({e}); using raw content");
564                    content.to_string()
565                }
566            }
567        }
568        "yaml" | "yml" => {
569            if serde_json::from_str::<serde_json::Value>(content).is_ok() {
570                match serde_yaml::from_str::<serde_yaml::Value>(content)
571                    .ok()
572                    .and_then(|v| serde_yaml::to_string(&v).ok())
573                {
574                    Some(yaml) => yaml,
575                    None => {
576                        log::warn!(
577                            "sidecar reassemble: YAML serialization failed; using raw content"
578                        );
579                        content.to_string()
580                    }
581                }
582            } else {
583                content.to_string()
584            }
585        }
586        _ => content.to_string(),
587    }
588}
589
590/// Read sidecar files and inject their content back into the merged XML value
591/// before serialisation.
592///
593/// Each sidecar is located inside the decomposed directory itself
594/// (e.g. `MySvc/MySvc.yaml`). If a sidecar is absent (element was not in
595/// the original XML, or was never extracted), the spec is silently skipped.
596///
597/// The injected value uses the `{"#raw-text": content}` shape that `build_xml_string`
598/// writes with `partial_escape`: mandatory XML chars (`<`, `>`, `&`) are escaped
599/// but `"` is left as-is so YAML content round-trips without `&quot;` inflation.
600async fn inject_sidecar_elements(
601    dir_path: &str,
602    merged: &mut XmlElement,
603    specs: &[SidecarSpec],
604) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
605    let path = Path::new(dir_path);
606    let base = path
607        .file_name()
608        .and_then(|n| n.to_str())
609        .unwrap_or("output");
610
611    let root_key = merged
612        .as_object()
613        .and_then(|o| o.keys().find(|k| *k != "?xml").cloned());
614    let Some(root_key) = root_key else {
615        return Ok(());
616    };
617
618    if let Some(root_val) = merged.as_object_mut().and_then(|o| o.get_mut(&root_key)) {
619        if let Some(root_obj) = root_val.as_object_mut() {
620            for spec in specs {
621                let sidecar_path = path.join(format!("{}.{}", base, spec.extension));
622                let Ok(content) = fs::read_to_string(&sidecar_path).await else {
623                    continue;
624                };
625                // When the original XML embedded a different format than the sidecar
626                // extension (e.g. JSON schema extracted to a .yaml sidecar), convert
627                // the sidecar content back to the original format before injecting.
628                let final_content = match &spec.original_format {
629                    Some(fmt) => convert_to_format(&content, fmt),
630                    None => content,
631                };
632                root_obj.insert(
633                    spec.element.clone(),
634                    serde_json::json!({ "#raw-text": final_content }),
635                );
636            }
637        }
638    }
639
640    Ok(())
641}
642
643#[cfg(test)]
644mod tests {
645    use super::*;
646    use serde_json::json;
647
648    #[test]
649    #[allow(clippy::default_constructed_unit_structs)]
650    fn reassemble_handler_default_equals_new() {
651        let _ = ReassembleXmlFileHandler::default();
652    }
653
654    #[test]
655    fn strip_xmlns_from_value_passes_non_object_through() {
656        let s = Value::String("hello".to_string());
657        assert_eq!(
658            strip_xmlns_from_value(s),
659            Value::String("hello".to_string())
660        );
661        let arr = json!([1, 2]);
662        assert_eq!(strip_xmlns_from_value(arr.clone()), arr);
663    }
664
665    #[test]
666    fn strip_xmlns_from_value_removes_xmlns_key() {
667        let obj = json!({ "@xmlns": "ns", "child": 1 });
668        let stripped = strip_xmlns_from_value(obj);
669        let map = stripped.as_object().unwrap();
670        assert!(map.get("@xmlns").is_none());
671        assert_eq!(map.get("child").and_then(|v| v.as_i64()), Some(1));
672    }
673
674    #[test]
675    fn is_parsable_file_recognises_supported_extensions() {
676        let h = ReassembleXmlFileHandler::new();
677        assert!(h.is_parsable_file("a.xml"));
678        assert!(h.is_parsable_file("a.json"));
679        assert!(h.is_parsable_file("a.json5"));
680        assert!(h.is_parsable_file("a.yaml"));
681        assert!(h.is_parsable_file("a.yml"));
682        assert!(h.is_parsable_file("A.XML"));
683        assert!(!h.is_parsable_file("a.txt"));
684    }
685
686    #[test]
687    fn get_output_path_appends_extension_and_uses_parent_dir() {
688        let h = ReassembleXmlFileHandler::new();
689        let out = h.get_output_path("/tmp/foo", Some("xml"));
690        assert!(out.ends_with("foo.xml"));
691        let out_default = h.get_output_path("/tmp/bar", None);
692        assert!(out_default.ends_with("bar.xml"));
693        // No parent - uses "." fallback
694        assert_eq!(h.get_output_path("only", Some("json")), "only.json");
695    }
696
697    #[tokio::test]
698    async fn reassemble_multi_level_segment_noop_when_not_dir() {
699        let h = ReassembleXmlFileHandler::new();
700        let tmp = tempfile::tempdir().unwrap();
701        let file = tmp.path().join("not_a_dir.txt");
702        tokio::fs::write(&file, "hi").await.unwrap();
703        let rule = crate::xml::types::MultiLevelRule {
704            file_pattern: String::new(),
705            root_to_strip: String::new(),
706            unique_id_elements: String::new(),
707            path_segment: String::new(),
708            wrap_root_element: "Root".to_string(),
709            wrap_xmlns: String::new(),
710        };
711        h.reassemble_multi_level_segment(&file, &rule, &[])
712            .await
713            .unwrap();
714    }
715
716    #[tokio::test]
717    async fn reassemble_multi_level_segment_skips_files_in_segment_root() {
718        let h = ReassembleXmlFileHandler::new();
719        let tmp = tempfile::tempdir().unwrap();
720        let segment = tmp.path().join("segment");
721        tokio::fs::create_dir(&segment).await.unwrap();
722        // A bare file inside the segment dir should be skipped (not a subdir).
723        tokio::fs::write(segment.join("stray.txt"), "x")
724            .await
725            .unwrap();
726        let rule = crate::xml::types::MultiLevelRule {
727            file_pattern: String::new(),
728            root_to_strip: String::new(),
729            unique_id_elements: String::new(),
730            path_segment: "segment".to_string(),
731            wrap_root_element: "Root".to_string(),
732            wrap_xmlns: "http://example.com".to_string(),
733        };
734        h.reassemble_multi_level_segment(&segment, &rule, &[])
735            .await
736            .unwrap();
737    }
738
739    #[tokio::test]
740    async fn collect_segment_as_array_returns_none_for_empty_dir() {
741        let h = ReassembleXmlFileHandler::new();
742        let tmp = tempfile::tempdir().unwrap();
743        let out = h
744            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", true)
745            .await
746            .unwrap();
747        assert!(out.is_none());
748    }
749
750    #[tokio::test]
751    async fn collect_segment_as_array_skips_unparseable_and_empty_roots() {
752        let h = ReassembleXmlFileHandler::new();
753        let tmp = tempfile::tempdir().unwrap();
754        // Unparseable XML
755        tokio::fs::write(tmp.path().join("bad.xml"), "<<")
756            .await
757            .unwrap();
758        // Valid XML but only declaration and no root after parse
759        tokio::fs::write(tmp.path().join("only-decl.xml"), "")
760            .await
761            .unwrap();
762        // Hidden file is skipped
763        tokio::fs::write(tmp.path().join(".hidden.xml"), "<r/>")
764            .await
765            .unwrap();
766        let out = h
767            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", false)
768            .await
769            .unwrap();
770        assert!(out.is_none());
771    }
772
773    #[tokio::test]
774    async fn collect_segment_as_array_without_xml_decl_inserts_default_decl() {
775        // XML files without a `<?xml ?>` declaration: the `else` branch in
776        // collect_segment_as_array must synthesize a default declaration with
777        // version="1.0" and encoding="UTF-8".
778        let h = ReassembleXmlFileHandler::new();
779        let tmp = tempfile::tempdir().unwrap();
780        tokio::fs::write(
781            tmp.path().join("a.xml"),
782            r#"<Root><seg><x>1</x></seg></Root>"#,
783        )
784        .await
785        .unwrap();
786        let out = h
787            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", true)
788            .await
789            .unwrap()
790            .unwrap();
791        let obj = out.as_object().unwrap();
792        let decl = obj
793            .get("?xml")
794            .and_then(|v| v.as_object())
795            .expect("default declaration must be inserted when XML has none");
796        assert_eq!(decl.get("@version").and_then(|v| v.as_str()), Some("1.0"));
797        assert_eq!(
798            decl.get("@encoding").and_then(|v| v.as_str()),
799            Some("UTF-8")
800        );
801    }
802
803    #[tokio::test]
804    async fn collect_segment_as_array_with_xml_decl_preserves_it() {
805        // XML file WITH a `<?xml ?>` declaration: `decl_opt` is Some → line 438
806        // `top.insert("?xml", decl)` is executed (the `if let Some(decl)` branch).
807        let h = ReassembleXmlFileHandler::new();
808        let tmp = tempfile::tempdir().unwrap();
809        tokio::fs::write(
810            tmp.path().join("a.xml"),
811            r#"<?xml version="1.0" encoding="UTF-8"?><Root><seg><x>1</x></seg></Root>"#,
812        )
813        .await
814        .unwrap();
815        let out = h
816            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", true)
817            .await
818            .unwrap()
819            .unwrap();
820        let obj = out.as_object().unwrap();
821        let decl = obj
822            .get("?xml")
823            .and_then(|v| v.as_object())
824            .expect("?xml declaration must be preserved from source");
825        assert_eq!(decl.get("@version").and_then(|v| v.as_str()), Some("1.0"));
826        assert_eq!(
827            decl.get("@encoding").and_then(|v| v.as_str()),
828            Some("UTF-8")
829        );
830    }
831
832    #[tokio::test]
833    async fn collect_segment_as_array_without_extract_inner_wraps_root() {
834        let h = ReassembleXmlFileHandler::new();
835        let tmp = tempfile::tempdir().unwrap();
836        tokio::fs::write(tmp.path().join("a.xml"), r#"<Root><child>1</child></Root>"#)
837            .await
838            .unwrap();
839        let out = h
840            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", false)
841            .await
842            .unwrap()
843            .unwrap();
844        let obj = out.as_object().unwrap();
845        assert!(obj.contains_key("?xml"));
846        let root = obj.get("Root").and_then(|r| r.as_object()).unwrap();
847        assert!(root.get("seg").and_then(|v| v.as_array()).is_some());
848    }
849
850    fn rule_with_segment(segment: &str) -> MultiLevelRule {
851        MultiLevelRule {
852            file_pattern: String::new(),
853            root_to_strip: String::new(),
854            unique_id_elements: String::new(),
855            path_segment: segment.to_string(),
856            wrap_root_element: String::new(),
857            wrap_xmlns: String::new(),
858        }
859    }
860
// The rule whose `path_segment` was just matched must be dropped from the
// candidates handed to the deeper level; otherwise a child directory that
// shares that segment name would re-enter the same rule.
#[test]
fn deeper_candidate_rules_excludes_the_matched_segment() {
    let rules: Vec<MultiLevelRule> = ["seg_a", "seg_b"]
        .iter()
        .copied()
        .map(rule_with_segment)
        .collect();

    let survivors = deeper_candidate_rules(&rules, "seg_a");
    let surviving_segments: Vec<&str> = survivors
        .iter()
        .map(|rule| rule.path_segment.as_str())
        .collect();

    assert_eq!(surviving_segments, ["seg_b"]);
}
871
// Excluding a segment that no rule carries must leave the rule list intact.
// This pins the `!= -> ==` mutant, which would return an empty vec here.
#[test]
fn deeper_candidate_rules_keeps_all_when_no_segment_matches() {
    let rules: Vec<MultiLevelRule> = ["seg_a", "seg_b"]
        .iter()
        .copied()
        .map(rule_with_segment)
        .collect();

    let survivors = deeper_candidate_rules(&rules, "missing");

    assert_eq!(survivors.len(), 2);
}
881
// With no rules to start from there is nothing to forward.
#[test]
fn deeper_candidate_rules_returns_empty_for_empty_input() {
    let no_rules: Vec<MultiLevelRule> = Vec::new();
    let survivors = deeper_candidate_rules(&no_rules, "anything");
    assert_eq!(survivors.len(), 0);
}
887
// A match on any entry is enough; here the second entry's directory is the
// one that equals the queried path.
#[test]
fn is_at_base_path_true_when_dir_matches_any_segment() {
    let entry = |dir: &str, name: &str| (dir.to_owned(), name.to_owned(), false);
    let segments = vec![entry("/base/other", "seg1"), entry("/base/here", "seg2")];

    let at_base = is_at_base_path("/base/here", &segments);

    assert!(at_base);
}
896
// The queried directory differs from the only entry's directory, so the
// check must come back false.
#[test]
fn is_at_base_path_false_when_dir_matches_nothing() {
    let segments = vec![(String::from("/base/a"), String::from("seg"), false)];

    let at_base = is_at_base_path("/base/b", &segments);

    assert!(!at_base);
}
902
// With no entries at all, no directory can be a base path.
#[test]
fn is_at_base_path_false_for_empty_segments() {
    let no_segments = Vec::<(String, String, bool)>::new();

    let at_base = is_at_base_path("/anywhere", &no_segments);

    assert!(!at_base);
}
908
909    // Pins the `delete ! in is_some_and(|s| !s.is_empty())` mutant at line 299.
910    // When sidecar_specs is Some(&[]) the caller supplied nothing useful, so the
911    // code must fall through to auto-detect via .sidecars.json — not treat the
912    // empty slice as authoritative.
913    #[tokio::test]
914    async fn reassemble_plain_some_empty_sidecar_specs_falls_through_to_auto_detect() {
915        let h = ReassembleXmlFileHandler::new();
916        let tmp = tempfile::tempdir().unwrap();
917        let dir = tmp.path().join("mydir");
918        tokio::fs::create_dir(&dir).await.unwrap();
919
920        tokio::fs::write(
921            dir.join("a.xml"),
922            r#"<?xml version="1.0" encoding="UTF-8"?><Root><Child>hello</Child></Root>"#,
923        )
924        .await
925        .unwrap();
926
927        tokio::fs::write(
928            dir.join(".sidecars.json"),
929            r#"[{"element":"Notes","extension":"yaml"}]"#,
930        )
931        .await
932        .unwrap();
933
934        // Sidecar file name = directory name + extension (matches inject_sidecar_elements logic).
935        tokio::fs::write(dir.join("mydir.yaml"), "key: value")
936            .await
937            .unwrap();
938
939        h.reassemble_plain(dir.to_str().unwrap(), Some("xml"), false, &[], Some(&[]))
940            .await
941            .unwrap();
942
943        let output = tokio::fs::read_to_string(tmp.path().join("mydir.xml"))
944            .await
945            .unwrap();
946        assert!(
947            output.contains("key: value"),
948            "sidecar content missing — auto-detect did not run:\n{output}"
949        );
950    }
951
// Pins the mutant that deletes the `"json"` match arm: without it, YAML
// input would hit the catch-all arm and come back as raw YAML, which would
// fail the JSON parse below.
#[test]
fn convert_to_format_yaml_to_json() {
    let yaml = "openapi: 3.0.1\ninfo:\n  title: \"Test API\"\n  version: 1.0.0\n";

    let out = convert_to_format(yaml, "json");
    let parsed =
        serde_json::from_str::<serde_json::Value>(&out).expect("output must be valid JSON");

    // (JSON pointer, expected string value)
    let expectations = [
        ("/openapi", "3.0.1"),
        ("/info/title", "Test API"),
        ("/info/version", "1.0.0"),
    ];
    for (pointer, expected) in expectations {
        assert_eq!(
            parsed.pointer(pointer).and_then(|v| v.as_str()),
            Some(expected)
        );
    }
}
963
// Pins the mutant that deletes the `"yaml" | "yml"` match arm: without it,
// JSON input would hit the catch-all arm and come back as raw JSON.
#[test]
fn convert_to_format_json_to_yaml() {
    let json = r#"{"key":"value","num":42}"#;

    let out = convert_to_format(json, "yaml");

    // The output must no longer be parseable as JSON...
    let reparsed_as_json = serde_json::from_str::<serde_json::Value>(&out);
    assert!(
        reparsed_as_json.is_err(),
        "output must be YAML format, not raw JSON: {out}"
    );

    // ...but must parse as YAML and carry the same data.
    let parsed =
        serde_yaml::from_str::<serde_json::Value>(&out).expect("output must be valid YAML");
    assert_eq!(parsed["key"], "value");
    assert_eq!(parsed["num"], 42);
}
978
// The short `yml` extension must produce YAML-parseable output that keeps
// the input's data.
#[test]
fn convert_to_format_yml_extension_same_as_yaml() {
    let json = r#"{"x":true}"#;

    let converted = convert_to_format(json, "yml");
    let parsed = serde_yaml::from_str::<serde_json::Value>(&converted).unwrap();

    assert_eq!(parsed["x"], true);
}
986
// Input that is already plain YAML does not parse as JSON, so it is
// expected back byte-for-byte, with no re-serialization.
#[test]
fn convert_to_format_yaml_passes_through_unchanged() {
    let yaml = "title: \"@AuraEnabled\"\nversion: 1.0.0\n";

    let converted = convert_to_format(yaml, "yaml");

    assert_eq!(converted, yaml);
}
993
// Extensions the converter does not recognise (including the empty string)
// must leave the content untouched.
#[test]
fn convert_to_format_unknown_extension_passes_through() {
    let raw = "arbitrary content";

    for extension in ["txt", ""] {
        assert_eq!(convert_to_format(raw, extension), raw);
    }
}
1000
// Content that cannot be converted must be handed back as-is rather than
// causing a failure.
#[test]
fn convert_to_format_malformed_falls_back_to_raw() {
    let garbage = "{{{{ not valid json or yaml at all >>>>>";

    let converted = convert_to_format(garbage, "json");

    assert_eq!(converted, garbage);
}
1006}