Skip to main content

config_disassembler/xml/handlers/
disassemble.rs

1//! Disassemble XML file handler.
2
3use crate::xml::builders::{build_disassembled_files_unified, build_xml_string};
4use crate::xml::multi_level::{
5    capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6    strip_root_and_build_xml,
7};
8use crate::xml::parsers::{extract_xml_declaration_from_raw, parse_xml, parse_xml_from_str};
9use crate::xml::types::{
10    BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule, SidecarSpec,
11};
12use crate::xml::utils::normalize_path_unix;
13use ignore::gitignore::GitignoreBuilder;
14use std::io::Write as _;
15use std::path::Path;
16use tokio::fs;
17
18pub struct DisassembleXmlFileHandler {
19    ign: Option<ignore::gitignore::Gitignore>,
20}
21
22impl DisassembleXmlFileHandler {
23    pub fn new() -> Self {
24        Self { ign: None }
25    }
26
27    async fn load_ignore_rules(&mut self, ignore_path: &str) {
28        let path = Path::new(ignore_path);
29        let content = match fs::read_to_string(path).await {
30            Ok(c) => c,
31            Err(_) => return,
32        };
33        let root = path.parent().unwrap_or(Path::new("."));
34        let mut builder = GitignoreBuilder::new(root);
35        for line in content.lines() {
36            let _ = builder.add_line(None, line);
37        }
38        // `GitignoreBuilder::build` only fails on unlikely I/O errors; treat as absent rules.
39        self.ign = builder.build().ok();
40    }
41
42    fn posix_path(path: &str) -> String {
43        path.replace('\\', "/")
44    }
45
46    fn is_xml_file(file_path: &str) -> bool {
47        file_path.to_lowercase().ends_with(".xml")
48    }
49
50    /// True when a directory entry is both a regular file and an `.xml`.
51    /// Pure helper extracted from `handle_directory` so the
52    /// `is_file && is_xml_file` predicate can be exercised without a
53    /// real filesystem entry.
54    fn is_processable_xml_entry(is_file: bool, file_name: &str) -> bool {
55        is_file && Self::is_xml_file(file_name)
56    }
57
58    /// True when the unified-build output directory should be purged
59    /// before re-disassembling. Both the flag *and* the existence check
60    /// must hold; `pre_purge=true` against a missing directory is a
61    /// no-op rather than an error.
62    fn should_pre_purge_output(pre_purge: bool, output_exists: bool) -> bool {
63        pre_purge && output_exists
64    }
65
66    /// True when a file inside the disassembly tree should be
67    /// considered by a multi-level rule: it must be `.xml` and either
68    /// its bare name or its full path must contain the rule's pattern.
69    fn file_matches_multi_level_rule(file_name: &str, full_path: &str, file_pattern: &str) -> bool {
70        file_name.ends_with(".xml")
71            && (file_name.contains(file_pattern) || full_path.contains(file_pattern))
72    }
73
74    /// True when the parsed XML document has the multi-level rule's
75    /// `root_to_strip` either as its root element or as a direct child
76    /// of its root element.
77    fn has_element_to_strip(parsed: &serde_json::Value, root_to_strip: &str) -> bool {
78        parsed
79            .as_object()
80            .and_then(|o| {
81                let root_key = o.keys().find(|k| *k != "?xml")?;
82                let root_val = o.get(root_key)?.as_object()?;
83                Some(root_key == root_to_strip || root_val.contains_key(root_to_strip))
84            })
85            .unwrap_or(false)
86    }
87
88    /// Two multi-level rules share an "identity" — i.e. should be
89    /// deduplicated in `.multi_level.json` — when both their
90    /// `file_pattern` and their `root_to_strip` match. The other
91    /// fields (`unique_id_elements`, `path_segment`, …) are derived
92    /// per-file and may legitimately drift.
93    fn rules_have_same_identity(a: &MultiLevelRule, b: &MultiLevelRule) -> bool {
94        a.file_pattern == b.file_pattern && a.root_to_strip == b.root_to_strip
95    }
96
97    /// First non-`?xml` key of the parsed document, used as the
98    /// `wrap_root_element` for a multi-level rule. Falls back to
99    /// `fallback` when the parsed value is not an object or contains
100    /// only the declaration.
101    fn root_element_name_from_parsed(parsed: &serde_json::Value, fallback: &str) -> String {
102        parsed
103            .as_object()
104            .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
105            .unwrap_or_else(|| fallback.to_string())
106    }
107
108    fn is_ignored(&self, path: &str) -> bool {
109        self.ign
110            .as_ref()
111            .map(|ign| ign.matched(path, false).is_ignore())
112            .unwrap_or(false)
113    }
114
115    /// Derive the disassembled-output directory name from a file stem.
116    ///
117    /// We strip only the trailing extension-like segment (everything after the **last** `.`),
118    /// so `HR_Admin.permissionset-meta` collapses to `HR_Admin` while
119    /// `Account.MyApprovalProcess.approvalProcess-meta` collapses to `Account.MyApprovalProcess`.
120    /// Splitting at the *first* dot — the previous behaviour — was lossy for metadata types
121    /// whose fullName itself contains a dot (e.g. Salesforce approval processes, quick actions,
122    /// custom-metadata records) because two files like `A.X.foo-meta.xml` and `A.Y.foo-meta.xml`
123    /// both resolved to `A/`, silently merging unrelated components.
124    fn output_dir_basename(file_stem: &str) -> &str {
125        file_stem
126            .rsplit_once('.')
127            .map(|(prefix, _)| prefix)
128            .unwrap_or(file_stem)
129    }
130
131    #[allow(clippy::too_many_arguments)]
132    pub async fn disassemble(
133        &mut self,
134        file_path: &str,
135        unique_id_elements: Option<&str>,
136        strategy: Option<&str>,
137        pre_purge: bool,
138        post_purge: bool,
139        ignore_path: &str,
140        format: &str,
141        multi_level_rules: Option<&[MultiLevelRule]>,
142        decompose_rules: Option<&[DecomposeRule]>,
143        sidecar_specs: Option<&[SidecarSpec]>,
144    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
145        let strategy = strategy.unwrap_or("unique-id");
146        let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
147            strategy
148        } else {
149            log::warn!(
150                "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
151                strategy
152            );
153            "unique-id"
154        };
155
156        self.load_ignore_rules(ignore_path).await;
157
158        let path = Path::new(file_path);
159        let meta = fs::metadata(path).await?;
160        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
161        let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
162        let relative_path = Self::posix_path(&relative_path);
163
164        // Treat an empty rules slice as "no multi-level".
165        let multi_level_rules = multi_level_rules.filter(|rules| !rules.is_empty());
166
167        if meta.is_file() {
168            self.handle_file(
169                file_path,
170                &relative_path,
171                unique_id_elements,
172                strategy,
173                pre_purge,
174                post_purge,
175                format,
176                multi_level_rules,
177                decompose_rules,
178                sidecar_specs,
179            )
180            .await?;
181        } else {
182            // Anything that isn't a regular file is treated as a directory; fs::metadata on
183            // the caller already errored out if the path didn't exist.
184            self.handle_directory(
185                file_path,
186                unique_id_elements,
187                strategy,
188                pre_purge,
189                post_purge,
190                format,
191                multi_level_rules,
192                decompose_rules,
193                sidecar_specs,
194            )
195            .await?;
196        }
197
198        Ok(())
199    }
200
201    #[allow(clippy::too_many_arguments)]
202    async fn handle_file(
203        &self,
204        file_path: &str,
205        relative_path: &str,
206        unique_id_elements: Option<&str>,
207        strategy: &str,
208        pre_purge: bool,
209        post_purge: bool,
210        format: &str,
211        multi_level_rules: Option<&[MultiLevelRule]>,
212        decompose_rules: Option<&[DecomposeRule]>,
213        sidecar_specs: Option<&[SidecarSpec]>,
214    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
215        let resolved = Path::new(file_path)
216            .canonicalize()
217            .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
218        let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
219
220        if !Self::is_xml_file(&resolved_str) {
221            log::error!(
222                "The file path provided is not an XML file: {}",
223                resolved_str
224            );
225            return Ok(());
226        }
227
228        if self.is_ignored(relative_path) {
229            log::warn!("File ignored by ignore rules: {}", resolved_str);
230            return Ok(());
231        }
232
233        let dir_path = resolved.parent().unwrap_or(Path::new("."));
234        let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
235        self.process_file(
236            &dir_path_str,
237            strategy,
238            &resolved_str,
239            unique_id_elements,
240            pre_purge,
241            post_purge,
242            format,
243            multi_level_rules,
244            decompose_rules,
245            sidecar_specs,
246        )
247        .await
248    }
249
250    #[allow(clippy::too_many_arguments)]
251    async fn handle_directory(
252        &self,
253        dir_path: &str,
254        unique_id_elements: Option<&str>,
255        strategy: &str,
256        pre_purge: bool,
257        post_purge: bool,
258        format: &str,
259        multi_level_rules: Option<&[MultiLevelRule]>,
260        decompose_rules: Option<&[DecomposeRule]>,
261        sidecar_specs: Option<&[SidecarSpec]>,
262    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
263        let dir_path = normalize_path_unix(dir_path);
264        let mut entries = fs::read_dir(&dir_path).await?;
265        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
266
267        while let Some(entry) = entries.next_entry().await? {
268            let sub_path = entry.path();
269            let sub_file_path = sub_path.to_string_lossy();
270            let relative_sub = sub_path
271                .strip_prefix(&cwd)
272                .unwrap_or(&sub_path)
273                .to_string_lossy();
274            let relative_sub = Self::posix_path(&relative_sub);
275
276            if !Self::is_processable_xml_entry(sub_path.is_file(), &sub_file_path) {
277                continue;
278            }
279            if self.is_ignored(&relative_sub) {
280                log::warn!("File ignored by ignore rules: {}", sub_file_path);
281                continue;
282            }
283            let sub_file_path_norm = normalize_path_unix(&sub_file_path);
284            self.process_file(
285                &dir_path,
286                strategy,
287                &sub_file_path_norm,
288                unique_id_elements,
289                pre_purge,
290                post_purge,
291                format,
292                multi_level_rules,
293                decompose_rules,
294                sidecar_specs,
295            )
296            .await?;
297        }
298        Ok(())
299    }
300
301    #[allow(clippy::too_many_arguments)]
302    async fn process_file(
303        &self,
304        dir_path: &str,
305        strategy: &str,
306        file_path: &str,
307        unique_id_elements: Option<&str>,
308        pre_purge: bool,
309        post_purge: bool,
310        format: &str,
311        multi_level_rules: Option<&[MultiLevelRule]>,
312        decompose_rules: Option<&[DecomposeRule]>,
313        sidecar_specs: Option<&[SidecarSpec]>,
314    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
315        log::debug!("Parsing file to disassemble: {}", file_path);
316
317        let file_name = Path::new(file_path)
318            .file_stem()
319            .and_then(|s| s.to_str())
320            .unwrap_or("output");
321        let base_name = Self::output_dir_basename(file_name);
322        let output_path = Path::new(dir_path).join(base_name);
323
324        if Self::should_pre_purge_output(pre_purge, output_path.exists()) {
325            fs::remove_dir_all(&output_path).await.ok();
326        }
327
328        // Capture root key order BEFORE sidecar extraction so the sidecar element
329        // names appear at their original positions in .key_order.json.
330        let pre_extraction_key_order: Option<Vec<String>> =
331            if sidecar_specs.is_some_and(|s| !s.is_empty()) {
332                parse_xml(file_path).await.and_then(|parsed| {
333                    let obj = parsed.as_object()?;
334                    let root_key = obj.keys().find(|k| *k != "?xml")?;
335                    obj.get(root_key)?.as_object().map(|root_obj| {
336                        root_obj
337                            .keys()
338                            .filter(|k| !k.starts_with('@'))
339                            .cloned()
340                            .collect()
341                    })
342                })
343            } else {
344                None
345            };
346
347        // Extract sidecar elements before normal disassembly so the disassembler
348        // sees schema-free XML and does not try to shard the embedded blob.
349        // The original file is never modified; stripped content is written to a
350        // temp file that is deleted after disassembly. Sidecar files are written
351        // into the output directory after disassembly creates it.
352        let extraction_result = if let Some(specs) = sidecar_specs {
353            if !specs.is_empty() {
354                extract_sidecar_elements(file_path, specs).await?
355            } else {
356                None
357            }
358        } else {
359            None
360        };
361
362        let temp_file: Option<tempfile::NamedTempFile>;
363        let disassemble_path: &str;
364        if let Some((xml, _)) = &extraction_result {
365            let mut tmp = tempfile::Builder::new()
366                .suffix(".xml")
367                .tempfile_in(Path::new(file_path).parent().unwrap_or(Path::new(".")))?;
368            tmp.write_all(xml.as_bytes())?;
369            temp_file = Some(tmp);
370            disassemble_path = temp_file
371                .as_ref()
372                .unwrap()
373                .path()
374                .to_str()
375                .unwrap_or(file_path);
376        } else {
377            temp_file = None;
378            disassemble_path = file_path;
379        }
380
381        build_disassembled_files_unified(BuildDisassembledFilesOptions {
382            file_path: disassemble_path,
383            disassembled_path: output_path.to_str().unwrap_or("."),
384            base_name: file_name,
385            post_purge,
386            format,
387            unique_id_elements,
388            strategy,
389            decompose_rules,
390        })
391        .await?;
392
393        drop(temp_file); // deletes the temp file
394
395        // Write sidecar files into the output directory, plus a .sidecars.json
396        // metadata file so reassembly can auto-detect specs without CLI flags.
397        if let Some((_, sidecars)) = &extraction_result {
398            for (_, extension, content, _) in sidecars {
399                let sidecar_path = output_path.join(format!("{}.{}", base_name, extension));
400                fs::write(&sidecar_path, content).await?;
401            }
402            if let Some(specs) = sidecar_specs {
403                // Enrich each spec with the original_format detected at extraction time
404                // so reassembly can convert the sidecar content back to the correct format.
405                let enriched: Vec<SidecarSpec> = specs
406                    .iter()
407                    .map(|spec| {
408                        let original_format = sidecars
409                            .iter()
410                            .find(|(el, _, _, _)| el == &spec.element)
411                            .and_then(|(_, _, _, fmt)| fmt.clone());
412                        SidecarSpec {
413                            element: spec.element.clone(),
414                            extension: spec.extension.clone(),
415                            original_format,
416                        }
417                    })
418                    .collect();
419                if let Ok(json) = serde_json::to_string(&enriched) {
420                    let _ = fs::write(output_path.join(".sidecars.json"), json).await;
421                }
422            }
423        }
424
425        // Overwrite .key_order.json with the pre-extraction order so sidecar
426        // element names appear at their original positions during reassembly.
427        if let Some(full_order) = pre_extraction_key_order {
428            let key_order_path = output_path.join(".key_order.json");
429            if let Ok(json) = serde_json::to_string(&full_order) {
430                let _ = fs::write(&key_order_path, json).await;
431            }
432        }
433
434        // Apply each multi-level rule in order. Each rule walks the same disassembly tree
435        // independently; rules are merged into the shared `.multi_level.json` so reassembly
436        // can replay them in order.
437        if let Some(rules) = multi_level_rules {
438            for rule in rules {
439                self.recursively_disassemble_multi_level(&output_path, rule, format)
440                    .await?;
441            }
442        }
443
444        Ok(())
445    }
446
447    /// Recursively walk the disassembly output; for XML files matching the rule's file_pattern,
448    /// strip the root and re-disassemble with the rule's unique_id_elements.
449    async fn recursively_disassemble_multi_level(
450        &self,
451        dir_path: &Path,
452        rule: &MultiLevelRule,
453        format: &str,
454    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
455        let mut config = crate::xml::multi_level::load_multi_level_config(dir_path)
456            .await
457            .unwrap_or_default();
458
459        let mut stack = vec![dir_path.to_path_buf()];
460        while let Some(current) = stack.pop() {
461            let mut entries = Vec::new();
462            let mut read_dir = fs::read_dir(&current).await?;
463            while let Some(entry) = read_dir.next_entry().await? {
464                entries.push(entry);
465            }
466
467            for entry in entries {
468                let path = entry.path();
469                let path_str = path.to_string_lossy().to_string();
470
471                if path.is_dir() {
472                    stack.push(path);
473                    continue;
474                }
475                // Anything not a directory is processed as a regular file below.
476                {
477                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
478                    let path_str_check = path.to_string_lossy();
479                    if !Self::file_matches_multi_level_rule(
480                        name,
481                        &path_str_check,
482                        &rule.file_pattern,
483                    ) {
484                        continue;
485                    }
486
487                    let parsed = match parse_xml(&path_str).await {
488                        Some(p) => p,
489                        None => continue,
490                    };
491                    if !Self::has_element_to_strip(&parsed, &rule.root_to_strip) {
492                        continue;
493                    }
494
495                    let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
496
497                    let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
498                    {
499                        Some(xml) => xml,
500                        None => continue,
501                    };
502
503                    fs::write(&path, stripped_xml).await?;
504
505                    let file_stem = path
506                        .file_stem()
507                        .and_then(|s| s.to_str())
508                        .unwrap_or("output");
509                    let output_dir_name = Self::output_dir_basename(file_stem);
510                    let parent = path.parent().unwrap_or(dir_path);
511                    let second_level_output = parent.join(output_dir_name);
512
513                    build_disassembled_files_unified(BuildDisassembledFilesOptions {
514                        file_path: &path_str,
515                        disassembled_path: second_level_output.to_str().unwrap_or("."),
516                        base_name: output_dir_name,
517                        post_purge: true,
518                        format,
519                        unique_id_elements: Some(&rule.unique_id_elements),
520                        strategy: "unique-id",
521                        decompose_rules: None,
522                    })
523                    .await?;
524
525                    // Find an existing entry for this rule by (file_pattern, root_to_strip).
526                    // Multiple rules may co-exist in `.multi_level.json` (one per logical
527                    // segment); per-rule deduplication keeps each one a singleton.
528                    let existing_idx = config
529                        .rules
530                        .iter()
531                        .position(|r| Self::rules_have_same_identity(r, rule));
532                    match existing_idx {
533                        None => {
534                            let wrap_root = Self::root_element_name_from_parsed(
535                                &parsed,
536                                &rule.wrap_root_element,
537                            );
538                            let path_segment = if rule.path_segment.is_empty() {
539                                path_segment_from_file_pattern(&rule.file_pattern)
540                            } else {
541                                rule.path_segment.clone()
542                            };
543                            let stored_xmlns = if rule.wrap_xmlns.is_empty() {
544                                wrap_xmlns
545                            } else {
546                                rule.wrap_xmlns.clone()
547                            };
548                            config.rules.push(MultiLevelRule {
549                                file_pattern: rule.file_pattern.clone(),
550                                root_to_strip: rule.root_to_strip.clone(),
551                                unique_id_elements: rule.unique_id_elements.clone(),
552                                path_segment,
553                                // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it
554                                // as root with xmlns; path_segment is the inner wrapper in each file.
555                                wrap_root_element: wrap_root,
556                                wrap_xmlns: stored_xmlns,
557                            });
558                        }
559                        Some(idx) => {
560                            // Backfill xmlns from the source if we didn't have one yet; otherwise
561                            // leave the existing entry alone (the first observed file wins).
562                            if config.rules[idx].wrap_xmlns.is_empty() {
563                                config.rules[idx].wrap_xmlns = wrap_xmlns;
564                            }
565                        }
566                    }
567                }
568            }
569        }
570
571        if !config.rules.is_empty() {
572            save_multi_level_config(dir_path, &config).await?;
573        }
574
575        Ok(())
576    }
577}
578
579impl Default for DisassembleXmlFileHandler {
580    fn default() -> Self {
581        Self::new()
582    }
583}
584
585/// Extract the text content of named XML elements in memory and return the
586/// stripped XML plus the sidecar payloads. The caller is responsible for
587/// writing sidecar files; the original file on disk is never modified.
588///
589/// Returns `None` when no matching element was found.
590/// Returns `Some((stripped_xml, sidecars))` where each sidecar entry is
591/// `(element, extension, content, original_format)`.
592///
593/// Quick-xml's parser automatically unescapes entity references in text
594/// content, so the sidecar receives the raw, unescaped bytes of the embedded
595/// document — exactly what you'd write by hand.
596async fn extract_sidecar_elements(
597    file_path: &str,
598    specs: &[SidecarSpec],
599) -> Result<
600    Option<(String, Vec<(String, String, String, Option<String>)>)>,
601    Box<dyn std::error::Error + Send + Sync>,
602> {
603    let raw = fs::read_to_string(file_path).await?;
604    let Some(mut parsed) = parse_xml_from_str(&raw, file_path) else {
605        return Ok(None);
606    };
607
608    // parse_xml_cdata drops the XML declaration; recover it from the raw bytes and
609    // re-inject so build_xml_string emits it in the temp file. Without this the
610    // shards produced by build_disassembled_files_unified lack the declaration and
611    // the reassembler falls back to a synthetic default instead of the original.
612    if let (Some(obj), Some(decl)) = (
613        parsed.as_object_mut(),
614        extract_xml_declaration_from_raw(&raw),
615    ) {
616        obj.insert("?xml".to_string(), decl);
617    }
618
619    let root_key = parsed
620        .as_object()
621        .and_then(|o| o.keys().find(|k| *k != "?xml").cloned());
622    let Some(root_key) = root_key else {
623        return Ok(None);
624    };
625
626    // (element, extension, content, original_format)
627    let mut sidecars: Vec<(String, String, String, Option<String>)> = Vec::new();
628    if let Some(root_val) = parsed.as_object_mut().and_then(|o| o.get_mut(&root_key)) {
629        if let Some(root_obj) = root_val.as_object_mut() {
630            for spec in specs {
631                let Some(elem_val) = root_obj.remove(&spec.element) else {
632                    continue;
633                };
634                // The XML parser always yields Value::Object for element values;
635                // non-Object shapes are unexpected — restore and skip to preserve data.
636                let text = match &elem_val {
637                    serde_json::Value::Object(obj) => obj
638                        .get("#text")
639                        .and_then(|v| v.as_str())
640                        .unwrap_or("")
641                        .to_string(),
642                    _ => {
643                        root_obj.insert(spec.element.clone(), elem_val);
644                        continue;
645                    }
646                };
647                let original_format = detect_content_format(&text);
648                sidecars.push((
649                    spec.element.clone(),
650                    spec.extension.clone(),
651                    convert_sidecar_content(&text, &spec.extension),
652                    original_format,
653                ));
654            }
655        }
656    }
657
658    if sidecars.is_empty() {
659        Ok(None)
660    } else {
661        Ok(Some((build_xml_string(&parsed), sidecars)))
662    }
663}
664
665/// Detect whether `text` is JSON or YAML. Returns `Some("json")`, `Some("yaml")`,
666/// or `None` for content that cannot be parsed as either.
667fn detect_content_format(text: &str) -> Option<String> {
668    if serde_json::from_str::<serde_json::Value>(text).is_ok() {
669        Some("json".to_string())
670    } else if serde_yaml::from_str::<serde_yaml::Value>(text).is_ok() {
671        Some("yaml".to_string())
672    } else {
673        None
674    }
675}
676
677/// Convert raw text extracted from an XML element to the format implied by `extension`.
678///
679/// - `json` → parse as YAML (superset of JSON) then re-emit as pretty JSON
680/// - `yaml` / `yml` → convert only when source is strict JSON; YAML content passes through
681///   unchanged so quote style, indentation, and formatting are preserved on round-trip
682/// - anything else → pass through unchanged
683///
684/// Falls back to raw text with a warning when the content cannot be parsed.
685fn convert_sidecar_content(text: &str, extension: &str) -> String {
686    match extension.to_ascii_lowercase().as_str() {
687        "json" => {
688            // Parse into serde_yaml::Value first (the native representation) then
689            // serialize to JSON. Going directly to serde_json::Value fails for
690            // complex YAML in serde_yaml 0.9 due to cross-crate numeric type conflicts.
691            match serde_yaml::from_str::<serde_yaml::Value>(text) {
692                Ok(val) => match serde_json::to_string_pretty(&val) {
693                    Ok(json) => json,
694                    Err(e) => {
695                        log::warn!("sidecar: JSON serialization failed ({e}); using raw text");
696                        text.to_string()
697                    }
698                },
699                Err(e) => {
700                    log::warn!(
701                        "sidecar: could not parse content for JSON conversion ({e}); using raw text"
702                    );
703                    text.to_string()
704                }
705            }
706        }
707        "yaml" | "yml" => {
708            // Only convert when the source is strict JSON — YAML content passes through
709            // unchanged to avoid re-serialization changing quote style or formatting.
710            if serde_json::from_str::<serde_json::Value>(text).is_ok() {
711                match serde_yaml::from_str::<serde_yaml::Value>(text)
712                    .ok()
713                    .and_then(|v| serde_yaml::to_string(&v).ok())
714                {
715                    Some(yaml) => yaml,
716                    None => {
717                        log::warn!("sidecar: YAML serialization failed; using raw text");
718                        text.to_string()
719                    }
720                }
721            } else {
722                text.to_string()
723            }
724        }
725        _ => text.to_string(),
726    }
727}
728
// Unit tests for the pure helpers on `DisassembleXmlFileHandler` and for the
// free function `convert_sidecar_content`. Filesystem access is limited to the
// two `load_ignore_rules_*` tests; everything else runs on in-memory values.
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` and `new` must both yield a handler with no ignore rules loaded.
    #[test]
    fn disassemble_handler_default_equals_new() {
        assert!(DisassembleXmlFileHandler::default().ign.is_none());
        assert!(DisassembleXmlFileHandler::new().ign.is_none());
    }

    /// The `.xml` suffix check ignores ASCII case and only looks at the end of the name.
    #[test]
    fn is_xml_file_matches_case_insensitively() {
        assert!(DisassembleXmlFileHandler::is_xml_file("foo.xml"));
        assert!(DisassembleXmlFileHandler::is_xml_file("BAR.XML"));
        assert!(DisassembleXmlFileHandler::is_xml_file("Mixed.Xml"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("foo.txt"));
        // `.xml` in the middle of the name is not an XML extension.
        assert!(!DisassembleXmlFileHandler::is_xml_file("notes.xml.bak"));
    }

    /// Backslashes become forward slashes; paths that are already POSIX are untouched.
    #[test]
    fn posix_path_converts_backslashes() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path(r"C:\Users\name\file.xml"),
            "C:/Users/name/file.xml"
        );
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("dir/sub/file.xml"),
            "dir/sub/file.xml"
        );
    }

    /// A missing ignore file is silently skipped and leaves the matcher unset.
    #[tokio::test]
    async fn load_ignore_rules_noop_when_path_missing() {
        let mut handler = DisassembleXmlFileHandler::new();
        handler
            .load_ignore_rules("/definitely/does/not/exist/.ignore")
            .await;
        assert!(handler.ign.is_none());
    }

    /// An existing ignore file produces a matcher that honours its patterns.
    #[tokio::test]
    async fn load_ignore_rules_builds_matcher() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join(".ignore");
        tokio::fs::write(&path, "*.xml\n").await.unwrap();
        let mut handler = DisassembleXmlFileHandler::new();
        handler.load_ignore_rules(path.to_str().unwrap()).await;
        assert!(handler.ign.is_some());
        assert!(handler.is_ignored("file.xml"));
        assert!(!handler.is_ignored("file.txt"));
    }

    /// Without loaded rules nothing is ignored.
    #[test]
    fn is_ignored_default_false_without_rules() {
        let handler = DisassembleXmlFileHandler::new();
        assert!(!handler.is_ignored("some/path.xml"));
    }

    #[test]
    fn output_dir_basename_strips_only_last_dot_segment() {
        // Plain Salesforce-style metadata: strip the `.<suffix>-meta` tail.
        assert_eq!(
            DisassembleXmlFileHandler::output_dir_basename("HR_Admin.permissionset-meta"),
            "HR_Admin"
        );
        assert_eq!(
            DisassembleXmlFileHandler::output_dir_basename("Get_Info.flow-meta"),
            "Get_Info"
        );
    }

    #[test]
    fn output_dir_basename_preserves_dotted_full_names() {
        // Approval processes are named `<sobject>.<process>` which yields a stem containing
        // *two* dots. The old `split('.').next()` returned just `<sobject>`, causing
        // distinct processes for the same sobject to land in the same output directory and
        // silently merge during reassembly. The new behaviour keeps the dotted fullName.
        assert_eq!(
            DisassembleXmlFileHandler::output_dir_basename(
                "Account_Merge__c.New_Account_Merges_2.approvalProcess-meta"
            ),
            "Account_Merge__c.New_Account_Merges_2"
        );
        assert_eq!(
            DisassembleXmlFileHandler::output_dir_basename(
                "Account_Merge__c.New_Account_Merges_3.approvalProcess-meta"
            ),
            "Account_Merge__c.New_Account_Merges_3"
        );
        // Quick actions follow the same `<sobject>.<action>` pattern.
        assert_eq!(
            DisassembleXmlFileHandler::output_dir_basename("Case.LogACall.quickAction-meta"),
            "Case.LogACall"
        );
    }

    #[test]
    fn is_processable_xml_entry_true_only_for_regular_xml_files() {
        // Pin both the `is_file && is_xml_file` conjunction and the
        // outer `!` at the call site. All four quadrants of
        // (is_file, is_xml) are covered.
        let cases = [
            (true, "foo.xml", true),
            (false, "foo.xml", false),
            (true, "foo.txt", false),
            (false, "foo.txt", false),
        ];
        for (is_file, file_name, expected) in cases {
            assert_eq!(
                DisassembleXmlFileHandler::is_processable_xml_entry(is_file, file_name),
                expected,
                "is_file={is_file}, file_name={file_name}"
            );
        }
    }

    #[test]
    fn should_pre_purge_output_requires_both_flag_and_existing_dir() {
        // `pre_purge=true` alone must not delete a missing directory
        // (that's a benign no-op, not an error); an existing directory
        // alone must not be deleted unless the caller asked for purge.
        let cases = [
            (true, true, true),
            (true, false, false),
            (false, true, false),
            (false, false, false),
        ];
        for (first, second, expected) in cases {
            assert_eq!(
                DisassembleXmlFileHandler::should_pre_purge_output(first, second),
                expected,
                "args=({first}, {second})"
            );
        }
    }

    #[test]
    fn file_matches_multi_level_rule_requires_xml_extension() {
        // Non-`.xml` files are skipped regardless of pattern membership.
        assert!(!DisassembleXmlFileHandler::file_matches_multi_level_rule(
            "Foo.txt",
            "/dir/Foo.txt",
            "Foo"
        ));
    }

    #[test]
    fn file_matches_multi_level_rule_when_filename_contains_pattern() {
        assert!(DisassembleXmlFileHandler::file_matches_multi_level_rule(
            "MyPattern.xml",
            "/dir/MyPattern.xml",
            "MyPattern"
        ));
    }

    #[test]
    fn file_matches_multi_level_rule_when_only_full_path_contains_pattern() {
        // The pattern may live in a parent directory name even if the
        // bare file name is something generic like `meta.xml`.
        assert!(DisassembleXmlFileHandler::file_matches_multi_level_rule(
            "child.xml",
            "/parentPattern/child.xml",
            "parentPattern"
        ));
    }

    #[test]
    fn file_matches_multi_level_rule_false_when_pattern_absent_everywhere() {
        assert!(!DisassembleXmlFileHandler::file_matches_multi_level_rule(
            "Foo.xml",
            "/dir/Foo.xml",
            "MissingPattern"
        ));
    }

    #[test]
    fn has_element_to_strip_when_root_key_matches() {
        let parsed = serde_json::json!({"Foo": {"a": "b"}});
        assert!(DisassembleXmlFileHandler::has_element_to_strip(
            &parsed, "Foo"
        ));
    }

    #[test]
    fn has_element_to_strip_when_root_contains_target_child() {
        let parsed = serde_json::json!({"Foo": {"Bar": {"a": "b"}}});
        assert!(DisassembleXmlFileHandler::has_element_to_strip(
            &parsed, "Bar"
        ));
    }

    #[test]
    fn has_element_to_strip_false_when_target_absent() {
        let parsed = serde_json::json!({"Foo": {"a": "b"}});
        assert!(!DisassembleXmlFileHandler::has_element_to_strip(
            &parsed, "Missing"
        ));
    }

    #[test]
    fn has_element_to_strip_false_for_non_object_or_decl_only() {
        // A bare scalar has no root element at all.
        assert!(!DisassembleXmlFileHandler::has_element_to_strip(
            &serde_json::json!("primitive"),
            "Foo"
        ));
        // A document holding only the `?xml` declaration has no real root either.
        assert!(!DisassembleXmlFileHandler::has_element_to_strip(
            &serde_json::json!({"?xml": {}}),
            "Foo"
        ));
    }

    /// Builds a `MultiLevelRule` with the given `file_pattern` and `root_to_strip`;
    /// every other field is left as an empty string.
    fn rule(pattern: &str, root: &str) -> MultiLevelRule {
        MultiLevelRule {
            file_pattern: pattern.to_string(),
            root_to_strip: root.to_string(),
            unique_id_elements: String::new(),
            path_segment: String::new(),
            wrap_root_element: String::new(),
            wrap_xmlns: String::new(),
        }
    }

    #[test]
    fn rules_share_identity_when_pattern_and_root_match() {
        assert!(DisassembleXmlFileHandler::rules_have_same_identity(
            &rule("p", "R"),
            &rule("p", "R"),
        ));
    }

    #[test]
    fn rules_differ_when_file_pattern_differs() {
        assert!(!DisassembleXmlFileHandler::rules_have_same_identity(
            &rule("p1", "R"),
            &rule("p2", "R"),
        ));
    }

    #[test]
    fn rules_differ_when_root_to_strip_differs() {
        assert!(!DisassembleXmlFileHandler::rules_have_same_identity(
            &rule("p", "R1"),
            &rule("p", "R2"),
        ));
    }

    #[test]
    fn root_element_name_finds_first_non_declaration_key() {
        let parsed = serde_json::json!({"?xml": {}, "MyRoot": {"a": "b"}});
        assert_eq!(
            DisassembleXmlFileHandler::root_element_name_from_parsed(&parsed, "fallback"),
            "MyRoot"
        );
    }

    #[test]
    fn root_element_name_falls_back_when_only_declaration_present() {
        let parsed = serde_json::json!({"?xml": {}});
        assert_eq!(
            DisassembleXmlFileHandler::root_element_name_from_parsed(&parsed, "FallbackRoot"),
            "FallbackRoot"
        );
    }

    #[test]
    fn root_element_name_falls_back_for_non_object() {
        let parsed = serde_json::json!("primitive");
        assert_eq!(
            DisassembleXmlFileHandler::root_element_name_from_parsed(&parsed, "Fb"),
            "Fb"
        );
    }

    #[test]
    fn output_dir_basename_no_dot_returns_stem_unchanged() {
        // Stems without any dot are passed through verbatim (no extension to strip).
        assert_eq!(DisassembleXmlFileHandler::output_dir_basename("Foo"), "Foo");
        assert_eq!(DisassembleXmlFileHandler::output_dir_basename(""), "");
    }

    #[test]
    fn convert_sidecar_content_yaml_to_json() {
        // Uses nested YAML matching the fixture shape (quoted strings, string-keyed
        // mappings, dotted version strings) to catch serde_yaml→serde_json cross-crate
        // numeric type failures that affect simple-key tests but not complex YAML.
        let yaml = "openapi: 3.0.1\ninfo:\n  title: \"@AuraEnabled Apex method APIs\"\n  version: 1.0.0\npaths:\n  /uploadFile:\n    post:\n      operationId: uploadFile\n      responses:\n        \"200\":\n          description: OK\n";
        let out = convert_sidecar_content(yaml, "json");
        let val: serde_json::Value = serde_json::from_str(&out).expect("output must be valid JSON");
        assert_eq!(val["openapi"], "3.0.1");
        assert_eq!(val["info"]["title"], "@AuraEnabled Apex method APIs");
        assert_eq!(val["info"]["version"], "1.0.0");
        assert_eq!(
            val["paths"]["/uploadFile"]["post"]["operationId"],
            "uploadFile"
        );
    }

    #[test]
    fn convert_sidecar_content_json_to_yaml() {
        let json = r#"{"key":"value","num":42}"#;
        let out = convert_sidecar_content(json, "yaml");
        // Output must be YAML, not raw JSON — if the yaml arm were deleted, the `_ =>` fallback
        // would return the original JSON string, which is also parseable as YAML and would fool
        // a parse-only assertion. Asserting strict-JSON parse fails pins the arm deletion mutant.
        assert!(
            serde_json::from_str::<serde_json::Value>(&out).is_err(),
            "output must be YAML format, not raw JSON: {out}"
        );
        let val: serde_json::Value = serde_yaml::from_str(&out).expect("output must be valid YAML");
        assert_eq!(val["key"], "value");
        assert_eq!(val["num"], 42);
    }

    #[test]
    fn convert_sidecar_content_json_to_json_prettifies() {
        let compact = r#"{"a":1}"#;
        let out = convert_sidecar_content(compact, "json");
        // Pretty JSON has newlines and indentation.
        assert!(out.contains('\n'), "expected pretty JSON, got: {out}");
        let val: serde_json::Value = serde_json::from_str(&out).unwrap();
        assert_eq!(val["a"], 1);
    }

    #[test]
    fn convert_sidecar_content_unknown_extension_passes_through() {
        // Extensions other than json/yaml/yml (including the empty string) are not converted.
        let raw = "arbitrary: content: here";
        assert_eq!(convert_sidecar_content(raw, "txt"), raw);
        assert_eq!(convert_sidecar_content(raw, ""), raw);
    }

    #[test]
    fn convert_sidecar_content_malformed_falls_back_to_raw() {
        // Unbalanced flow-mapping braces: the text is not strict JSON, so the yaml/yml
        // arm passes it through, and the json arm is expected to hit its
        // "could not parse … using raw text" fallback. Either way the input must come
        // back verbatim.
        let bad = "{{{{ not valid json or yaml at all >>>>>";
        assert_eq!(convert_sidecar_content(bad, "json"), bad);
        assert_eq!(convert_sidecar_content(bad, "yaml"), bad);
    }

    #[test]
    fn convert_sidecar_content_yml_extension_same_as_yaml() {
        let json = r#"{"x":true}"#;
        let out = convert_sidecar_content(json, "yml");
        // Raw JSON is itself valid YAML, so a parse-only check would still pass if "yml"
        // fell through to the `_ =>` passthrough arm. Require that the output is no
        // longer strict JSON, and that it matches what the "yaml" extension produces.
        assert!(
            serde_json::from_str::<serde_json::Value>(&out).is_err(),
            "output must be YAML format, not raw JSON: {out}"
        );
        assert_eq!(out, convert_sidecar_content(json, "yaml"));
        let val: serde_json::Value = serde_yaml::from_str(&out).unwrap();
        assert_eq!(val["x"], true);
    }

    #[test]
    fn convert_sidecar_content_yaml_passes_through_unchanged() {
        // YAML content with a yaml extension must NOT be re-serialized — serde_yaml changes
        // double quotes to single quotes, breaking byte-for-byte round-trip assertions.
        let yaml = "title: \"@AuraEnabled Apex method APIs\"\nversion: 1.0.0\n";
        assert_eq!(convert_sidecar_content(yaml, "yaml"), yaml);
        assert_eq!(convert_sidecar_content(yaml, "yml"), yaml);
    }
}