Skip to main content

config_disassembler/xml/handlers/
disassemble.rs

1//! Disassemble XML file handler.
2
3use crate::xml::builders::build_disassembled_files_unified;
4use crate::xml::multi_level::{
5    capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6    strip_root_and_build_xml,
7};
8use crate::xml::parsers::parse_xml;
9use crate::xml::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use crate::xml::utils::normalize_path_unix;
11use ignore::gitignore::GitignoreBuilder;
12use std::path::Path;
13use tokio::fs;
14
/// Handler that disassembles a single XML file, or every XML file directly inside a
/// directory, into per-element output files.
pub struct DisassembleXmlFileHandler {
    /// Compiled ignore matcher; `None` until `load_ignore_rules` successfully builds one,
    /// in which case no path is treated as ignored.
    ign: Option<ignore::gitignore::Gitignore>,
}
18
19impl DisassembleXmlFileHandler {
20    pub fn new() -> Self {
21        Self { ign: None }
22    }
23
24    async fn load_ignore_rules(&mut self, ignore_path: &str) {
25        let path = Path::new(ignore_path);
26        let content = match fs::read_to_string(path).await {
27            Ok(c) => c,
28            Err(_) => return,
29        };
30        let root = path.parent().unwrap_or(Path::new("."));
31        let mut builder = GitignoreBuilder::new(root);
32        for line in content.lines() {
33            let _ = builder.add_line(None, line);
34        }
35        // `GitignoreBuilder::build` only fails on unlikely I/O errors; treat as absent rules.
36        self.ign = builder.build().ok();
37    }
38
39    fn posix_path(path: &str) -> String {
40        path.replace('\\', "/")
41    }
42
43    fn is_xml_file(file_path: &str) -> bool {
44        file_path.to_lowercase().ends_with(".xml")
45    }
46
47    fn is_ignored(&self, path: &str) -> bool {
48        self.ign
49            .as_ref()
50            .map(|ign| ign.matched(path, false).is_ignore())
51            .unwrap_or(false)
52    }
53
54    /// Derive the disassembled-output directory name from a file stem.
55    ///
56    /// We strip only the trailing extension-like segment (everything after the **last** `.`),
57    /// so `HR_Admin.permissionset-meta` collapses to `HR_Admin` while
58    /// `Account.MyApprovalProcess.approvalProcess-meta` collapses to `Account.MyApprovalProcess`.
59    /// Splitting at the *first* dot — the previous behaviour — was lossy for metadata types
60    /// whose fullName itself contains a dot (e.g. Salesforce approval processes, quick actions,
61    /// custom-metadata records) because two files like `A.X.foo-meta.xml` and `A.Y.foo-meta.xml`
62    /// both resolved to `A/`, silently merging unrelated components.
63    fn output_dir_basename(file_stem: &str) -> &str {
64        file_stem
65            .rsplit_once('.')
66            .map(|(prefix, _)| prefix)
67            .unwrap_or(file_stem)
68    }
69
70    #[allow(clippy::too_many_arguments)]
71    pub async fn disassemble(
72        &mut self,
73        file_path: &str,
74        unique_id_elements: Option<&str>,
75        strategy: Option<&str>,
76        pre_purge: bool,
77        post_purge: bool,
78        ignore_path: &str,
79        format: &str,
80        multi_level_rules: Option<&[MultiLevelRule]>,
81        decompose_rules: Option<&[DecomposeRule]>,
82    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
83        let strategy = strategy.unwrap_or("unique-id");
84        let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
85            strategy
86        } else {
87            log::warn!(
88                "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
89                strategy
90            );
91            "unique-id"
92        };
93
94        self.load_ignore_rules(ignore_path).await;
95
96        let path = Path::new(file_path);
97        let meta = fs::metadata(path).await?;
98        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
99        let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
100        let relative_path = Self::posix_path(&relative_path);
101
102        // Treat an empty rules slice as "no multi-level".
103        let multi_level_rules = multi_level_rules.filter(|rules| !rules.is_empty());
104
105        if meta.is_file() {
106            self.handle_file(
107                file_path,
108                &relative_path,
109                unique_id_elements,
110                strategy,
111                pre_purge,
112                post_purge,
113                format,
114                multi_level_rules,
115                decompose_rules,
116            )
117            .await?;
118        } else {
119            // Anything that isn't a regular file is treated as a directory; fs::metadata on
120            // the caller already errored out if the path didn't exist.
121            self.handle_directory(
122                file_path,
123                unique_id_elements,
124                strategy,
125                pre_purge,
126                post_purge,
127                format,
128                multi_level_rules,
129                decompose_rules,
130            )
131            .await?;
132        }
133
134        Ok(())
135    }
136
137    #[allow(clippy::too_many_arguments)]
138    async fn handle_file(
139        &self,
140        file_path: &str,
141        relative_path: &str,
142        unique_id_elements: Option<&str>,
143        strategy: &str,
144        pre_purge: bool,
145        post_purge: bool,
146        format: &str,
147        multi_level_rules: Option<&[MultiLevelRule]>,
148        decompose_rules: Option<&[DecomposeRule]>,
149    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
150        let resolved = Path::new(file_path)
151            .canonicalize()
152            .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
153        let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
154
155        if !Self::is_xml_file(&resolved_str) {
156            log::error!(
157                "The file path provided is not an XML file: {}",
158                resolved_str
159            );
160            return Ok(());
161        }
162
163        if self.is_ignored(relative_path) {
164            log::warn!("File ignored by ignore rules: {}", resolved_str);
165            return Ok(());
166        }
167
168        let dir_path = resolved.parent().unwrap_or(Path::new("."));
169        let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
170        self.process_file(
171            &dir_path_str,
172            strategy,
173            &resolved_str,
174            unique_id_elements,
175            pre_purge,
176            post_purge,
177            format,
178            multi_level_rules,
179            decompose_rules,
180        )
181        .await
182    }
183
184    #[allow(clippy::too_many_arguments)]
185    async fn handle_directory(
186        &self,
187        dir_path: &str,
188        unique_id_elements: Option<&str>,
189        strategy: &str,
190        pre_purge: bool,
191        post_purge: bool,
192        format: &str,
193        multi_level_rules: Option<&[MultiLevelRule]>,
194        decompose_rules: Option<&[DecomposeRule]>,
195    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
196        let dir_path = normalize_path_unix(dir_path);
197        let mut entries = fs::read_dir(&dir_path).await?;
198        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
199
200        while let Some(entry) = entries.next_entry().await? {
201            let sub_path = entry.path();
202            let sub_file_path = sub_path.to_string_lossy();
203            let relative_sub = sub_path
204                .strip_prefix(&cwd)
205                .unwrap_or(&sub_path)
206                .to_string_lossy();
207            let relative_sub = Self::posix_path(&relative_sub);
208
209            if !(sub_path.is_file() && Self::is_xml_file(&sub_file_path)) {
210                continue;
211            }
212            if self.is_ignored(&relative_sub) {
213                log::warn!("File ignored by ignore rules: {}", sub_file_path);
214                continue;
215            }
216            let sub_file_path_norm = normalize_path_unix(&sub_file_path);
217            self.process_file(
218                &dir_path,
219                strategy,
220                &sub_file_path_norm,
221                unique_id_elements,
222                pre_purge,
223                post_purge,
224                format,
225                multi_level_rules,
226                decompose_rules,
227            )
228            .await?;
229        }
230        Ok(())
231    }
232
233    #[allow(clippy::too_many_arguments)]
234    async fn process_file(
235        &self,
236        dir_path: &str,
237        strategy: &str,
238        file_path: &str,
239        unique_id_elements: Option<&str>,
240        pre_purge: bool,
241        post_purge: bool,
242        format: &str,
243        multi_level_rules: Option<&[MultiLevelRule]>,
244        decompose_rules: Option<&[DecomposeRule]>,
245    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
246        log::debug!("Parsing file to disassemble: {}", file_path);
247
248        let file_name = Path::new(file_path)
249            .file_stem()
250            .and_then(|s| s.to_str())
251            .unwrap_or("output");
252        let base_name = Self::output_dir_basename(file_name);
253        let output_path = Path::new(dir_path).join(base_name);
254
255        if pre_purge && output_path.exists() {
256            fs::remove_dir_all(&output_path).await.ok();
257        }
258
259        build_disassembled_files_unified(BuildDisassembledFilesOptions {
260            file_path,
261            disassembled_path: output_path.to_str().unwrap_or("."),
262            base_name: file_name,
263            post_purge,
264            format,
265            unique_id_elements,
266            strategy,
267            decompose_rules,
268        })
269        .await?;
270
271        // Apply each multi-level rule in order. Each rule walks the same disassembly tree
272        // independently; rules are merged into the shared `.multi_level.json` so reassembly
273        // can replay them in order.
274        if let Some(rules) = multi_level_rules {
275            for rule in rules {
276                self.recursively_disassemble_multi_level(&output_path, rule, format)
277                    .await?;
278            }
279        }
280
281        Ok(())
282    }
283
284    /// Recursively walk the disassembly output; for XML files matching the rule's file_pattern,
285    /// strip the root and re-disassemble with the rule's unique_id_elements.
286    async fn recursively_disassemble_multi_level(
287        &self,
288        dir_path: &Path,
289        rule: &MultiLevelRule,
290        format: &str,
291    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
292        let mut config = crate::xml::multi_level::load_multi_level_config(dir_path)
293            .await
294            .unwrap_or_default();
295
296        let mut stack = vec![dir_path.to_path_buf()];
297        while let Some(current) = stack.pop() {
298            let mut entries = Vec::new();
299            let mut read_dir = fs::read_dir(&current).await?;
300            while let Some(entry) = read_dir.next_entry().await? {
301                entries.push(entry);
302            }
303
304            for entry in entries {
305                let path = entry.path();
306                let path_str = path.to_string_lossy().to_string();
307
308                if path.is_dir() {
309                    stack.push(path);
310                    continue;
311                }
312                // Anything not a directory is processed as a regular file below.
313                {
314                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
315                    let path_str_check = path.to_string_lossy();
316                    if !name.ends_with(".xml")
317                        || (!name.contains(&rule.file_pattern)
318                            && !path_str_check.contains(&rule.file_pattern))
319                    {
320                        continue;
321                    }
322
323                    let parsed = match parse_xml(&path_str).await {
324                        Some(p) => p,
325                        None => continue,
326                    };
327                    let has_element_to_strip = parsed
328                        .as_object()
329                        .and_then(|o| {
330                            let root_key = o.keys().find(|k| *k != "?xml")?;
331                            let root_val = o.get(root_key)?.as_object()?;
332                            Some(
333                                root_key == &rule.root_to_strip
334                                    || root_val.contains_key(&rule.root_to_strip),
335                            )
336                        })
337                        .unwrap_or(false);
338                    if !has_element_to_strip {
339                        continue;
340                    }
341
342                    let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
343
344                    let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
345                    {
346                        Some(xml) => xml,
347                        None => continue,
348                    };
349
350                    fs::write(&path, stripped_xml).await?;
351
352                    let file_stem = path
353                        .file_stem()
354                        .and_then(|s| s.to_str())
355                        .unwrap_or("output");
356                    let output_dir_name = Self::output_dir_basename(file_stem);
357                    let parent = path.parent().unwrap_or(dir_path);
358                    let second_level_output = parent.join(output_dir_name);
359
360                    build_disassembled_files_unified(BuildDisassembledFilesOptions {
361                        file_path: &path_str,
362                        disassembled_path: second_level_output.to_str().unwrap_or("."),
363                        base_name: output_dir_name,
364                        post_purge: true,
365                        format,
366                        unique_id_elements: Some(&rule.unique_id_elements),
367                        strategy: "unique-id",
368                        decompose_rules: None,
369                    })
370                    .await?;
371
372                    // Find an existing entry for this rule by (file_pattern, root_to_strip).
373                    // Multiple rules may co-exist in `.multi_level.json` (one per logical
374                    // segment); per-rule deduplication keeps each one a singleton.
375                    let existing_idx = config.rules.iter().position(|r| {
376                        r.file_pattern == rule.file_pattern && r.root_to_strip == rule.root_to_strip
377                    });
378                    match existing_idx {
379                        None => {
380                            let wrap_root = parsed
381                                .as_object()
382                                .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
383                                .unwrap_or_else(|| rule.wrap_root_element.clone());
384                            let path_segment = if rule.path_segment.is_empty() {
385                                path_segment_from_file_pattern(&rule.file_pattern)
386                            } else {
387                                rule.path_segment.clone()
388                            };
389                            let stored_xmlns = if rule.wrap_xmlns.is_empty() {
390                                wrap_xmlns
391                            } else {
392                                rule.wrap_xmlns.clone()
393                            };
394                            config.rules.push(MultiLevelRule {
395                                file_pattern: rule.file_pattern.clone(),
396                                root_to_strip: rule.root_to_strip.clone(),
397                                unique_id_elements: rule.unique_id_elements.clone(),
398                                path_segment,
399                                // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it
400                                // as root with xmlns; path_segment is the inner wrapper in each file.
401                                wrap_root_element: wrap_root,
402                                wrap_xmlns: stored_xmlns,
403                            });
404                        }
405                        Some(idx) => {
406                            // Backfill xmlns from the source if we didn't have one yet; otherwise
407                            // leave the existing entry alone (the first observed file wins).
408                            if config.rules[idx].wrap_xmlns.is_empty() {
409                                config.rules[idx].wrap_xmlns = wrap_xmlns;
410                            }
411                        }
412                    }
413                }
414            }
415        }
416
417        if !config.rules.is_empty() {
418            save_multi_level_config(dir_path, &config).await?;
419        }
420
421        Ok(())
422    }
423}
424
425impl Default for DisassembleXmlFileHandler {
426    fn default() -> Self {
427        Self::new()
428    }
429}
430
#[cfg(test)]
mod tests {
    use super::*;

    /// Short alias to keep the assertions readable.
    type Handler = DisassembleXmlFileHandler;

    /// Assert `output_dir_basename` maps each stem to the expected directory name.
    fn assert_basenames(cases: &[(&str, &str)]) {
        for (stem, expected) in cases {
            assert_eq!(Handler::output_dir_basename(stem), *expected);
        }
    }

    #[test]
    #[allow(clippy::default_constructed_unit_structs)]
    fn disassemble_handler_default_equals_new() {
        let _ = Handler::default();
    }

    #[test]
    fn is_xml_file_matches_case_insensitively() {
        for accepted in ["foo.xml", "BAR.XML"] {
            assert!(Handler::is_xml_file(accepted));
        }
        assert!(!Handler::is_xml_file("foo.txt"));
    }

    #[test]
    fn posix_path_converts_backslashes() {
        let converted = Handler::posix_path(r"C:\Users\name\file.xml");
        assert_eq!(converted, "C:/Users/name/file.xml");
    }

    #[tokio::test]
    async fn load_ignore_rules_noop_when_path_missing() {
        let mut handler = Handler::new();
        handler
            .load_ignore_rules("/definitely/does/not/exist/.ignore")
            .await;
        assert!(handler.ign.is_none());
    }

    #[tokio::test]
    async fn load_ignore_rules_builds_matcher() {
        let temp = tempfile::tempdir().unwrap();
        let ignore_file = temp.path().join(".ignore");
        tokio::fs::write(&ignore_file, "*.xml\n").await.unwrap();

        let mut handler = Handler::new();
        handler
            .load_ignore_rules(ignore_file.to_str().unwrap())
            .await;

        assert!(handler.ign.is_some());
        assert!(handler.is_ignored("file.xml"));
        assert!(!handler.is_ignored("file.txt"));
    }

    #[test]
    fn is_ignored_default_false_without_rules() {
        assert!(!Handler::new().is_ignored("some/path.xml"));
    }

    #[test]
    fn output_dir_basename_strips_only_last_dot_segment() {
        // Plain Salesforce-style metadata: strip the `.<suffix>-meta` tail.
        assert_basenames(&[
            ("HR_Admin.permissionset-meta", "HR_Admin"),
            ("Get_Info.flow-meta", "Get_Info"),
        ]);
    }

    #[test]
    fn output_dir_basename_preserves_dotted_full_names() {
        // Approval processes (`<sobject>.<process>`) and quick actions
        // (`<sobject>.<action>`) produce stems with two dots. Splitting at the first dot
        // would map distinct components of one sobject to the same directory, so only
        // the last dot-segment may be stripped.
        assert_basenames(&[
            (
                "Account_Merge__c.New_Account_Merges_2.approvalProcess-meta",
                "Account_Merge__c.New_Account_Merges_2",
            ),
            (
                "Account_Merge__c.New_Account_Merges_3.approvalProcess-meta",
                "Account_Merge__c.New_Account_Merges_3",
            ),
            ("Case.LogACall.quickAction-meta", "Case.LogACall"),
        ]);
    }

    #[test]
    fn output_dir_basename_no_dot_returns_stem_unchanged() {
        // Stems without any dot are passed through verbatim (no extension to strip).
        assert_basenames(&[("Foo", "Foo"), ("", "")]);
    }
}