Skip to main content

config_disassembler/xml/handlers/
disassemble.rs

1//! Disassemble XML file handler.
2
3use crate::xml::builders::build_disassembled_files_unified;
4use crate::xml::multi_level::{
5    capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6    strip_root_and_build_xml,
7};
8use crate::xml::parsers::parse_xml;
9use crate::xml::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use crate::xml::utils::normalize_path_unix;
11use ignore::gitignore::GitignoreBuilder;
12use std::path::Path;
13use tokio::fs;
14
15pub struct DisassembleXmlFileHandler {
16    ign: Option<ignore::gitignore::Gitignore>,
17}
18
19impl DisassembleXmlFileHandler {
20    pub fn new() -> Self {
21        Self { ign: None }
22    }
23
24    async fn load_ignore_rules(&mut self, ignore_path: &str) {
25        let path = Path::new(ignore_path);
26        let content = match fs::read_to_string(path).await {
27            Ok(c) => c,
28            Err(_) => return,
29        };
30        let root = path.parent().unwrap_or(Path::new("."));
31        let mut builder = GitignoreBuilder::new(root);
32        for line in content.lines() {
33            let _ = builder.add_line(None, line);
34        }
35        // `GitignoreBuilder::build` only fails on unlikely I/O errors; treat as absent rules.
36        self.ign = builder.build().ok();
37    }
38
39    fn posix_path(path: &str) -> String {
40        path.replace('\\', "/")
41    }
42
43    fn is_xml_file(file_path: &str) -> bool {
44        file_path.to_lowercase().ends_with(".xml")
45    }
46
47    fn is_ignored(&self, path: &str) -> bool {
48        self.ign
49            .as_ref()
50            .map(|ign| ign.matched(path, false).is_ignore())
51            .unwrap_or(false)
52    }
53
54    #[allow(clippy::too_many_arguments)]
55    pub async fn disassemble(
56        &mut self,
57        file_path: &str,
58        unique_id_elements: Option<&str>,
59        strategy: Option<&str>,
60        pre_purge: bool,
61        post_purge: bool,
62        ignore_path: &str,
63        format: &str,
64        multi_level_rules: Option<&[MultiLevelRule]>,
65        decompose_rules: Option<&[DecomposeRule]>,
66    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
67        let strategy = strategy.unwrap_or("unique-id");
68        let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
69            strategy
70        } else {
71            log::warn!(
72                "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
73                strategy
74            );
75            "unique-id"
76        };
77
78        self.load_ignore_rules(ignore_path).await;
79
80        let path = Path::new(file_path);
81        let meta = fs::metadata(path).await?;
82        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
83        let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
84        let relative_path = Self::posix_path(&relative_path);
85
86        // Treat an empty rules slice as "no multi-level".
87        let multi_level_rules = multi_level_rules.filter(|rules| !rules.is_empty());
88
89        if meta.is_file() {
90            self.handle_file(
91                file_path,
92                &relative_path,
93                unique_id_elements,
94                strategy,
95                pre_purge,
96                post_purge,
97                format,
98                multi_level_rules,
99                decompose_rules,
100            )
101            .await?;
102        } else {
103            // Anything that isn't a regular file is treated as a directory; fs::metadata on
104            // the caller already errored out if the path didn't exist.
105            self.handle_directory(
106                file_path,
107                unique_id_elements,
108                strategy,
109                pre_purge,
110                post_purge,
111                format,
112                multi_level_rules,
113                decompose_rules,
114            )
115            .await?;
116        }
117
118        Ok(())
119    }
120
121    #[allow(clippy::too_many_arguments)]
122    async fn handle_file(
123        &self,
124        file_path: &str,
125        relative_path: &str,
126        unique_id_elements: Option<&str>,
127        strategy: &str,
128        pre_purge: bool,
129        post_purge: bool,
130        format: &str,
131        multi_level_rules: Option<&[MultiLevelRule]>,
132        decompose_rules: Option<&[DecomposeRule]>,
133    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
134        let resolved = Path::new(file_path)
135            .canonicalize()
136            .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
137        let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
138
139        if !Self::is_xml_file(&resolved_str) {
140            log::error!(
141                "The file path provided is not an XML file: {}",
142                resolved_str
143            );
144            return Ok(());
145        }
146
147        if self.is_ignored(relative_path) {
148            log::warn!("File ignored by ignore rules: {}", resolved_str);
149            return Ok(());
150        }
151
152        let dir_path = resolved.parent().unwrap_or(Path::new("."));
153        let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
154        self.process_file(
155            &dir_path_str,
156            strategy,
157            &resolved_str,
158            unique_id_elements,
159            pre_purge,
160            post_purge,
161            format,
162            multi_level_rules,
163            decompose_rules,
164        )
165        .await
166    }
167
168    #[allow(clippy::too_many_arguments)]
169    async fn handle_directory(
170        &self,
171        dir_path: &str,
172        unique_id_elements: Option<&str>,
173        strategy: &str,
174        pre_purge: bool,
175        post_purge: bool,
176        format: &str,
177        multi_level_rules: Option<&[MultiLevelRule]>,
178        decompose_rules: Option<&[DecomposeRule]>,
179    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
180        let dir_path = normalize_path_unix(dir_path);
181        let mut entries = fs::read_dir(&dir_path).await?;
182        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
183
184        while let Some(entry) = entries.next_entry().await? {
185            let sub_path = entry.path();
186            let sub_file_path = sub_path.to_string_lossy();
187            let relative_sub = sub_path
188                .strip_prefix(&cwd)
189                .unwrap_or(&sub_path)
190                .to_string_lossy();
191            let relative_sub = Self::posix_path(&relative_sub);
192
193            if !(sub_path.is_file() && Self::is_xml_file(&sub_file_path)) {
194                continue;
195            }
196            if self.is_ignored(&relative_sub) {
197                log::warn!("File ignored by ignore rules: {}", sub_file_path);
198                continue;
199            }
200            let sub_file_path_norm = normalize_path_unix(&sub_file_path);
201            self.process_file(
202                &dir_path,
203                strategy,
204                &sub_file_path_norm,
205                unique_id_elements,
206                pre_purge,
207                post_purge,
208                format,
209                multi_level_rules,
210                decompose_rules,
211            )
212            .await?;
213        }
214        Ok(())
215    }
216
217    #[allow(clippy::too_many_arguments)]
218    async fn process_file(
219        &self,
220        dir_path: &str,
221        strategy: &str,
222        file_path: &str,
223        unique_id_elements: Option<&str>,
224        pre_purge: bool,
225        post_purge: bool,
226        format: &str,
227        multi_level_rules: Option<&[MultiLevelRule]>,
228        decompose_rules: Option<&[DecomposeRule]>,
229    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
230        log::debug!("Parsing file to disassemble: {}", file_path);
231
232        let file_name = Path::new(file_path)
233            .file_stem()
234            .and_then(|s| s.to_str())
235            .unwrap_or("output");
236        let base_name = file_name.split('.').next().unwrap_or(file_name);
237        let output_path = Path::new(dir_path).join(base_name);
238
239        if pre_purge && output_path.exists() {
240            fs::remove_dir_all(&output_path).await.ok();
241        }
242
243        build_disassembled_files_unified(BuildDisassembledFilesOptions {
244            file_path,
245            disassembled_path: output_path.to_str().unwrap_or("."),
246            base_name: file_name,
247            post_purge,
248            format,
249            unique_id_elements,
250            strategy,
251            decompose_rules,
252        })
253        .await?;
254
255        // Apply each multi-level rule in order. Each rule walks the same disassembly tree
256        // independently; rules are merged into the shared `.multi_level.json` so reassembly
257        // can replay them in order.
258        if let Some(rules) = multi_level_rules {
259            for rule in rules {
260                self.recursively_disassemble_multi_level(&output_path, rule, format)
261                    .await?;
262            }
263        }
264
265        Ok(())
266    }
267
268    /// Recursively walk the disassembly output; for XML files matching the rule's file_pattern,
269    /// strip the root and re-disassemble with the rule's unique_id_elements.
270    async fn recursively_disassemble_multi_level(
271        &self,
272        dir_path: &Path,
273        rule: &MultiLevelRule,
274        format: &str,
275    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
276        let mut config = crate::xml::multi_level::load_multi_level_config(dir_path)
277            .await
278            .unwrap_or_default();
279
280        let mut stack = vec![dir_path.to_path_buf()];
281        while let Some(current) = stack.pop() {
282            let mut entries = Vec::new();
283            let mut read_dir = fs::read_dir(&current).await?;
284            while let Some(entry) = read_dir.next_entry().await? {
285                entries.push(entry);
286            }
287
288            for entry in entries {
289                let path = entry.path();
290                let path_str = path.to_string_lossy().to_string();
291
292                if path.is_dir() {
293                    stack.push(path);
294                    continue;
295                }
296                // Anything not a directory is processed as a regular file below.
297                {
298                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
299                    let path_str_check = path.to_string_lossy();
300                    if !name.ends_with(".xml")
301                        || (!name.contains(&rule.file_pattern)
302                            && !path_str_check.contains(&rule.file_pattern))
303                    {
304                        continue;
305                    }
306
307                    let parsed = match parse_xml(&path_str).await {
308                        Some(p) => p,
309                        None => continue,
310                    };
311                    let has_element_to_strip = parsed
312                        .as_object()
313                        .and_then(|o| {
314                            let root_key = o.keys().find(|k| *k != "?xml")?;
315                            let root_val = o.get(root_key)?.as_object()?;
316                            Some(
317                                root_key == &rule.root_to_strip
318                                    || root_val.contains_key(&rule.root_to_strip),
319                            )
320                        })
321                        .unwrap_or(false);
322                    if !has_element_to_strip {
323                        continue;
324                    }
325
326                    let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
327
328                    let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
329                    {
330                        Some(xml) => xml,
331                        None => continue,
332                    };
333
334                    fs::write(&path, stripped_xml).await?;
335
336                    let file_stem = path
337                        .file_stem()
338                        .and_then(|s| s.to_str())
339                        .unwrap_or("output");
340                    let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
341                    let parent = path.parent().unwrap_or(dir_path);
342                    let second_level_output = parent.join(output_dir_name);
343
344                    build_disassembled_files_unified(BuildDisassembledFilesOptions {
345                        file_path: &path_str,
346                        disassembled_path: second_level_output.to_str().unwrap_or("."),
347                        base_name: output_dir_name,
348                        post_purge: true,
349                        format,
350                        unique_id_elements: Some(&rule.unique_id_elements),
351                        strategy: "unique-id",
352                        decompose_rules: None,
353                    })
354                    .await?;
355
356                    // Find an existing entry for this rule by (file_pattern, root_to_strip).
357                    // Multiple rules may co-exist in `.multi_level.json` (one per logical
358                    // segment); per-rule deduplication keeps each one a singleton.
359                    let existing_idx = config.rules.iter().position(|r| {
360                        r.file_pattern == rule.file_pattern && r.root_to_strip == rule.root_to_strip
361                    });
362                    match existing_idx {
363                        None => {
364                            let wrap_root = parsed
365                                .as_object()
366                                .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
367                                .unwrap_or_else(|| rule.wrap_root_element.clone());
368                            let path_segment = if rule.path_segment.is_empty() {
369                                path_segment_from_file_pattern(&rule.file_pattern)
370                            } else {
371                                rule.path_segment.clone()
372                            };
373                            let stored_xmlns = if rule.wrap_xmlns.is_empty() {
374                                wrap_xmlns
375                            } else {
376                                rule.wrap_xmlns.clone()
377                            };
378                            config.rules.push(MultiLevelRule {
379                                file_pattern: rule.file_pattern.clone(),
380                                root_to_strip: rule.root_to_strip.clone(),
381                                unique_id_elements: rule.unique_id_elements.clone(),
382                                path_segment,
383                                // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it
384                                // as root with xmlns; path_segment is the inner wrapper in each file.
385                                wrap_root_element: wrap_root,
386                                wrap_xmlns: stored_xmlns,
387                            });
388                        }
389                        Some(idx) => {
390                            // Backfill xmlns from the source if we didn't have one yet; otherwise
391                            // leave the existing entry alone (the first observed file wins).
392                            if config.rules[idx].wrap_xmlns.is_empty() {
393                                config.rules[idx].wrap_xmlns = wrap_xmlns;
394                            }
395                        }
396                    }
397                }
398            }
399        }
400
401        if !config.rules.is_empty() {
402            save_multi_level_config(dir_path, &config).await?;
403        }
404
405        Ok(())
406    }
407}
408
409impl Default for DisassembleXmlFileHandler {
410    fn default() -> Self {
411        Self::new()
412    }
413}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// Both constructors must produce a handler without any ignore rules.
    #[test]
    fn disassemble_handler_default_equals_new() {
        assert!(DisassembleXmlFileHandler::default().ign.is_none());
        assert!(DisassembleXmlFileHandler::new().ign.is_none());
    }

    #[test]
    fn is_xml_file_matches_case_insensitively() {
        assert!(DisassembleXmlFileHandler::is_xml_file("foo.xml"));
        assert!(DisassembleXmlFileHandler::is_xml_file("BAR.XML"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("foo.txt"));
        // Only the final suffix counts.
        assert!(!DisassembleXmlFileHandler::is_xml_file("foo.xml.bak"));
        assert!(!DisassembleXmlFileHandler::is_xml_file(""));
    }

    #[test]
    fn posix_path_converts_backslashes() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path(r"C:\Users\name\file.xml"),
            "C:/Users/name/file.xml"
        );
        // Paths that already use forward slashes are returned unchanged.
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("already/posix/file.xml"),
            "already/posix/file.xml"
        );
    }

    #[tokio::test]
    async fn load_ignore_rules_noop_when_path_missing() {
        let mut handler = DisassembleXmlFileHandler::new();
        handler
            .load_ignore_rules("/definitely/does/not/exist/.ignore")
            .await;
        assert!(handler.ign.is_none());
    }

    #[tokio::test]
    async fn load_ignore_rules_builds_matcher() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join(".ignore");
        tokio::fs::write(&path, "*.xml\n").await.unwrap();
        let mut handler = DisassembleXmlFileHandler::new();
        handler.load_ignore_rules(path.to_str().unwrap()).await;
        assert!(handler.ign.is_some());
        assert!(handler.is_ignored("file.xml"));
        assert!(!handler.is_ignored("file.txt"));
    }

    #[test]
    fn is_ignored_default_false_without_rules() {
        let handler = DisassembleXmlFileHandler::new();
        assert!(!handler.is_ignored("some/path.xml"));
    }
}
466}