Skip to main content

xml_disassembler/handlers/
disassemble.rs

1//! Disassemble XML file handler.
2
3use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5    capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6    strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use crate::utils::normalize_path_unix;
11use ignore::gitignore::GitignoreBuilder;
12use std::path::Path;
13use tokio::fs;
14
15pub struct DisassembleXmlFileHandler {
16    ign: Option<ignore::gitignore::Gitignore>,
17}
18
19impl DisassembleXmlFileHandler {
20    pub fn new() -> Self {
21        Self { ign: None }
22    }
23
24    async fn load_ignore_rules(&mut self, ignore_path: &str) {
25        let path = Path::new(ignore_path);
26        let content = match fs::read_to_string(path).await {
27            Ok(c) => c,
28            Err(_) => return,
29        };
30        let root = path.parent().unwrap_or(Path::new("."));
31        let mut builder = GitignoreBuilder::new(root);
32        for line in content.lines() {
33            let _ = builder.add_line(None, line);
34        }
35        // `GitignoreBuilder::build` only fails on unlikely I/O errors; treat as absent rules.
36        self.ign = builder.build().ok();
37    }
38
39    fn posix_path(path: &str) -> String {
40        path.replace('\\', "/")
41    }
42
43    fn is_xml_file(file_path: &str) -> bool {
44        file_path.to_lowercase().ends_with(".xml")
45    }
46
47    fn is_ignored(&self, path: &str) -> bool {
48        self.ign
49            .as_ref()
50            .map(|ign| ign.matched(path, false).is_ignore())
51            .unwrap_or(false)
52    }
53
54    #[allow(clippy::too_many_arguments)]
55    pub async fn disassemble(
56        &mut self,
57        file_path: &str,
58        unique_id_elements: Option<&str>,
59        strategy: Option<&str>,
60        pre_purge: bool,
61        post_purge: bool,
62        ignore_path: &str,
63        format: &str,
64        multi_level_rule: Option<&MultiLevelRule>,
65        decompose_rules: Option<&[DecomposeRule]>,
66    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
67        let strategy = strategy.unwrap_or("unique-id");
68        let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
69            strategy
70        } else {
71            log::warn!(
72                "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
73                strategy
74            );
75            "unique-id"
76        };
77
78        self.load_ignore_rules(ignore_path).await;
79
80        let path = Path::new(file_path);
81        let meta = fs::metadata(path).await?;
82        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
83        let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
84        let relative_path = Self::posix_path(&relative_path);
85
86        if meta.is_file() {
87            self.handle_file(
88                file_path,
89                &relative_path,
90                unique_id_elements,
91                strategy,
92                pre_purge,
93                post_purge,
94                format,
95                multi_level_rule,
96                decompose_rules,
97            )
98            .await?;
99        } else {
100            // Anything that isn't a regular file is treated as a directory; fs::metadata on
101            // the caller already errored out if the path didn't exist.
102            self.handle_directory(
103                file_path,
104                unique_id_elements,
105                strategy,
106                pre_purge,
107                post_purge,
108                format,
109                multi_level_rule,
110                decompose_rules,
111            )
112            .await?;
113        }
114
115        Ok(())
116    }
117
118    #[allow(clippy::too_many_arguments)]
119    async fn handle_file(
120        &self,
121        file_path: &str,
122        relative_path: &str,
123        unique_id_elements: Option<&str>,
124        strategy: &str,
125        pre_purge: bool,
126        post_purge: bool,
127        format: &str,
128        multi_level_rule: Option<&MultiLevelRule>,
129        decompose_rules: Option<&[DecomposeRule]>,
130    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
131        let resolved = Path::new(file_path)
132            .canonicalize()
133            .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
134        let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
135
136        if !Self::is_xml_file(&resolved_str) {
137            log::error!(
138                "The file path provided is not an XML file: {}",
139                resolved_str
140            );
141            return Ok(());
142        }
143
144        if self.is_ignored(relative_path) {
145            log::warn!("File ignored by ignore rules: {}", resolved_str);
146            return Ok(());
147        }
148
149        let dir_path = resolved.parent().unwrap_or(Path::new("."));
150        let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
151        self.process_file(
152            &dir_path_str,
153            strategy,
154            &resolved_str,
155            unique_id_elements,
156            pre_purge,
157            post_purge,
158            format,
159            multi_level_rule,
160            decompose_rules,
161        )
162        .await
163    }
164
165    #[allow(clippy::too_many_arguments)]
166    async fn handle_directory(
167        &self,
168        dir_path: &str,
169        unique_id_elements: Option<&str>,
170        strategy: &str,
171        pre_purge: bool,
172        post_purge: bool,
173        format: &str,
174        multi_level_rule: Option<&MultiLevelRule>,
175        decompose_rules: Option<&[DecomposeRule]>,
176    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
177        let dir_path = normalize_path_unix(dir_path);
178        let mut entries = fs::read_dir(&dir_path).await?;
179        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
180
181        while let Some(entry) = entries.next_entry().await? {
182            let sub_path = entry.path();
183            let sub_file_path = sub_path.to_string_lossy();
184            let relative_sub = sub_path
185                .strip_prefix(&cwd)
186                .unwrap_or(&sub_path)
187                .to_string_lossy();
188            let relative_sub = Self::posix_path(&relative_sub);
189
190            if !(sub_path.is_file() && Self::is_xml_file(&sub_file_path)) {
191                continue;
192            }
193            if self.is_ignored(&relative_sub) {
194                log::warn!("File ignored by ignore rules: {}", sub_file_path);
195                continue;
196            }
197            let sub_file_path_norm = normalize_path_unix(&sub_file_path);
198            self.process_file(
199                &dir_path,
200                strategy,
201                &sub_file_path_norm,
202                unique_id_elements,
203                pre_purge,
204                post_purge,
205                format,
206                multi_level_rule,
207                decompose_rules,
208            )
209            .await?;
210        }
211        Ok(())
212    }
213
214    #[allow(clippy::too_many_arguments)]
215    async fn process_file(
216        &self,
217        dir_path: &str,
218        strategy: &str,
219        file_path: &str,
220        unique_id_elements: Option<&str>,
221        pre_purge: bool,
222        post_purge: bool,
223        format: &str,
224        multi_level_rule: Option<&MultiLevelRule>,
225        decompose_rules: Option<&[DecomposeRule]>,
226    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
227        log::debug!("Parsing file to disassemble: {}", file_path);
228
229        let file_name = Path::new(file_path)
230            .file_stem()
231            .and_then(|s| s.to_str())
232            .unwrap_or("output");
233        let base_name = file_name.split('.').next().unwrap_or(file_name);
234        let output_path = Path::new(dir_path).join(base_name);
235
236        if pre_purge && output_path.exists() {
237            fs::remove_dir_all(&output_path).await.ok();
238        }
239
240        build_disassembled_files_unified(BuildDisassembledFilesOptions {
241            file_path,
242            disassembled_path: output_path.to_str().unwrap_or("."),
243            base_name: file_name,
244            post_purge,
245            format,
246            unique_id_elements,
247            strategy,
248            decompose_rules,
249        })
250        .await?;
251
252        if let Some(rule) = multi_level_rule {
253            self.recursively_disassemble_multi_level(&output_path, rule, format)
254                .await?;
255        }
256
257        Ok(())
258    }
259
260    /// Recursively walk the disassembly output; for XML files matching the rule's file_pattern,
261    /// strip the root and re-disassemble with the rule's unique_id_elements.
262    async fn recursively_disassemble_multi_level(
263        &self,
264        dir_path: &Path,
265        rule: &MultiLevelRule,
266        format: &str,
267    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
268        let mut config = crate::multi_level::load_multi_level_config(dir_path)
269            .await
270            .unwrap_or_default();
271
272        let mut stack = vec![dir_path.to_path_buf()];
273        while let Some(current) = stack.pop() {
274            let mut entries = Vec::new();
275            let mut read_dir = fs::read_dir(&current).await?;
276            while let Some(entry) = read_dir.next_entry().await? {
277                entries.push(entry);
278            }
279
280            for entry in entries {
281                let path = entry.path();
282                let path_str = path.to_string_lossy().to_string();
283
284                if path.is_dir() {
285                    stack.push(path);
286                    continue;
287                }
288                // Anything not a directory is processed as a regular file below.
289                {
290                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
291                    let path_str_check = path.to_string_lossy();
292                    if !name.ends_with(".xml")
293                        || (!name.contains(&rule.file_pattern)
294                            && !path_str_check.contains(&rule.file_pattern))
295                    {
296                        continue;
297                    }
298
299                    let parsed = match parse_xml(&path_str).await {
300                        Some(p) => p,
301                        None => continue,
302                    };
303                    let has_element_to_strip = parsed
304                        .as_object()
305                        .and_then(|o| {
306                            let root_key = o.keys().find(|k| *k != "?xml")?;
307                            let root_val = o.get(root_key)?.as_object()?;
308                            Some(
309                                root_key == &rule.root_to_strip
310                                    || root_val.contains_key(&rule.root_to_strip),
311                            )
312                        })
313                        .unwrap_or(false);
314                    if !has_element_to_strip {
315                        continue;
316                    }
317
318                    let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
319
320                    let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
321                    {
322                        Some(xml) => xml,
323                        None => continue,
324                    };
325
326                    fs::write(&path, stripped_xml).await?;
327
328                    let file_stem = path
329                        .file_stem()
330                        .and_then(|s| s.to_str())
331                        .unwrap_or("output");
332                    let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
333                    let parent = path.parent().unwrap_or(dir_path);
334                    let second_level_output = parent.join(output_dir_name);
335
336                    build_disassembled_files_unified(BuildDisassembledFilesOptions {
337                        file_path: &path_str,
338                        disassembled_path: second_level_output.to_str().unwrap_or("."),
339                        base_name: output_dir_name,
340                        post_purge: true,
341                        format,
342                        unique_id_elements: Some(&rule.unique_id_elements),
343                        strategy: "unique-id",
344                        decompose_rules: None,
345                    })
346                    .await?;
347
348                    match config.rules.first_mut() {
349                        None => {
350                            let wrap_root = parsed
351                                .as_object()
352                                .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
353                                .unwrap_or_else(|| rule.wrap_root_element.clone());
354                            let path_segment = if rule.path_segment.is_empty() {
355                                path_segment_from_file_pattern(&rule.file_pattern)
356                            } else {
357                                rule.path_segment.clone()
358                            };
359                            let stored_xmlns = if rule.wrap_xmlns.is_empty() {
360                                wrap_xmlns
361                            } else {
362                                rule.wrap_xmlns.clone()
363                            };
364                            config.rules.push(MultiLevelRule {
365                                file_pattern: rule.file_pattern.clone(),
366                                root_to_strip: rule.root_to_strip.clone(),
367                                unique_id_elements: rule.unique_id_elements.clone(),
368                                path_segment,
369                                // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it
370                                // as root with xmlns; path_segment is the inner wrapper in each file.
371                                wrap_root_element: wrap_root,
372                                wrap_xmlns: stored_xmlns,
373                            });
374                        }
375                        Some(r) if r.wrap_xmlns.is_empty() => {
376                            r.wrap_xmlns = wrap_xmlns;
377                        }
378                        Some(_) => {}
379                    }
380                }
381            }
382        }
383
384        if !config.rules.is_empty() {
385            save_multi_level_config(dir_path, &config).await?;
386        }
387
388        Ok(())
389    }
390}
391
392impl Default for DisassembleXmlFileHandler {
393    fn default() -> Self {
394        Self::new()
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Both constructors must yield a handler without ignore rules.
    #[test]
    fn disassemble_handler_default_equals_new() {
        assert!(DisassembleXmlFileHandler::default().ign.is_none());
        assert!(DisassembleXmlFileHandler::new().ign.is_none());
    }

    /// The `.xml` suffix check ignores case and looks only at the end of the path.
    #[test]
    fn is_xml_file_matches_case_insensitively() {
        assert!(DisassembleXmlFileHandler::is_xml_file("foo.xml"));
        assert!(DisassembleXmlFileHandler::is_xml_file("BAR.XML"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("foo.txt"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("foo.xml.bak"));
    }

    /// Backslashes become forward slashes; other characters are untouched.
    #[test]
    fn posix_path_converts_backslashes() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path(r"C:\Users\name\file.xml"),
            "C:/Users/name/file.xml"
        );
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("already/posix.xml"),
            "already/posix.xml"
        );
    }

    /// A missing ignore file leaves the handler without rules.
    #[tokio::test]
    async fn load_ignore_rules_noop_when_path_missing() {
        let mut handler = DisassembleXmlFileHandler::new();
        handler
            .load_ignore_rules("/definitely/does/not/exist/.ignore")
            .await;
        assert!(handler.ign.is_none());
    }

    /// A readable ignore file produces a matcher that honours its patterns.
    #[tokio::test]
    async fn load_ignore_rules_builds_matcher() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join(".ignore");
        tokio::fs::write(&path, "*.xml\n").await.unwrap();
        let mut handler = DisassembleXmlFileHandler::new();
        handler.load_ignore_rules(path.to_str().unwrap()).await;
        assert!(handler.ign.is_some());
        assert!(handler.is_ignored("file.xml"));
        assert!(!handler.is_ignored("file.txt"));
    }

    /// Without loaded rules nothing is ignored.
    #[test]
    fn is_ignored_default_false_without_rules() {
        let handler = DisassembleXmlFileHandler::new();
        assert!(!handler.is_ignored("some/path.xml"));
    }
}