Skip to main content

xml_disassembler/handlers/
disassemble.rs

1//! Disassemble XML file handler.
2
3use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5    capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6    strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use ignore::gitignore::GitignoreBuilder;
11use std::path::Path;
12use tokio::fs;
13
14pub struct DisassembleXmlFileHandler {
15    ign: Option<ignore::gitignore::Gitignore>,
16}
17
18impl DisassembleXmlFileHandler {
19    pub fn new() -> Self {
20        Self { ign: None }
21    }
22
23    async fn load_ignore_rules(&mut self, ignore_path: &str) {
24        let path = Path::new(ignore_path);
25        if path.exists() {
26            if let Ok(content) = fs::read_to_string(path).await {
27                let root = path.parent().unwrap_or(Path::new("."));
28                let mut builder = GitignoreBuilder::new(root);
29                for line in content.lines() {
30                    let _ = builder.add_line(None, line);
31                }
32                if let Ok(gi) = builder.build() {
33                    self.ign = Some(gi);
34                }
35            }
36        }
37    }
38
39    fn posix_path(path: &str) -> String {
40        path.replace('\\', "/")
41    }
42
43    fn is_xml_file(file_path: &str) -> bool {
44        file_path.to_lowercase().ends_with(".xml")
45    }
46
47    fn is_ignored(&self, path: &str) -> bool {
48        self.ign
49            .as_ref()
50            .map(|ign| ign.matched(path, false).is_ignore())
51            .unwrap_or(false)
52    }
53
54    #[allow(clippy::too_many_arguments)]
55    pub async fn disassemble(
56        &mut self,
57        file_path: &str,
58        unique_id_elements: Option<&str>,
59        strategy: Option<&str>,
60        pre_purge: bool,
61        post_purge: bool,
62        ignore_path: &str,
63        format: &str,
64        multi_level_rule: Option<&MultiLevelRule>,
65        decompose_rules: Option<&[DecomposeRule]>,
66    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
67        let strategy = strategy.unwrap_or("unique-id");
68        let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
69            strategy
70        } else {
71            log::warn!(
72                "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
73                strategy
74            );
75            "unique-id"
76        };
77
78        self.load_ignore_rules(ignore_path).await;
79
80        let path = Path::new(file_path);
81        let meta = fs::metadata(path).await?;
82        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
83        let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
84        let relative_path = Self::posix_path(&relative_path);
85
86        if meta.is_file() {
87            self.handle_file(
88                file_path,
89                &relative_path,
90                unique_id_elements,
91                strategy,
92                pre_purge,
93                post_purge,
94                format,
95                multi_level_rule,
96                decompose_rules,
97            )
98            .await?;
99        } else if meta.is_dir() {
100            self.handle_directory(
101                file_path,
102                unique_id_elements,
103                strategy,
104                pre_purge,
105                post_purge,
106                format,
107                multi_level_rule,
108                decompose_rules,
109            )
110            .await?;
111        }
112
113        Ok(())
114    }
115
116    #[allow(clippy::too_many_arguments)]
117    async fn handle_file(
118        &self,
119        file_path: &str,
120        relative_path: &str,
121        unique_id_elements: Option<&str>,
122        strategy: &str,
123        pre_purge: bool,
124        post_purge: bool,
125        format: &str,
126        multi_level_rule: Option<&MultiLevelRule>,
127        decompose_rules: Option<&[DecomposeRule]>,
128    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
129        let resolved = Path::new(file_path)
130            .canonicalize()
131            .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
132        let resolved_str = resolved.to_string_lossy();
133
134        if !Self::is_xml_file(&resolved_str) {
135            log::error!(
136                "The file path provided is not an XML file: {}",
137                resolved_str
138            );
139            return Ok(());
140        }
141
142        if self.is_ignored(relative_path) {
143            log::warn!("File ignored by ignore rules: {}", resolved_str);
144            return Ok(());
145        }
146
147        let dir_path = resolved.parent().unwrap_or(Path::new("."));
148        self.process_file(
149            dir_path.to_str().unwrap_or("."),
150            strategy,
151            &resolved_str,
152            unique_id_elements,
153            pre_purge,
154            post_purge,
155            format,
156            multi_level_rule,
157            decompose_rules,
158        )
159        .await
160    }
161
162    #[allow(clippy::too_many_arguments)]
163    async fn handle_directory(
164        &self,
165        dir_path: &str,
166        unique_id_elements: Option<&str>,
167        strategy: &str,
168        pre_purge: bool,
169        post_purge: bool,
170        format: &str,
171        multi_level_rule: Option<&MultiLevelRule>,
172        decompose_rules: Option<&[DecomposeRule]>,
173    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
174        let mut entries = fs::read_dir(dir_path).await?;
175        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
176
177        while let Some(entry) = entries.next_entry().await? {
178            let sub_path = entry.path();
179            let sub_file_path = sub_path.to_string_lossy();
180            let relative_sub = sub_path
181                .strip_prefix(&cwd)
182                .unwrap_or(&sub_path)
183                .to_string_lossy();
184            let relative_sub = Self::posix_path(&relative_sub);
185
186            if sub_path.is_file() && Self::is_xml_file(&sub_file_path) {
187                if self.is_ignored(&relative_sub) {
188                    log::warn!("File ignored by ignore rules: {}", sub_file_path);
189                } else {
190                    self.process_file(
191                        dir_path,
192                        strategy,
193                        &sub_file_path,
194                        unique_id_elements,
195                        pre_purge,
196                        post_purge,
197                        format,
198                        multi_level_rule,
199                        decompose_rules,
200                    )
201                    .await?;
202                }
203            }
204        }
205        Ok(())
206    }
207
208    #[allow(clippy::too_many_arguments)]
209    async fn process_file(
210        &self,
211        dir_path: &str,
212        strategy: &str,
213        file_path: &str,
214        unique_id_elements: Option<&str>,
215        pre_purge: bool,
216        post_purge: bool,
217        format: &str,
218        multi_level_rule: Option<&MultiLevelRule>,
219        decompose_rules: Option<&[DecomposeRule]>,
220    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
221        log::debug!("Parsing file to disassemble: {}", file_path);
222
223        let file_name = Path::new(file_path)
224            .file_stem()
225            .and_then(|s| s.to_str())
226            .unwrap_or("output");
227        let base_name = file_name.split('.').next().unwrap_or(file_name);
228        let output_path = Path::new(dir_path).join(base_name);
229
230        if pre_purge && output_path.exists() {
231            fs::remove_dir_all(&output_path).await.ok();
232        }
233
234        build_disassembled_files_unified(BuildDisassembledFilesOptions {
235            file_path,
236            disassembled_path: output_path.to_str().unwrap_or("."),
237            base_name: file_name,
238            post_purge,
239            format,
240            unique_id_elements,
241            strategy,
242            decompose_rules,
243        })
244        .await?;
245
246        if let Some(rule) = multi_level_rule {
247            self.recursively_disassemble_multi_level(&output_path, rule, format)
248                .await?;
249        }
250
251        Ok(())
252    }
253
254    /// Recursively walk the disassembly output; for XML files matching the rule's file_pattern,
255    /// strip the root and re-disassemble with the rule's unique_id_elements.
256    async fn recursively_disassemble_multi_level(
257        &self,
258        dir_path: &Path,
259        rule: &MultiLevelRule,
260        format: &str,
261    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
262        let mut config = crate::multi_level::load_multi_level_config(dir_path)
263            .await
264            .unwrap_or_default();
265
266        let mut stack = vec![dir_path.to_path_buf()];
267        while let Some(current) = stack.pop() {
268            let mut entries = Vec::new();
269            let mut read_dir = fs::read_dir(&current).await?;
270            while let Some(entry) = read_dir.next_entry().await? {
271                entries.push(entry);
272            }
273
274            for entry in entries {
275                let path = entry.path();
276                let path_str = path.to_string_lossy().to_string();
277
278                if path.is_dir() {
279                    stack.push(path);
280                } else if path.is_file() {
281                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
282                    let path_str_check = path.to_string_lossy();
283                    if !name.ends_with(".xml")
284                        || (!name.contains(&rule.file_pattern)
285                            && !path_str_check.contains(&rule.file_pattern))
286                    {
287                        continue;
288                    }
289
290                    let parsed = match parse_xml(&path_str).await {
291                        Some(p) => p,
292                        None => continue,
293                    };
294                    let has_element_to_strip = parsed
295                        .as_object()
296                        .and_then(|o| {
297                            let root_key = o.keys().find(|k| *k != "?xml")?;
298                            let root_val = o.get(root_key)?.as_object()?;
299                            Some(
300                                root_key == &rule.root_to_strip
301                                    || root_val.contains_key(&rule.root_to_strip),
302                            )
303                        })
304                        .unwrap_or(false);
305                    if !has_element_to_strip {
306                        continue;
307                    }
308
309                    let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
310
311                    let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
312                    {
313                        Some(xml) => xml,
314                        None => continue,
315                    };
316
317                    fs::write(&path, stripped_xml).await?;
318
319                    let file_stem = path
320                        .file_stem()
321                        .and_then(|s| s.to_str())
322                        .unwrap_or("output");
323                    let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
324                    let parent = path.parent().unwrap_or(dir_path);
325                    let second_level_output = parent.join(output_dir_name);
326
327                    build_disassembled_files_unified(BuildDisassembledFilesOptions {
328                        file_path: &path_str,
329                        disassembled_path: second_level_output.to_str().unwrap_or("."),
330                        base_name: output_dir_name,
331                        post_purge: true,
332                        format,
333                        unique_id_elements: Some(&rule.unique_id_elements),
334                        strategy: "unique-id",
335                        decompose_rules: None,
336                    })
337                    .await?;
338
339                    if config.rules.is_empty() {
340                        let wrap_root = parsed
341                            .as_object()
342                            .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
343                            .unwrap_or_else(|| rule.wrap_root_element.clone());
344                        config.rules.push(MultiLevelRule {
345                            file_pattern: rule.file_pattern.clone(),
346                            root_to_strip: rule.root_to_strip.clone(),
347                            unique_id_elements: rule.unique_id_elements.clone(),
348                            path_segment: if rule.path_segment.is_empty() {
349                                path_segment_from_file_pattern(&rule.file_pattern)
350                            } else {
351                                rule.path_segment.clone()
352                            },
353                            // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it as root with xmlns;
354                            // path_segment (e.g. programProcesses) is the inner wrapper in each file.
355                            wrap_root_element: wrap_root,
356                            wrap_xmlns: if rule.wrap_xmlns.is_empty() {
357                                wrap_xmlns
358                            } else {
359                                rule.wrap_xmlns.clone()
360                            },
361                        });
362                    } else if let Some(r) = config.rules.first_mut() {
363                        if r.wrap_xmlns.is_empty() {
364                            r.wrap_xmlns = wrap_xmlns;
365                        }
366                    }
367                }
368            }
369        }
370
371        if !config.rules.is_empty() {
372            save_multi_level_config(dir_path, &config).await?;
373        }
374
375        Ok(())
376    }
377}
378
379impl Default for DisassembleXmlFileHandler {
380    fn default() -> Self {
381        Self::new()
382    }
383}