Skip to main content

xml_disassembler/handlers/
disassemble.rs

1//! Disassemble XML file handler.
2
3use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5    capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6    strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use crate::utils::normalize_path_unix;
11use ignore::gitignore::GitignoreBuilder;
12use std::path::Path;
13use tokio::fs;
14
15pub struct DisassembleXmlFileHandler {
16    ign: Option<ignore::gitignore::Gitignore>,
17}
18
19impl DisassembleXmlFileHandler {
20    pub fn new() -> Self {
21        Self { ign: None }
22    }
23
24    async fn load_ignore_rules(&mut self, ignore_path: &str) {
25        let path = Path::new(ignore_path);
26        if path.exists() {
27            if let Ok(content) = fs::read_to_string(path).await {
28                let root = path.parent().unwrap_or(Path::new("."));
29                let mut builder = GitignoreBuilder::new(root);
30                for line in content.lines() {
31                    let _ = builder.add_line(None, line);
32                }
33                if let Ok(gi) = builder.build() {
34                    self.ign = Some(gi);
35                }
36            }
37        }
38    }
39
40    fn posix_path(path: &str) -> String {
41        path.replace('\\', "/")
42    }
43
44    fn is_xml_file(file_path: &str) -> bool {
45        file_path.to_lowercase().ends_with(".xml")
46    }
47
48    fn is_ignored(&self, path: &str) -> bool {
49        self.ign
50            .as_ref()
51            .map(|ign| ign.matched(path, false).is_ignore())
52            .unwrap_or(false)
53    }
54
55    #[allow(clippy::too_many_arguments)]
56    pub async fn disassemble(
57        &mut self,
58        file_path: &str,
59        unique_id_elements: Option<&str>,
60        strategy: Option<&str>,
61        pre_purge: bool,
62        post_purge: bool,
63        ignore_path: &str,
64        format: &str,
65        multi_level_rule: Option<&MultiLevelRule>,
66        decompose_rules: Option<&[DecomposeRule]>,
67    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
68        let strategy = strategy.unwrap_or("unique-id");
69        let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
70            strategy
71        } else {
72            log::warn!(
73                "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
74                strategy
75            );
76            "unique-id"
77        };
78
79        self.load_ignore_rules(ignore_path).await;
80
81        let path = Path::new(file_path);
82        let meta = fs::metadata(path).await?;
83        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
84        let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
85        let relative_path = Self::posix_path(&relative_path);
86
87        if meta.is_file() {
88            self.handle_file(
89                file_path,
90                &relative_path,
91                unique_id_elements,
92                strategy,
93                pre_purge,
94                post_purge,
95                format,
96                multi_level_rule,
97                decompose_rules,
98            )
99            .await?;
100        } else if meta.is_dir() {
101            self.handle_directory(
102                file_path,
103                unique_id_elements,
104                strategy,
105                pre_purge,
106                post_purge,
107                format,
108                multi_level_rule,
109                decompose_rules,
110            )
111            .await?;
112        }
113
114        Ok(())
115    }
116
117    #[allow(clippy::too_many_arguments)]
118    async fn handle_file(
119        &self,
120        file_path: &str,
121        relative_path: &str,
122        unique_id_elements: Option<&str>,
123        strategy: &str,
124        pre_purge: bool,
125        post_purge: bool,
126        format: &str,
127        multi_level_rule: Option<&MultiLevelRule>,
128        decompose_rules: Option<&[DecomposeRule]>,
129    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
130        let resolved = Path::new(file_path)
131            .canonicalize()
132            .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
133        let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
134
135        if !Self::is_xml_file(&resolved_str) {
136            log::error!(
137                "The file path provided is not an XML file: {}",
138                resolved_str
139            );
140            return Ok(());
141        }
142
143        if self.is_ignored(relative_path) {
144            log::warn!("File ignored by ignore rules: {}", resolved_str);
145            return Ok(());
146        }
147
148        let dir_path = resolved.parent().unwrap_or(Path::new("."));
149        let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
150        self.process_file(
151            &dir_path_str,
152            strategy,
153            &resolved_str,
154            unique_id_elements,
155            pre_purge,
156            post_purge,
157            format,
158            multi_level_rule,
159            decompose_rules,
160        )
161        .await
162    }
163
164    #[allow(clippy::too_many_arguments)]
165    async fn handle_directory(
166        &self,
167        dir_path: &str,
168        unique_id_elements: Option<&str>,
169        strategy: &str,
170        pre_purge: bool,
171        post_purge: bool,
172        format: &str,
173        multi_level_rule: Option<&MultiLevelRule>,
174        decompose_rules: Option<&[DecomposeRule]>,
175    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
176        let dir_path = normalize_path_unix(dir_path);
177        let mut entries = fs::read_dir(&dir_path).await?;
178        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
179
180        while let Some(entry) = entries.next_entry().await? {
181            let sub_path = entry.path();
182            let sub_file_path = sub_path.to_string_lossy();
183            let relative_sub = sub_path
184                .strip_prefix(&cwd)
185                .unwrap_or(&sub_path)
186                .to_string_lossy();
187            let relative_sub = Self::posix_path(&relative_sub);
188
189            if sub_path.is_file() && Self::is_xml_file(&sub_file_path) {
190                if self.is_ignored(&relative_sub) {
191                    log::warn!("File ignored by ignore rules: {}", sub_file_path);
192                } else {
193                    let sub_file_path_norm = normalize_path_unix(&sub_file_path);
194                    self.process_file(
195                        &dir_path,
196                        strategy,
197                        &sub_file_path_norm,
198                        unique_id_elements,
199                        pre_purge,
200                        post_purge,
201                        format,
202                        multi_level_rule,
203                        decompose_rules,
204                    )
205                    .await?;
206                }
207            }
208        }
209        Ok(())
210    }
211
212    #[allow(clippy::too_many_arguments)]
213    async fn process_file(
214        &self,
215        dir_path: &str,
216        strategy: &str,
217        file_path: &str,
218        unique_id_elements: Option<&str>,
219        pre_purge: bool,
220        post_purge: bool,
221        format: &str,
222        multi_level_rule: Option<&MultiLevelRule>,
223        decompose_rules: Option<&[DecomposeRule]>,
224    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
225        log::debug!("Parsing file to disassemble: {}", file_path);
226
227        let file_name = Path::new(file_path)
228            .file_stem()
229            .and_then(|s| s.to_str())
230            .unwrap_or("output");
231        let base_name = file_name.split('.').next().unwrap_or(file_name);
232        let output_path = Path::new(dir_path).join(base_name);
233
234        if pre_purge && output_path.exists() {
235            fs::remove_dir_all(&output_path).await.ok();
236        }
237
238        build_disassembled_files_unified(BuildDisassembledFilesOptions {
239            file_path,
240            disassembled_path: output_path.to_str().unwrap_or("."),
241            base_name: file_name,
242            post_purge,
243            format,
244            unique_id_elements,
245            strategy,
246            decompose_rules,
247        })
248        .await?;
249
250        if let Some(rule) = multi_level_rule {
251            self.recursively_disassemble_multi_level(&output_path, rule, format)
252                .await?;
253        }
254
255        Ok(())
256    }
257
258    /// Recursively walk the disassembly output; for XML files matching the rule's file_pattern,
259    /// strip the root and re-disassemble with the rule's unique_id_elements.
260    async fn recursively_disassemble_multi_level(
261        &self,
262        dir_path: &Path,
263        rule: &MultiLevelRule,
264        format: &str,
265    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
266        let mut config = crate::multi_level::load_multi_level_config(dir_path)
267            .await
268            .unwrap_or_default();
269
270        let mut stack = vec![dir_path.to_path_buf()];
271        while let Some(current) = stack.pop() {
272            let mut entries = Vec::new();
273            let mut read_dir = fs::read_dir(&current).await?;
274            while let Some(entry) = read_dir.next_entry().await? {
275                entries.push(entry);
276            }
277
278            for entry in entries {
279                let path = entry.path();
280                let path_str = path.to_string_lossy().to_string();
281
282                if path.is_dir() {
283                    stack.push(path);
284                } else if path.is_file() {
285                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
286                    let path_str_check = path.to_string_lossy();
287                    if !name.ends_with(".xml")
288                        || (!name.contains(&rule.file_pattern)
289                            && !path_str_check.contains(&rule.file_pattern))
290                    {
291                        continue;
292                    }
293
294                    let parsed = match parse_xml(&path_str).await {
295                        Some(p) => p,
296                        None => continue,
297                    };
298                    let has_element_to_strip = parsed
299                        .as_object()
300                        .and_then(|o| {
301                            let root_key = o.keys().find(|k| *k != "?xml")?;
302                            let root_val = o.get(root_key)?.as_object()?;
303                            Some(
304                                root_key == &rule.root_to_strip
305                                    || root_val.contains_key(&rule.root_to_strip),
306                            )
307                        })
308                        .unwrap_or(false);
309                    if !has_element_to_strip {
310                        continue;
311                    }
312
313                    let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
314
315                    let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
316                    {
317                        Some(xml) => xml,
318                        None => continue,
319                    };
320
321                    fs::write(&path, stripped_xml).await?;
322
323                    let file_stem = path
324                        .file_stem()
325                        .and_then(|s| s.to_str())
326                        .unwrap_or("output");
327                    let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
328                    let parent = path.parent().unwrap_or(dir_path);
329                    let second_level_output = parent.join(output_dir_name);
330
331                    build_disassembled_files_unified(BuildDisassembledFilesOptions {
332                        file_path: &path_str,
333                        disassembled_path: second_level_output.to_str().unwrap_or("."),
334                        base_name: output_dir_name,
335                        post_purge: true,
336                        format,
337                        unique_id_elements: Some(&rule.unique_id_elements),
338                        strategy: "unique-id",
339                        decompose_rules: None,
340                    })
341                    .await?;
342
343                    if config.rules.is_empty() {
344                        let wrap_root = parsed
345                            .as_object()
346                            .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
347                            .unwrap_or_else(|| rule.wrap_root_element.clone());
348                        config.rules.push(MultiLevelRule {
349                            file_pattern: rule.file_pattern.clone(),
350                            root_to_strip: rule.root_to_strip.clone(),
351                            unique_id_elements: rule.unique_id_elements.clone(),
352                            path_segment: if rule.path_segment.is_empty() {
353                                path_segment_from_file_pattern(&rule.file_pattern)
354                            } else {
355                                rule.path_segment.clone()
356                            },
357                            // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it as root with xmlns;
358                            // path_segment (e.g. programProcesses) is the inner wrapper in each file.
359                            wrap_root_element: wrap_root,
360                            wrap_xmlns: if rule.wrap_xmlns.is_empty() {
361                                wrap_xmlns
362                            } else {
363                                rule.wrap_xmlns.clone()
364                            },
365                        });
366                    } else if let Some(r) = config.rules.first_mut() {
367                        if r.wrap_xmlns.is_empty() {
368                            r.wrap_xmlns = wrap_xmlns;
369                        }
370                    }
371                }
372            }
373        }
374
375        if !config.rules.is_empty() {
376            save_multi_level_config(dir_path, &config).await?;
377        }
378
379        Ok(())
380    }
381}
382
383impl Default for DisassembleXmlFileHandler {
384    fn default() -> Self {
385        Self::new()
386    }
387}
388
#[cfg(test)]
mod tests {
    use super::*;

    /// `default()` and `new()` must both yield a handler without ignore rules.
    #[test]
    fn disassemble_handler_default_equals_new() {
        assert!(DisassembleXmlFileHandler::default().ign.is_none());
        assert!(DisassembleXmlFileHandler::new().ign.is_none());
    }

    /// Only backslashes are rewritten when converting to a POSIX-style path.
    #[test]
    fn posix_path_replaces_backslashes() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("a\\b\\c.xml"),
            "a/b/c.xml"
        );
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("already/posix"),
            "already/posix"
        );
    }

    /// The `.xml` suffix check ignores case and requires the suffix to be last.
    #[test]
    fn is_xml_file_checks_suffix_case_insensitively() {
        assert!(DisassembleXmlFileHandler::is_xml_file("dir/File.XML"));
        assert!(DisassembleXmlFileHandler::is_xml_file("dir/file.xml"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("dir/file.xml.bak"));
    }

    /// Without loaded rules no path is ever reported as ignored.
    #[test]
    fn is_ignored_is_false_without_rules() {
        assert!(!DisassembleXmlFileHandler::new().is_ignored("any/file.xml"));
    }
}