Skip to main content

xml_disassembler/handlers/
disassemble.rs

1//! Disassemble XML file handler.
2
3use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5    capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6    strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, MultiLevelRule};
10use ignore::gitignore::GitignoreBuilder;
11use std::path::Path;
12use tokio::fs;
13
14pub struct DisassembleXmlFileHandler {
15    ign: Option<ignore::gitignore::Gitignore>,
16}
17
18impl DisassembleXmlFileHandler {
19    pub fn new() -> Self {
20        Self { ign: None }
21    }
22
23    async fn load_ignore_rules(&mut self, ignore_path: &str) {
24        let path = Path::new(ignore_path);
25        if path.exists() {
26            if let Ok(content) = fs::read_to_string(path).await {
27                let root = path.parent().unwrap_or(Path::new("."));
28                let mut builder = GitignoreBuilder::new(root);
29                for line in content.lines() {
30                    let _ = builder.add_line(None, line);
31                }
32                if let Ok(gi) = builder.build() {
33                    self.ign = Some(gi);
34                }
35            }
36        }
37    }
38
39    fn posix_path(path: &str) -> String {
40        path.replace('\\', "/")
41    }
42
43    fn is_xml_file(file_path: &str) -> bool {
44        file_path.to_lowercase().ends_with(".xml")
45    }
46
47    fn is_ignored(&self, path: &str) -> bool {
48        self.ign
49            .as_ref()
50            .map(|ign| ign.matched(path, false).is_ignore())
51            .unwrap_or(false)
52    }
53
54    #[allow(clippy::too_many_arguments)]
55    pub async fn disassemble(
56        &mut self,
57        file_path: &str,
58        unique_id_elements: Option<&str>,
59        strategy: Option<&str>,
60        pre_purge: bool,
61        post_purge: bool,
62        ignore_path: &str,
63        format: &str,
64        multi_level_rule: Option<&MultiLevelRule>,
65    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
66        let strategy = strategy.unwrap_or("unique-id");
67        let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
68            strategy
69        } else {
70            log::warn!(
71                "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
72                strategy
73            );
74            "unique-id"
75        };
76
77        self.load_ignore_rules(ignore_path).await;
78
79        let path = Path::new(file_path);
80        let meta = fs::metadata(path).await?;
81        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
82        let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
83        let relative_path = Self::posix_path(&relative_path);
84
85        if meta.is_file() {
86            self.handle_file(
87                file_path,
88                &relative_path,
89                unique_id_elements,
90                strategy,
91                pre_purge,
92                post_purge,
93                format,
94                multi_level_rule,
95            )
96            .await?;
97        } else if meta.is_dir() {
98            self.handle_directory(
99                file_path,
100                unique_id_elements,
101                strategy,
102                pre_purge,
103                post_purge,
104                format,
105                multi_level_rule,
106            )
107            .await?;
108        }
109
110        Ok(())
111    }
112
113    #[allow(clippy::too_many_arguments)]
114    async fn handle_file(
115        &self,
116        file_path: &str,
117        relative_path: &str,
118        unique_id_elements: Option<&str>,
119        strategy: &str,
120        pre_purge: bool,
121        post_purge: bool,
122        format: &str,
123        multi_level_rule: Option<&MultiLevelRule>,
124    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
125        let resolved = Path::new(file_path)
126            .canonicalize()
127            .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
128        let resolved_str = resolved.to_string_lossy();
129
130        if !Self::is_xml_file(&resolved_str) {
131            log::error!(
132                "The file path provided is not an XML file: {}",
133                resolved_str
134            );
135            return Ok(());
136        }
137
138        if self.is_ignored(relative_path) {
139            log::warn!("File ignored by ignore rules: {}", resolved_str);
140            return Ok(());
141        }
142
143        let dir_path = resolved.parent().unwrap_or(Path::new("."));
144        self.process_file(
145            dir_path.to_str().unwrap_or("."),
146            strategy,
147            &resolved_str,
148            unique_id_elements,
149            pre_purge,
150            post_purge,
151            format,
152            multi_level_rule,
153        )
154        .await
155    }
156
157    #[allow(clippy::too_many_arguments)]
158    async fn handle_directory(
159        &self,
160        dir_path: &str,
161        unique_id_elements: Option<&str>,
162        strategy: &str,
163        pre_purge: bool,
164        post_purge: bool,
165        format: &str,
166        multi_level_rule: Option<&MultiLevelRule>,
167    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
168        let mut entries = fs::read_dir(dir_path).await?;
169        let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
170
171        while let Some(entry) = entries.next_entry().await? {
172            let sub_path = entry.path();
173            let sub_file_path = sub_path.to_string_lossy();
174            let relative_sub = sub_path
175                .strip_prefix(&cwd)
176                .unwrap_or(&sub_path)
177                .to_string_lossy();
178            let relative_sub = Self::posix_path(&relative_sub);
179
180            if sub_path.is_file() && Self::is_xml_file(&sub_file_path) {
181                if self.is_ignored(&relative_sub) {
182                    log::warn!("File ignored by ignore rules: {}", sub_file_path);
183                } else {
184                    self.process_file(
185                        dir_path,
186                        strategy,
187                        &sub_file_path,
188                        unique_id_elements,
189                        pre_purge,
190                        post_purge,
191                        format,
192                        multi_level_rule,
193                    )
194                    .await?;
195                }
196            }
197        }
198        Ok(())
199    }
200
201    #[allow(clippy::too_many_arguments)]
202    async fn process_file(
203        &self,
204        dir_path: &str,
205        strategy: &str,
206        file_path: &str,
207        unique_id_elements: Option<&str>,
208        pre_purge: bool,
209        post_purge: bool,
210        format: &str,
211        multi_level_rule: Option<&MultiLevelRule>,
212    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
213        log::debug!("Parsing file to disassemble: {}", file_path);
214
215        let file_name = Path::new(file_path)
216            .file_stem()
217            .and_then(|s| s.to_str())
218            .unwrap_or("output");
219        let base_name = file_name.split('.').next().unwrap_or(file_name);
220        let output_path = Path::new(dir_path).join(base_name);
221
222        if pre_purge && output_path.exists() {
223            fs::remove_dir_all(&output_path).await.ok();
224        }
225
226        build_disassembled_files_unified(BuildDisassembledFilesOptions {
227            file_path,
228            disassembled_path: output_path.to_str().unwrap_or("."),
229            base_name: file_name,
230            post_purge,
231            format,
232            unique_id_elements,
233            strategy,
234        })
235        .await?;
236
237        if let Some(rule) = multi_level_rule {
238            self.recursively_disassemble_multi_level(&output_path, rule, format)
239                .await?;
240        }
241
242        Ok(())
243    }
244
245    /// Recursively walk the disassembly output; for XML files matching the rule's file_pattern,
246    /// strip the root and re-disassemble with the rule's unique_id_elements.
247    async fn recursively_disassemble_multi_level(
248        &self,
249        dir_path: &Path,
250        rule: &MultiLevelRule,
251        format: &str,
252    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
253        let mut config = crate::multi_level::load_multi_level_config(dir_path)
254            .await
255            .unwrap_or_default();
256
257        let mut stack = vec![dir_path.to_path_buf()];
258        while let Some(current) = stack.pop() {
259            let mut entries = Vec::new();
260            let mut read_dir = fs::read_dir(&current).await?;
261            while let Some(entry) = read_dir.next_entry().await? {
262                entries.push(entry);
263            }
264
265            for entry in entries {
266                let path = entry.path();
267                let path_str = path.to_string_lossy().to_string();
268
269                if path.is_dir() {
270                    stack.push(path);
271                } else if path.is_file() {
272                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
273                    let path_str_check = path.to_string_lossy();
274                    if !name.ends_with(".xml")
275                        || (!name.contains(&rule.file_pattern)
276                            && !path_str_check.contains(&rule.file_pattern))
277                    {
278                        continue;
279                    }
280
281                    let parsed = match parse_xml(&path_str).await {
282                        Some(p) => p,
283                        None => continue,
284                    };
285                    let has_element_to_strip = parsed
286                        .as_object()
287                        .and_then(|o| {
288                            let root_key = o.keys().find(|k| *k != "?xml")?;
289                            let root_val = o.get(root_key)?.as_object()?;
290                            Some(
291                                root_key == &rule.root_to_strip
292                                    || root_val.contains_key(&rule.root_to_strip),
293                            )
294                        })
295                        .unwrap_or(false);
296                    if !has_element_to_strip {
297                        continue;
298                    }
299
300                    let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
301
302                    let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
303                    {
304                        Some(xml) => xml,
305                        None => continue,
306                    };
307
308                    fs::write(&path, stripped_xml).await?;
309
310                    let file_stem = path
311                        .file_stem()
312                        .and_then(|s| s.to_str())
313                        .unwrap_or("output");
314                    let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
315                    let parent = path.parent().unwrap_or(dir_path);
316                    let second_level_output = parent.join(output_dir_name);
317
318                    build_disassembled_files_unified(BuildDisassembledFilesOptions {
319                        file_path: &path_str,
320                        disassembled_path: second_level_output.to_str().unwrap_or("."),
321                        base_name: output_dir_name,
322                        post_purge: true,
323                        format,
324                        unique_id_elements: Some(&rule.unique_id_elements),
325                        strategy: "unique-id",
326                    })
327                    .await?;
328
329                    if config.rules.is_empty() {
330                        let wrap_root = parsed
331                            .as_object()
332                            .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
333                            .unwrap_or_else(|| rule.wrap_root_element.clone());
334                        config.rules.push(MultiLevelRule {
335                            file_pattern: rule.file_pattern.clone(),
336                            root_to_strip: rule.root_to_strip.clone(),
337                            unique_id_elements: rule.unique_id_elements.clone(),
338                            path_segment: if rule.path_segment.is_empty() {
339                                path_segment_from_file_pattern(&rule.file_pattern)
340                            } else {
341                                rule.path_segment.clone()
342                            },
343                            // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it as root with xmlns;
344                            // path_segment (e.g. programProcesses) is the inner wrapper in each file.
345                            wrap_root_element: wrap_root,
346                            wrap_xmlns: if rule.wrap_xmlns.is_empty() {
347                                wrap_xmlns
348                            } else {
349                                rule.wrap_xmlns.clone()
350                            },
351                        });
352                    } else if let Some(r) = config.rules.first_mut() {
353                        if r.wrap_xmlns.is_empty() {
354                            r.wrap_xmlns = wrap_xmlns;
355                        }
356                    }
357                }
358            }
359        }
360
361        if !config.rules.is_empty() {
362            save_multi_level_config(dir_path, &config).await?;
363        }
364
365        Ok(())
366    }
367}
368
369impl Default for DisassembleXmlFileHandler {
370    fn default() -> Self {
371        Self::new()
372    }
373}