xml_disassembler/handlers/
disassemble.rs1use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5 capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6 strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use crate::utils::normalize_path_unix;
11use ignore::gitignore::GitignoreBuilder;
12use std::path::Path;
13use tokio::fs;
14
15pub struct DisassembleXmlFileHandler {
16 ign: Option<ignore::gitignore::Gitignore>,
17}
18
19impl DisassembleXmlFileHandler {
20 pub fn new() -> Self {
21 Self { ign: None }
22 }
23
24 async fn load_ignore_rules(&mut self, ignore_path: &str) {
25 let path = Path::new(ignore_path);
26 let content = match fs::read_to_string(path).await {
27 Ok(c) => c,
28 Err(_) => return,
29 };
30 let root = path.parent().unwrap_or(Path::new("."));
31 let mut builder = GitignoreBuilder::new(root);
32 for line in content.lines() {
33 let _ = builder.add_line(None, line);
34 }
35 self.ign = builder.build().ok();
37 }
38
/// Returns `path` with every backslash replaced by a forward slash.
fn posix_path(path: &str) -> String {
    path.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
42
/// Returns `true` when `file_path` ends with `.xml`, compared ASCII
/// case-insensitively (`foo.xml`, `BAR.XML`, `a.Xml` all match).
///
/// Only the trailing bytes are inspected, so no lowercased copy of the path
/// is allocated. Working on bytes also avoids slicing a `str` at a position
/// that might not be a character boundary.
fn is_xml_file(file_path: &str) -> bool {
    const SUFFIX: &[u8] = b".xml";
    let bytes = file_path.as_bytes();
    bytes.len() >= SUFFIX.len()
        && bytes[bytes.len() - SUFFIX.len()..].eq_ignore_ascii_case(SUFFIX)
}
46
47 fn is_ignored(&self, path: &str) -> bool {
48 self.ign
49 .as_ref()
50 .map(|ign| ign.matched(path, false).is_ignore())
51 .unwrap_or(false)
52 }
53
54 #[allow(clippy::too_many_arguments)]
55 pub async fn disassemble(
56 &mut self,
57 file_path: &str,
58 unique_id_elements: Option<&str>,
59 strategy: Option<&str>,
60 pre_purge: bool,
61 post_purge: bool,
62 ignore_path: &str,
63 format: &str,
64 multi_level_rule: Option<&MultiLevelRule>,
65 decompose_rules: Option<&[DecomposeRule]>,
66 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
67 let strategy = strategy.unwrap_or("unique-id");
68 let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
69 strategy
70 } else {
71 log::warn!(
72 "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
73 strategy
74 );
75 "unique-id"
76 };
77
78 self.load_ignore_rules(ignore_path).await;
79
80 let path = Path::new(file_path);
81 let meta = fs::metadata(path).await?;
82 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
83 let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
84 let relative_path = Self::posix_path(&relative_path);
85
86 if meta.is_file() {
87 self.handle_file(
88 file_path,
89 &relative_path,
90 unique_id_elements,
91 strategy,
92 pre_purge,
93 post_purge,
94 format,
95 multi_level_rule,
96 decompose_rules,
97 )
98 .await?;
99 } else {
100 self.handle_directory(
103 file_path,
104 unique_id_elements,
105 strategy,
106 pre_purge,
107 post_purge,
108 format,
109 multi_level_rule,
110 decompose_rules,
111 )
112 .await?;
113 }
114
115 Ok(())
116 }
117
118 #[allow(clippy::too_many_arguments)]
119 async fn handle_file(
120 &self,
121 file_path: &str,
122 relative_path: &str,
123 unique_id_elements: Option<&str>,
124 strategy: &str,
125 pre_purge: bool,
126 post_purge: bool,
127 format: &str,
128 multi_level_rule: Option<&MultiLevelRule>,
129 decompose_rules: Option<&[DecomposeRule]>,
130 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
131 let resolved = Path::new(file_path)
132 .canonicalize()
133 .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
134 let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
135
136 if !Self::is_xml_file(&resolved_str) {
137 log::error!(
138 "The file path provided is not an XML file: {}",
139 resolved_str
140 );
141 return Ok(());
142 }
143
144 if self.is_ignored(relative_path) {
145 log::warn!("File ignored by ignore rules: {}", resolved_str);
146 return Ok(());
147 }
148
149 let dir_path = resolved.parent().unwrap_or(Path::new("."));
150 let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
151 self.process_file(
152 &dir_path_str,
153 strategy,
154 &resolved_str,
155 unique_id_elements,
156 pre_purge,
157 post_purge,
158 format,
159 multi_level_rule,
160 decompose_rules,
161 )
162 .await
163 }
164
165 #[allow(clippy::too_many_arguments)]
166 async fn handle_directory(
167 &self,
168 dir_path: &str,
169 unique_id_elements: Option<&str>,
170 strategy: &str,
171 pre_purge: bool,
172 post_purge: bool,
173 format: &str,
174 multi_level_rule: Option<&MultiLevelRule>,
175 decompose_rules: Option<&[DecomposeRule]>,
176 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
177 let dir_path = normalize_path_unix(dir_path);
178 let mut entries = fs::read_dir(&dir_path).await?;
179 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
180
181 while let Some(entry) = entries.next_entry().await? {
182 let sub_path = entry.path();
183 let sub_file_path = sub_path.to_string_lossy();
184 let relative_sub = sub_path
185 .strip_prefix(&cwd)
186 .unwrap_or(&sub_path)
187 .to_string_lossy();
188 let relative_sub = Self::posix_path(&relative_sub);
189
190 if !(sub_path.is_file() && Self::is_xml_file(&sub_file_path)) {
191 continue;
192 }
193 if self.is_ignored(&relative_sub) {
194 log::warn!("File ignored by ignore rules: {}", sub_file_path);
195 continue;
196 }
197 let sub_file_path_norm = normalize_path_unix(&sub_file_path);
198 self.process_file(
199 &dir_path,
200 strategy,
201 &sub_file_path_norm,
202 unique_id_elements,
203 pre_purge,
204 post_purge,
205 format,
206 multi_level_rule,
207 decompose_rules,
208 )
209 .await?;
210 }
211 Ok(())
212 }
213
214 #[allow(clippy::too_many_arguments)]
215 async fn process_file(
216 &self,
217 dir_path: &str,
218 strategy: &str,
219 file_path: &str,
220 unique_id_elements: Option<&str>,
221 pre_purge: bool,
222 post_purge: bool,
223 format: &str,
224 multi_level_rule: Option<&MultiLevelRule>,
225 decompose_rules: Option<&[DecomposeRule]>,
226 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
227 log::debug!("Parsing file to disassemble: {}", file_path);
228
229 let file_name = Path::new(file_path)
230 .file_stem()
231 .and_then(|s| s.to_str())
232 .unwrap_or("output");
233 let base_name = file_name.split('.').next().unwrap_or(file_name);
234 let output_path = Path::new(dir_path).join(base_name);
235
236 if pre_purge && output_path.exists() {
237 fs::remove_dir_all(&output_path).await.ok();
238 }
239
240 build_disassembled_files_unified(BuildDisassembledFilesOptions {
241 file_path,
242 disassembled_path: output_path.to_str().unwrap_or("."),
243 base_name: file_name,
244 post_purge,
245 format,
246 unique_id_elements,
247 strategy,
248 decompose_rules,
249 })
250 .await?;
251
252 if let Some(rule) = multi_level_rule {
253 self.recursively_disassemble_multi_level(&output_path, rule, format)
254 .await?;
255 }
256
257 Ok(())
258 }
259
260 async fn recursively_disassemble_multi_level(
263 &self,
264 dir_path: &Path,
265 rule: &MultiLevelRule,
266 format: &str,
267 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
268 let mut config = crate::multi_level::load_multi_level_config(dir_path)
269 .await
270 .unwrap_or_default();
271
272 let mut stack = vec![dir_path.to_path_buf()];
273 while let Some(current) = stack.pop() {
274 let mut entries = Vec::new();
275 let mut read_dir = fs::read_dir(¤t).await?;
276 while let Some(entry) = read_dir.next_entry().await? {
277 entries.push(entry);
278 }
279
280 for entry in entries {
281 let path = entry.path();
282 let path_str = path.to_string_lossy().to_string();
283
284 if path.is_dir() {
285 stack.push(path);
286 continue;
287 }
288 {
290 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
291 let path_str_check = path.to_string_lossy();
292 if !name.ends_with(".xml")
293 || (!name.contains(&rule.file_pattern)
294 && !path_str_check.contains(&rule.file_pattern))
295 {
296 continue;
297 }
298
299 let parsed = match parse_xml(&path_str).await {
300 Some(p) => p,
301 None => continue,
302 };
303 let has_element_to_strip = parsed
304 .as_object()
305 .and_then(|o| {
306 let root_key = o.keys().find(|k| *k != "?xml")?;
307 let root_val = o.get(root_key)?.as_object()?;
308 Some(
309 root_key == &rule.root_to_strip
310 || root_val.contains_key(&rule.root_to_strip),
311 )
312 })
313 .unwrap_or(false);
314 if !has_element_to_strip {
315 continue;
316 }
317
318 let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
319
320 let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
321 {
322 Some(xml) => xml,
323 None => continue,
324 };
325
326 fs::write(&path, stripped_xml).await?;
327
328 let file_stem = path
329 .file_stem()
330 .and_then(|s| s.to_str())
331 .unwrap_or("output");
332 let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
333 let parent = path.parent().unwrap_or(dir_path);
334 let second_level_output = parent.join(output_dir_name);
335
336 build_disassembled_files_unified(BuildDisassembledFilesOptions {
337 file_path: &path_str,
338 disassembled_path: second_level_output.to_str().unwrap_or("."),
339 base_name: output_dir_name,
340 post_purge: true,
341 format,
342 unique_id_elements: Some(&rule.unique_id_elements),
343 strategy: "unique-id",
344 decompose_rules: None,
345 })
346 .await?;
347
348 match config.rules.first_mut() {
349 None => {
350 let wrap_root = parsed
351 .as_object()
352 .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
353 .unwrap_or_else(|| rule.wrap_root_element.clone());
354 let path_segment = if rule.path_segment.is_empty() {
355 path_segment_from_file_pattern(&rule.file_pattern)
356 } else {
357 rule.path_segment.clone()
358 };
359 let stored_xmlns = if rule.wrap_xmlns.is_empty() {
360 wrap_xmlns
361 } else {
362 rule.wrap_xmlns.clone()
363 };
364 config.rules.push(MultiLevelRule {
365 file_pattern: rule.file_pattern.clone(),
366 root_to_strip: rule.root_to_strip.clone(),
367 unique_id_elements: rule.unique_id_elements.clone(),
368 path_segment,
369 wrap_root_element: wrap_root,
372 wrap_xmlns: stored_xmlns,
373 });
374 }
375 Some(r) if r.wrap_xmlns.is_empty() => {
376 r.wrap_xmlns = wrap_xmlns;
377 }
378 Some(_) => {}
379 }
380 }
381 }
382 }
383
384 if !config.rules.is_empty() {
385 save_multi_level_config(dir_path, &config).await?;
386 }
387
388 Ok(())
389 }
390}
391
392impl Default for DisassembleXmlFileHandler {
393 fn default() -> Self {
394 Self::new()
395 }
396}
397
/// Unit tests for the handler's path helpers and ignore-rule loading.
#[cfg(test)]
mod tests {
    use super::*;

    /// Both constructors must start with no ignore rules loaded.
    #[test]
    fn disassemble_handler_default_equals_new() {
        assert!(DisassembleXmlFileHandler::default().ign.is_none());
        assert!(DisassembleXmlFileHandler::new().ign.is_none());
    }

    #[test]
    fn is_xml_file_matches_case_insensitively() {
        assert!(DisassembleXmlFileHandler::is_xml_file("foo.xml"));
        assert!(DisassembleXmlFileHandler::is_xml_file("BAR.XML"));
        assert!(DisassembleXmlFileHandler::is_xml_file("dir/Mixed.XmL"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("foo.txt"));
    }

    #[test]
    fn is_xml_file_only_checks_the_suffix() {
        assert!(DisassembleXmlFileHandler::is_xml_file(".xml"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("archive.xml.bak"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("xml"));
        assert!(!DisassembleXmlFileHandler::is_xml_file(""));
    }

    #[test]
    fn posix_path_converts_backslashes() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path(r"C:\Users\name\file.xml"),
            "C:/Users/name/file.xml"
        );
    }

    #[test]
    fn posix_path_leaves_forward_slashes_untouched() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("already/posix/file.xml"),
            "already/posix/file.xml"
        );
        assert_eq!(DisassembleXmlFileHandler::posix_path(""), "");
    }

    #[tokio::test]
    async fn load_ignore_rules_noop_when_path_missing() {
        let mut handler = DisassembleXmlFileHandler::new();
        handler
            .load_ignore_rules("/definitely/does/not/exist/.ignore")
            .await;
        assert!(handler.ign.is_none());
        // Without rules nothing may be reported as ignored.
        assert!(!handler.is_ignored("file.xml"));
    }

    #[tokio::test]
    async fn load_ignore_rules_builds_matcher() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join(".ignore");
        tokio::fs::write(&path, "*.xml\n").await.unwrap();
        let mut handler = DisassembleXmlFileHandler::new();
        handler.load_ignore_rules(path.to_str().unwrap()).await;
        assert!(handler.ign.is_some());
        assert!(handler.is_ignored("file.xml"));
        assert!(!handler.is_ignored("file.txt"));
    }

    #[test]
    fn is_ignored_default_false_without_rules() {
        let handler = DisassembleXmlFileHandler::new();
        assert!(!handler.is_ignored("some/path.xml"));
    }
}