// config_disassembler/xml/handlers/disassemble.rs
use crate::xml::builders::build_disassembled_files_unified;
4use crate::xml::multi_level::{
5 capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6 strip_root_and_build_xml,
7};
8use crate::xml::parsers::parse_xml;
9use crate::xml::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use crate::xml::utils::normalize_path_unix;
11use ignore::gitignore::GitignoreBuilder;
12use std::path::Path;
13use tokio::fs;
14
15pub struct DisassembleXmlFileHandler {
16 ign: Option<ignore::gitignore::Gitignore>,
17}
18
19impl DisassembleXmlFileHandler {
20 pub fn new() -> Self {
21 Self { ign: None }
22 }
23
24 async fn load_ignore_rules(&mut self, ignore_path: &str) {
25 let path = Path::new(ignore_path);
26 let content = match fs::read_to_string(path).await {
27 Ok(c) => c,
28 Err(_) => return,
29 };
30 let root = path.parent().unwrap_or(Path::new("."));
31 let mut builder = GitignoreBuilder::new(root);
32 for line in content.lines() {
33 let _ = builder.add_line(None, line);
34 }
35 self.ign = builder.build().ok();
37 }
38
39 fn posix_path(path: &str) -> String {
40 path.replace('\\', "/")
41 }
42
43 fn is_xml_file(file_path: &str) -> bool {
44 file_path.to_lowercase().ends_with(".xml")
45 }
46
47 fn is_ignored(&self, path: &str) -> bool {
48 self.ign
49 .as_ref()
50 .map(|ign| ign.matched(path, false).is_ignore())
51 .unwrap_or(false)
52 }
53
54 #[allow(clippy::too_many_arguments)]
55 pub async fn disassemble(
56 &mut self,
57 file_path: &str,
58 unique_id_elements: Option<&str>,
59 strategy: Option<&str>,
60 pre_purge: bool,
61 post_purge: bool,
62 ignore_path: &str,
63 format: &str,
64 multi_level_rules: Option<&[MultiLevelRule]>,
65 decompose_rules: Option<&[DecomposeRule]>,
66 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
67 let strategy = strategy.unwrap_or("unique-id");
68 let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
69 strategy
70 } else {
71 log::warn!(
72 "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
73 strategy
74 );
75 "unique-id"
76 };
77
78 self.load_ignore_rules(ignore_path).await;
79
80 let path = Path::new(file_path);
81 let meta = fs::metadata(path).await?;
82 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
83 let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
84 let relative_path = Self::posix_path(&relative_path);
85
86 let multi_level_rules = multi_level_rules.filter(|rules| !rules.is_empty());
88
89 if meta.is_file() {
90 self.handle_file(
91 file_path,
92 &relative_path,
93 unique_id_elements,
94 strategy,
95 pre_purge,
96 post_purge,
97 format,
98 multi_level_rules,
99 decompose_rules,
100 )
101 .await?;
102 } else {
103 self.handle_directory(
106 file_path,
107 unique_id_elements,
108 strategy,
109 pre_purge,
110 post_purge,
111 format,
112 multi_level_rules,
113 decompose_rules,
114 )
115 .await?;
116 }
117
118 Ok(())
119 }
120
121 #[allow(clippy::too_many_arguments)]
122 async fn handle_file(
123 &self,
124 file_path: &str,
125 relative_path: &str,
126 unique_id_elements: Option<&str>,
127 strategy: &str,
128 pre_purge: bool,
129 post_purge: bool,
130 format: &str,
131 multi_level_rules: Option<&[MultiLevelRule]>,
132 decompose_rules: Option<&[DecomposeRule]>,
133 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
134 let resolved = Path::new(file_path)
135 .canonicalize()
136 .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
137 let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
138
139 if !Self::is_xml_file(&resolved_str) {
140 log::error!(
141 "The file path provided is not an XML file: {}",
142 resolved_str
143 );
144 return Ok(());
145 }
146
147 if self.is_ignored(relative_path) {
148 log::warn!("File ignored by ignore rules: {}", resolved_str);
149 return Ok(());
150 }
151
152 let dir_path = resolved.parent().unwrap_or(Path::new("."));
153 let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
154 self.process_file(
155 &dir_path_str,
156 strategy,
157 &resolved_str,
158 unique_id_elements,
159 pre_purge,
160 post_purge,
161 format,
162 multi_level_rules,
163 decompose_rules,
164 )
165 .await
166 }
167
168 #[allow(clippy::too_many_arguments)]
169 async fn handle_directory(
170 &self,
171 dir_path: &str,
172 unique_id_elements: Option<&str>,
173 strategy: &str,
174 pre_purge: bool,
175 post_purge: bool,
176 format: &str,
177 multi_level_rules: Option<&[MultiLevelRule]>,
178 decompose_rules: Option<&[DecomposeRule]>,
179 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
180 let dir_path = normalize_path_unix(dir_path);
181 let mut entries = fs::read_dir(&dir_path).await?;
182 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
183
184 while let Some(entry) = entries.next_entry().await? {
185 let sub_path = entry.path();
186 let sub_file_path = sub_path.to_string_lossy();
187 let relative_sub = sub_path
188 .strip_prefix(&cwd)
189 .unwrap_or(&sub_path)
190 .to_string_lossy();
191 let relative_sub = Self::posix_path(&relative_sub);
192
193 if !(sub_path.is_file() && Self::is_xml_file(&sub_file_path)) {
194 continue;
195 }
196 if self.is_ignored(&relative_sub) {
197 log::warn!("File ignored by ignore rules: {}", sub_file_path);
198 continue;
199 }
200 let sub_file_path_norm = normalize_path_unix(&sub_file_path);
201 self.process_file(
202 &dir_path,
203 strategy,
204 &sub_file_path_norm,
205 unique_id_elements,
206 pre_purge,
207 post_purge,
208 format,
209 multi_level_rules,
210 decompose_rules,
211 )
212 .await?;
213 }
214 Ok(())
215 }
216
217 #[allow(clippy::too_many_arguments)]
218 async fn process_file(
219 &self,
220 dir_path: &str,
221 strategy: &str,
222 file_path: &str,
223 unique_id_elements: Option<&str>,
224 pre_purge: bool,
225 post_purge: bool,
226 format: &str,
227 multi_level_rules: Option<&[MultiLevelRule]>,
228 decompose_rules: Option<&[DecomposeRule]>,
229 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
230 log::debug!("Parsing file to disassemble: {}", file_path);
231
232 let file_name = Path::new(file_path)
233 .file_stem()
234 .and_then(|s| s.to_str())
235 .unwrap_or("output");
236 let base_name = file_name.split('.').next().unwrap_or(file_name);
237 let output_path = Path::new(dir_path).join(base_name);
238
239 if pre_purge && output_path.exists() {
240 fs::remove_dir_all(&output_path).await.ok();
241 }
242
243 build_disassembled_files_unified(BuildDisassembledFilesOptions {
244 file_path,
245 disassembled_path: output_path.to_str().unwrap_or("."),
246 base_name: file_name,
247 post_purge,
248 format,
249 unique_id_elements,
250 strategy,
251 decompose_rules,
252 })
253 .await?;
254
255 if let Some(rules) = multi_level_rules {
259 for rule in rules {
260 self.recursively_disassemble_multi_level(&output_path, rule, format)
261 .await?;
262 }
263 }
264
265 Ok(())
266 }
267
268 async fn recursively_disassemble_multi_level(
271 &self,
272 dir_path: &Path,
273 rule: &MultiLevelRule,
274 format: &str,
275 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
276 let mut config = crate::xml::multi_level::load_multi_level_config(dir_path)
277 .await
278 .unwrap_or_default();
279
280 let mut stack = vec![dir_path.to_path_buf()];
281 while let Some(current) = stack.pop() {
282 let mut entries = Vec::new();
283 let mut read_dir = fs::read_dir(¤t).await?;
284 while let Some(entry) = read_dir.next_entry().await? {
285 entries.push(entry);
286 }
287
288 for entry in entries {
289 let path = entry.path();
290 let path_str = path.to_string_lossy().to_string();
291
292 if path.is_dir() {
293 stack.push(path);
294 continue;
295 }
296 {
298 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
299 let path_str_check = path.to_string_lossy();
300 if !name.ends_with(".xml")
301 || (!name.contains(&rule.file_pattern)
302 && !path_str_check.contains(&rule.file_pattern))
303 {
304 continue;
305 }
306
307 let parsed = match parse_xml(&path_str).await {
308 Some(p) => p,
309 None => continue,
310 };
311 let has_element_to_strip = parsed
312 .as_object()
313 .and_then(|o| {
314 let root_key = o.keys().find(|k| *k != "?xml")?;
315 let root_val = o.get(root_key)?.as_object()?;
316 Some(
317 root_key == &rule.root_to_strip
318 || root_val.contains_key(&rule.root_to_strip),
319 )
320 })
321 .unwrap_or(false);
322 if !has_element_to_strip {
323 continue;
324 }
325
326 let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
327
328 let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
329 {
330 Some(xml) => xml,
331 None => continue,
332 };
333
334 fs::write(&path, stripped_xml).await?;
335
336 let file_stem = path
337 .file_stem()
338 .and_then(|s| s.to_str())
339 .unwrap_or("output");
340 let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
341 let parent = path.parent().unwrap_or(dir_path);
342 let second_level_output = parent.join(output_dir_name);
343
344 build_disassembled_files_unified(BuildDisassembledFilesOptions {
345 file_path: &path_str,
346 disassembled_path: second_level_output.to_str().unwrap_or("."),
347 base_name: output_dir_name,
348 post_purge: true,
349 format,
350 unique_id_elements: Some(&rule.unique_id_elements),
351 strategy: "unique-id",
352 decompose_rules: None,
353 })
354 .await?;
355
356 let existing_idx = config.rules.iter().position(|r| {
360 r.file_pattern == rule.file_pattern && r.root_to_strip == rule.root_to_strip
361 });
362 match existing_idx {
363 None => {
364 let wrap_root = parsed
365 .as_object()
366 .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
367 .unwrap_or_else(|| rule.wrap_root_element.clone());
368 let path_segment = if rule.path_segment.is_empty() {
369 path_segment_from_file_pattern(&rule.file_pattern)
370 } else {
371 rule.path_segment.clone()
372 };
373 let stored_xmlns = if rule.wrap_xmlns.is_empty() {
374 wrap_xmlns
375 } else {
376 rule.wrap_xmlns.clone()
377 };
378 config.rules.push(MultiLevelRule {
379 file_pattern: rule.file_pattern.clone(),
380 root_to_strip: rule.root_to_strip.clone(),
381 unique_id_elements: rule.unique_id_elements.clone(),
382 path_segment,
383 wrap_root_element: wrap_root,
386 wrap_xmlns: stored_xmlns,
387 });
388 }
389 Some(idx) => {
390 if config.rules[idx].wrap_xmlns.is_empty() {
393 config.rules[idx].wrap_xmlns = wrap_xmlns;
394 }
395 }
396 }
397 }
398 }
399 }
400
401 if !config.rules.is_empty() {
402 save_multi_level_config(dir_path, &config).await?;
403 }
404
405 Ok(())
406 }
407}
408
409impl Default for DisassembleXmlFileHandler {
410 fn default() -> Self {
411 Self::new()
412 }
413}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` and `new` must both start without any ignore rules loaded.
    #[test]
    fn disassemble_handler_default_equals_new() {
        let from_default = DisassembleXmlFileHandler::default();
        let from_new = DisassembleXmlFileHandler::new();
        assert!(from_default.ign.is_none());
        assert!(from_new.ign.is_none());
    }

    /// The `.xml` suffix check ignores letter case and rejects other inputs.
    #[test]
    fn is_xml_file_matches_case_insensitively() {
        assert!(DisassembleXmlFileHandler::is_xml_file("foo.xml"));
        assert!(DisassembleXmlFileHandler::is_xml_file("BAR.XML"));
        assert!(DisassembleXmlFileHandler::is_xml_file("dir/Mixed.XmL"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("foo.txt"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("xml"));
        assert!(!DisassembleXmlFileHandler::is_xml_file(""));
    }

    /// Backslashes become forward slashes; everything else is left untouched.
    #[test]
    fn posix_path_converts_backslashes() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path(r"C:\Users\name\file.xml"),
            "C:/Users/name/file.xml"
        );
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("already/posix.xml"),
            "already/posix.xml"
        );
    }

    /// A missing ignore file leaves the handler without rules.
    #[tokio::test]
    async fn load_ignore_rules_noop_when_path_missing() {
        let mut handler = DisassembleXmlFileHandler::new();
        handler
            .load_ignore_rules("/definitely/does/not/exist/.ignore")
            .await;
        assert!(handler.ign.is_none());
    }

    /// A readable ignore file yields a matcher that applies its patterns.
    #[tokio::test]
    async fn load_ignore_rules_builds_matcher() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join(".ignore");
        tokio::fs::write(&path, "*.xml\n").await.unwrap();

        let mut handler = DisassembleXmlFileHandler::new();
        handler.load_ignore_rules(path.to_str().unwrap()).await;

        assert!(handler.ign.is_some());
        assert!(handler.is_ignored("file.xml"));
        assert!(!handler.is_ignored("file.txt"));
    }

    /// Without loaded rules no path is ever reported as ignored.
    #[test]
    fn is_ignored_default_false_without_rules() {
        let handler = DisassembleXmlFileHandler::new();
        assert!(!handler.is_ignored("some/path.xml"));
    }
}