xml_disassembler/handlers/
disassemble.rs1use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5 capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6 strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use crate::utils::normalize_path_unix;
11use ignore::gitignore::GitignoreBuilder;
12use std::path::Path;
13use tokio::fs;
14
15pub struct DisassembleXmlFileHandler {
16 ign: Option<ignore::gitignore::Gitignore>,
17}
18
19impl DisassembleXmlFileHandler {
20 pub fn new() -> Self {
21 Self { ign: None }
22 }
23
24 async fn load_ignore_rules(&mut self, ignore_path: &str) {
25 let path = Path::new(ignore_path);
26 if path.exists() {
27 if let Ok(content) = fs::read_to_string(path).await {
28 let root = path.parent().unwrap_or(Path::new("."));
29 let mut builder = GitignoreBuilder::new(root);
30 for line in content.lines() {
31 let _ = builder.add_line(None, line);
32 }
33 if let Ok(gi) = builder.build() {
34 self.ign = Some(gi);
35 }
36 }
37 }
38 }
39
/// Returns `path` with every backslash replaced by a forward slash.
fn posix_path(path: &str) -> String {
    path.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
43
/// Returns `true` when `file_path` ends with `.xml`, compared case-insensitively.
fn is_xml_file(file_path: &str) -> bool {
    let lowered = file_path.to_lowercase();
    lowered.ends_with(".xml")
}
47
48 fn is_ignored(&self, path: &str) -> bool {
49 self.ign
50 .as_ref()
51 .map(|ign| ign.matched(path, false).is_ignore())
52 .unwrap_or(false)
53 }
54
55 #[allow(clippy::too_many_arguments)]
56 pub async fn disassemble(
57 &mut self,
58 file_path: &str,
59 unique_id_elements: Option<&str>,
60 strategy: Option<&str>,
61 pre_purge: bool,
62 post_purge: bool,
63 ignore_path: &str,
64 format: &str,
65 multi_level_rule: Option<&MultiLevelRule>,
66 decompose_rules: Option<&[DecomposeRule]>,
67 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
68 let strategy = strategy.unwrap_or("unique-id");
69 let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
70 strategy
71 } else {
72 log::warn!(
73 "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
74 strategy
75 );
76 "unique-id"
77 };
78
79 self.load_ignore_rules(ignore_path).await;
80
81 let path = Path::new(file_path);
82 let meta = fs::metadata(path).await?;
83 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
84 let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
85 let relative_path = Self::posix_path(&relative_path);
86
87 if meta.is_file() {
88 self.handle_file(
89 file_path,
90 &relative_path,
91 unique_id_elements,
92 strategy,
93 pre_purge,
94 post_purge,
95 format,
96 multi_level_rule,
97 decompose_rules,
98 )
99 .await?;
100 } else if meta.is_dir() {
101 self.handle_directory(
102 file_path,
103 unique_id_elements,
104 strategy,
105 pre_purge,
106 post_purge,
107 format,
108 multi_level_rule,
109 decompose_rules,
110 )
111 .await?;
112 }
113
114 Ok(())
115 }
116
117 #[allow(clippy::too_many_arguments)]
118 async fn handle_file(
119 &self,
120 file_path: &str,
121 relative_path: &str,
122 unique_id_elements: Option<&str>,
123 strategy: &str,
124 pre_purge: bool,
125 post_purge: bool,
126 format: &str,
127 multi_level_rule: Option<&MultiLevelRule>,
128 decompose_rules: Option<&[DecomposeRule]>,
129 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
130 let resolved = Path::new(file_path)
131 .canonicalize()
132 .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
133 let resolved_str = normalize_path_unix(&resolved.to_string_lossy());
134
135 if !Self::is_xml_file(&resolved_str) {
136 log::error!(
137 "The file path provided is not an XML file: {}",
138 resolved_str
139 );
140 return Ok(());
141 }
142
143 if self.is_ignored(relative_path) {
144 log::warn!("File ignored by ignore rules: {}", resolved_str);
145 return Ok(());
146 }
147
148 let dir_path = resolved.parent().unwrap_or(Path::new("."));
149 let dir_path_str = normalize_path_unix(&dir_path.to_string_lossy());
150 self.process_file(
151 &dir_path_str,
152 strategy,
153 &resolved_str,
154 unique_id_elements,
155 pre_purge,
156 post_purge,
157 format,
158 multi_level_rule,
159 decompose_rules,
160 )
161 .await
162 }
163
164 #[allow(clippy::too_many_arguments)]
165 async fn handle_directory(
166 &self,
167 dir_path: &str,
168 unique_id_elements: Option<&str>,
169 strategy: &str,
170 pre_purge: bool,
171 post_purge: bool,
172 format: &str,
173 multi_level_rule: Option<&MultiLevelRule>,
174 decompose_rules: Option<&[DecomposeRule]>,
175 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
176 let dir_path = normalize_path_unix(dir_path);
177 let mut entries = fs::read_dir(&dir_path).await?;
178 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
179
180 while let Some(entry) = entries.next_entry().await? {
181 let sub_path = entry.path();
182 let sub_file_path = sub_path.to_string_lossy();
183 let relative_sub = sub_path
184 .strip_prefix(&cwd)
185 .unwrap_or(&sub_path)
186 .to_string_lossy();
187 let relative_sub = Self::posix_path(&relative_sub);
188
189 if sub_path.is_file() && Self::is_xml_file(&sub_file_path) {
190 if self.is_ignored(&relative_sub) {
191 log::warn!("File ignored by ignore rules: {}", sub_file_path);
192 } else {
193 let sub_file_path_norm = normalize_path_unix(&sub_file_path);
194 self.process_file(
195 &dir_path,
196 strategy,
197 &sub_file_path_norm,
198 unique_id_elements,
199 pre_purge,
200 post_purge,
201 format,
202 multi_level_rule,
203 decompose_rules,
204 )
205 .await?;
206 }
207 }
208 }
209 Ok(())
210 }
211
212 #[allow(clippy::too_many_arguments)]
213 async fn process_file(
214 &self,
215 dir_path: &str,
216 strategy: &str,
217 file_path: &str,
218 unique_id_elements: Option<&str>,
219 pre_purge: bool,
220 post_purge: bool,
221 format: &str,
222 multi_level_rule: Option<&MultiLevelRule>,
223 decompose_rules: Option<&[DecomposeRule]>,
224 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
225 log::debug!("Parsing file to disassemble: {}", file_path);
226
227 let file_name = Path::new(file_path)
228 .file_stem()
229 .and_then(|s| s.to_str())
230 .unwrap_or("output");
231 let base_name = file_name.split('.').next().unwrap_or(file_name);
232 let output_path = Path::new(dir_path).join(base_name);
233
234 if pre_purge && output_path.exists() {
235 fs::remove_dir_all(&output_path).await.ok();
236 }
237
238 build_disassembled_files_unified(BuildDisassembledFilesOptions {
239 file_path,
240 disassembled_path: output_path.to_str().unwrap_or("."),
241 base_name: file_name,
242 post_purge,
243 format,
244 unique_id_elements,
245 strategy,
246 decompose_rules,
247 })
248 .await?;
249
250 if let Some(rule) = multi_level_rule {
251 self.recursively_disassemble_multi_level(&output_path, rule, format)
252 .await?;
253 }
254
255 Ok(())
256 }
257
258 async fn recursively_disassemble_multi_level(
261 &self,
262 dir_path: &Path,
263 rule: &MultiLevelRule,
264 format: &str,
265 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
266 let mut config = crate::multi_level::load_multi_level_config(dir_path)
267 .await
268 .unwrap_or_default();
269
270 let mut stack = vec![dir_path.to_path_buf()];
271 while let Some(current) = stack.pop() {
272 let mut entries = Vec::new();
273 let mut read_dir = fs::read_dir(¤t).await?;
274 while let Some(entry) = read_dir.next_entry().await? {
275 entries.push(entry);
276 }
277
278 for entry in entries {
279 let path = entry.path();
280 let path_str = path.to_string_lossy().to_string();
281
282 if path.is_dir() {
283 stack.push(path);
284 } else if path.is_file() {
285 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
286 let path_str_check = path.to_string_lossy();
287 if !name.ends_with(".xml")
288 || (!name.contains(&rule.file_pattern)
289 && !path_str_check.contains(&rule.file_pattern))
290 {
291 continue;
292 }
293
294 let parsed = match parse_xml(&path_str).await {
295 Some(p) => p,
296 None => continue,
297 };
298 let has_element_to_strip = parsed
299 .as_object()
300 .and_then(|o| {
301 let root_key = o.keys().find(|k| *k != "?xml")?;
302 let root_val = o.get(root_key)?.as_object()?;
303 Some(
304 root_key == &rule.root_to_strip
305 || root_val.contains_key(&rule.root_to_strip),
306 )
307 })
308 .unwrap_or(false);
309 if !has_element_to_strip {
310 continue;
311 }
312
313 let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
314
315 let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
316 {
317 Some(xml) => xml,
318 None => continue,
319 };
320
321 fs::write(&path, stripped_xml).await?;
322
323 let file_stem = path
324 .file_stem()
325 .and_then(|s| s.to_str())
326 .unwrap_or("output");
327 let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
328 let parent = path.parent().unwrap_or(dir_path);
329 let second_level_output = parent.join(output_dir_name);
330
331 build_disassembled_files_unified(BuildDisassembledFilesOptions {
332 file_path: &path_str,
333 disassembled_path: second_level_output.to_str().unwrap_or("."),
334 base_name: output_dir_name,
335 post_purge: true,
336 format,
337 unique_id_elements: Some(&rule.unique_id_elements),
338 strategy: "unique-id",
339 decompose_rules: None,
340 })
341 .await?;
342
343 if config.rules.is_empty() {
344 let wrap_root = parsed
345 .as_object()
346 .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
347 .unwrap_or_else(|| rule.wrap_root_element.clone());
348 config.rules.push(MultiLevelRule {
349 file_pattern: rule.file_pattern.clone(),
350 root_to_strip: rule.root_to_strip.clone(),
351 unique_id_elements: rule.unique_id_elements.clone(),
352 path_segment: if rule.path_segment.is_empty() {
353 path_segment_from_file_pattern(&rule.file_pattern)
354 } else {
355 rule.path_segment.clone()
356 },
357 wrap_root_element: wrap_root,
360 wrap_xmlns: if rule.wrap_xmlns.is_empty() {
361 wrap_xmlns
362 } else {
363 rule.wrap_xmlns.clone()
364 },
365 });
366 } else if let Some(r) = config.rules.first_mut() {
367 if r.wrap_xmlns.is_empty() {
368 r.wrap_xmlns = wrap_xmlns;
369 }
370 }
371 }
372 }
373 }
374
375 if !config.rules.is_empty() {
376 save_multi_level_config(dir_path, &config).await?;
377 }
378
379 Ok(())
380 }
381}
382
383impl Default for DisassembleXmlFileHandler {
384 fn default() -> Self {
385 Self::new()
386 }
387}
388
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` and `new` must both produce a handler without ignore rules.
    #[test]
    fn disassemble_handler_default_equals_new() {
        assert!(DisassembleXmlFileHandler::default().ign.is_none());
        assert!(DisassembleXmlFileHandler::new().ign.is_none());
    }

    /// Backslashes become forward slashes; everything else is untouched.
    #[test]
    fn posix_path_replaces_backslashes() {
        assert_eq!(
            DisassembleXmlFileHandler::posix_path("a\\b\\c.xml"),
            "a/b/c.xml"
        );
        assert_eq!(DisassembleXmlFileHandler::posix_path("a/b"), "a/b");
        assert_eq!(DisassembleXmlFileHandler::posix_path(""), "");
    }

    /// The `.xml` suffix check ignores case and requires a true suffix.
    #[test]
    fn is_xml_file_checks_suffix_case_insensitively() {
        assert!(DisassembleXmlFileHandler::is_xml_file("dir/file.xml"));
        assert!(DisassembleXmlFileHandler::is_xml_file("dir/FILE.XML"));
        assert!(!DisassembleXmlFileHandler::is_xml_file("file.xml.bak"));
        assert!(!DisassembleXmlFileHandler::is_xml_file(""));
    }
}