// xml_disassembler/handlers/disassemble.rs
use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5 capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6 strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule};
10use ignore::gitignore::GitignoreBuilder;
11use std::path::Path;
12use tokio::fs;
13
14pub struct DisassembleXmlFileHandler {
15 ign: Option<ignore::gitignore::Gitignore>,
16}
17
18impl DisassembleXmlFileHandler {
19 pub fn new() -> Self {
20 Self { ign: None }
21 }
22
23 async fn load_ignore_rules(&mut self, ignore_path: &str) {
24 let path = Path::new(ignore_path);
25 if path.exists() {
26 if let Ok(content) = fs::read_to_string(path).await {
27 let root = path.parent().unwrap_or(Path::new("."));
28 let mut builder = GitignoreBuilder::new(root);
29 for line in content.lines() {
30 let _ = builder.add_line(None, line);
31 }
32 if let Ok(gi) = builder.build() {
33 self.ign = Some(gi);
34 }
35 }
36 }
37 }
38
39 fn posix_path(path: &str) -> String {
40 path.replace('\\', "/")
41 }
42
43 fn is_xml_file(file_path: &str) -> bool {
44 file_path.to_lowercase().ends_with(".xml")
45 }
46
47 fn is_ignored(&self, path: &str) -> bool {
48 self.ign
49 .as_ref()
50 .map(|ign| ign.matched(path, false).is_ignore())
51 .unwrap_or(false)
52 }
53
54 #[allow(clippy::too_many_arguments)]
55 pub async fn disassemble(
56 &mut self,
57 file_path: &str,
58 unique_id_elements: Option<&str>,
59 strategy: Option<&str>,
60 pre_purge: bool,
61 post_purge: bool,
62 ignore_path: &str,
63 format: &str,
64 multi_level_rule: Option<&MultiLevelRule>,
65 decompose_rules: Option<&[DecomposeRule]>,
66 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
67 let strategy = strategy.unwrap_or("unique-id");
68 let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
69 strategy
70 } else {
71 log::warn!(
72 "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
73 strategy
74 );
75 "unique-id"
76 };
77
78 self.load_ignore_rules(ignore_path).await;
79
80 let path = Path::new(file_path);
81 let meta = fs::metadata(path).await?;
82 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
83 let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
84 let relative_path = Self::posix_path(&relative_path);
85
86 if meta.is_file() {
87 self.handle_file(
88 file_path,
89 &relative_path,
90 unique_id_elements,
91 strategy,
92 pre_purge,
93 post_purge,
94 format,
95 multi_level_rule,
96 decompose_rules,
97 )
98 .await?;
99 } else if meta.is_dir() {
100 self.handle_directory(
101 file_path,
102 unique_id_elements,
103 strategy,
104 pre_purge,
105 post_purge,
106 format,
107 multi_level_rule,
108 decompose_rules,
109 )
110 .await?;
111 }
112
113 Ok(())
114 }
115
116 #[allow(clippy::too_many_arguments)]
117 async fn handle_file(
118 &self,
119 file_path: &str,
120 relative_path: &str,
121 unique_id_elements: Option<&str>,
122 strategy: &str,
123 pre_purge: bool,
124 post_purge: bool,
125 format: &str,
126 multi_level_rule: Option<&MultiLevelRule>,
127 decompose_rules: Option<&[DecomposeRule]>,
128 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
129 let resolved = Path::new(file_path)
130 .canonicalize()
131 .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
132 let resolved_str = resolved.to_string_lossy();
133
134 if !Self::is_xml_file(&resolved_str) {
135 log::error!(
136 "The file path provided is not an XML file: {}",
137 resolved_str
138 );
139 return Ok(());
140 }
141
142 if self.is_ignored(relative_path) {
143 log::warn!("File ignored by ignore rules: {}", resolved_str);
144 return Ok(());
145 }
146
147 let dir_path = resolved.parent().unwrap_or(Path::new("."));
148 self.process_file(
149 dir_path.to_str().unwrap_or("."),
150 strategy,
151 &resolved_str,
152 unique_id_elements,
153 pre_purge,
154 post_purge,
155 format,
156 multi_level_rule,
157 decompose_rules,
158 )
159 .await
160 }
161
162 #[allow(clippy::too_many_arguments)]
163 async fn handle_directory(
164 &self,
165 dir_path: &str,
166 unique_id_elements: Option<&str>,
167 strategy: &str,
168 pre_purge: bool,
169 post_purge: bool,
170 format: &str,
171 multi_level_rule: Option<&MultiLevelRule>,
172 decompose_rules: Option<&[DecomposeRule]>,
173 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
174 let mut entries = fs::read_dir(dir_path).await?;
175 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
176
177 while let Some(entry) = entries.next_entry().await? {
178 let sub_path = entry.path();
179 let sub_file_path = sub_path.to_string_lossy();
180 let relative_sub = sub_path
181 .strip_prefix(&cwd)
182 .unwrap_or(&sub_path)
183 .to_string_lossy();
184 let relative_sub = Self::posix_path(&relative_sub);
185
186 if sub_path.is_file() && Self::is_xml_file(&sub_file_path) {
187 if self.is_ignored(&relative_sub) {
188 log::warn!("File ignored by ignore rules: {}", sub_file_path);
189 } else {
190 self.process_file(
191 dir_path,
192 strategy,
193 &sub_file_path,
194 unique_id_elements,
195 pre_purge,
196 post_purge,
197 format,
198 multi_level_rule,
199 decompose_rules,
200 )
201 .await?;
202 }
203 }
204 }
205 Ok(())
206 }
207
208 #[allow(clippy::too_many_arguments)]
209 async fn process_file(
210 &self,
211 dir_path: &str,
212 strategy: &str,
213 file_path: &str,
214 unique_id_elements: Option<&str>,
215 pre_purge: bool,
216 post_purge: bool,
217 format: &str,
218 multi_level_rule: Option<&MultiLevelRule>,
219 decompose_rules: Option<&[DecomposeRule]>,
220 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
221 log::debug!("Parsing file to disassemble: {}", file_path);
222
223 let file_name = Path::new(file_path)
224 .file_stem()
225 .and_then(|s| s.to_str())
226 .unwrap_or("output");
227 let base_name = file_name.split('.').next().unwrap_or(file_name);
228 let output_path = Path::new(dir_path).join(base_name);
229
230 if pre_purge && output_path.exists() {
231 fs::remove_dir_all(&output_path).await.ok();
232 }
233
234 build_disassembled_files_unified(BuildDisassembledFilesOptions {
235 file_path,
236 disassembled_path: output_path.to_str().unwrap_or("."),
237 base_name: file_name,
238 post_purge,
239 format,
240 unique_id_elements,
241 strategy,
242 decompose_rules,
243 })
244 .await?;
245
246 if let Some(rule) = multi_level_rule {
247 self.recursively_disassemble_multi_level(&output_path, rule, format)
248 .await?;
249 }
250
251 Ok(())
252 }
253
254 async fn recursively_disassemble_multi_level(
257 &self,
258 dir_path: &Path,
259 rule: &MultiLevelRule,
260 format: &str,
261 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
262 let mut config = crate::multi_level::load_multi_level_config(dir_path)
263 .await
264 .unwrap_or_default();
265
266 let mut stack = vec![dir_path.to_path_buf()];
267 while let Some(current) = stack.pop() {
268 let mut entries = Vec::new();
269 let mut read_dir = fs::read_dir(¤t).await?;
270 while let Some(entry) = read_dir.next_entry().await? {
271 entries.push(entry);
272 }
273
274 for entry in entries {
275 let path = entry.path();
276 let path_str = path.to_string_lossy().to_string();
277
278 if path.is_dir() {
279 stack.push(path);
280 } else if path.is_file() {
281 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
282 let path_str_check = path.to_string_lossy();
283 if !name.ends_with(".xml")
284 || (!name.contains(&rule.file_pattern)
285 && !path_str_check.contains(&rule.file_pattern))
286 {
287 continue;
288 }
289
290 let parsed = match parse_xml(&path_str).await {
291 Some(p) => p,
292 None => continue,
293 };
294 let has_element_to_strip = parsed
295 .as_object()
296 .and_then(|o| {
297 let root_key = o.keys().find(|k| *k != "?xml")?;
298 let root_val = o.get(root_key)?.as_object()?;
299 Some(
300 root_key == &rule.root_to_strip
301 || root_val.contains_key(&rule.root_to_strip),
302 )
303 })
304 .unwrap_or(false);
305 if !has_element_to_strip {
306 continue;
307 }
308
309 let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
310
311 let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
312 {
313 Some(xml) => xml,
314 None => continue,
315 };
316
317 fs::write(&path, stripped_xml).await?;
318
319 let file_stem = path
320 .file_stem()
321 .and_then(|s| s.to_str())
322 .unwrap_or("output");
323 let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
324 let parent = path.parent().unwrap_or(dir_path);
325 let second_level_output = parent.join(output_dir_name);
326
327 build_disassembled_files_unified(BuildDisassembledFilesOptions {
328 file_path: &path_str,
329 disassembled_path: second_level_output.to_str().unwrap_or("."),
330 base_name: output_dir_name,
331 post_purge: true,
332 format,
333 unique_id_elements: Some(&rule.unique_id_elements),
334 strategy: "unique-id",
335 decompose_rules: None,
336 })
337 .await?;
338
339 if config.rules.is_empty() {
340 let wrap_root = parsed
341 .as_object()
342 .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
343 .unwrap_or_else(|| rule.wrap_root_element.clone());
344 config.rules.push(MultiLevelRule {
345 file_pattern: rule.file_pattern.clone(),
346 root_to_strip: rule.root_to_strip.clone(),
347 unique_id_elements: rule.unique_id_elements.clone(),
348 path_segment: if rule.path_segment.is_empty() {
349 path_segment_from_file_pattern(&rule.file_pattern)
350 } else {
351 rule.path_segment.clone()
352 },
353 wrap_root_element: wrap_root,
356 wrap_xmlns: if rule.wrap_xmlns.is_empty() {
357 wrap_xmlns
358 } else {
359 rule.wrap_xmlns.clone()
360 },
361 });
362 } else if let Some(r) = config.rules.first_mut() {
363 if r.wrap_xmlns.is_empty() {
364 r.wrap_xmlns = wrap_xmlns;
365 }
366 }
367 }
368 }
369 }
370
371 if !config.rules.is_empty() {
372 save_multi_level_config(dir_path, &config).await?;
373 }
374
375 Ok(())
376 }
377}
378
379impl Default for DisassembleXmlFileHandler {
380 fn default() -> Self {
381 Self::new()
382 }
383}