xml_disassembler/handlers/
disassemble.rs1use crate::builders::build_disassembled_files_unified;
4use crate::multi_level::{
5 capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
6 strip_root_and_build_xml,
7};
8use crate::parsers::parse_xml;
9use crate::types::{BuildDisassembledFilesOptions, MultiLevelRule};
10use ignore::gitignore::GitignoreBuilder;
11use std::path::Path;
12use tokio::fs;
13
14pub struct DisassembleXmlFileHandler {
15 ign: Option<ignore::gitignore::Gitignore>,
16}
17
18impl DisassembleXmlFileHandler {
19 pub fn new() -> Self {
20 Self { ign: None }
21 }
22
23 async fn load_ignore_rules(&mut self, ignore_path: &str) {
24 let path = Path::new(ignore_path);
25 if path.exists() {
26 if let Ok(content) = fs::read_to_string(path).await {
27 let root = path.parent().unwrap_or(Path::new("."));
28 let mut builder = GitignoreBuilder::new(root);
29 for line in content.lines() {
30 let _ = builder.add_line(None, line);
31 }
32 if let Ok(gi) = builder.build() {
33 self.ign = Some(gi);
34 }
35 }
36 }
37 }
38
/// Returns `path` with every backslash replaced by a forward slash, so that
/// Windows-style paths can be compared against `/`-separated patterns.
fn posix_path(path: &str) -> String {
    path.replace('\\', "/")
}
42
/// Returns `true` when `file_path` ends with `.xml`, ignoring ASCII case.
fn is_xml_file(file_path: &str) -> bool {
    const EXTENSION: &[u8] = b".xml";
    let bytes = file_path.as_bytes();
    // Compare the trailing bytes directly: this avoids allocating a lowercased
    // copy of the whole path, and slicing bytes (rather than the `str`) cannot
    // panic on a multi-byte character boundary.
    bytes.len() >= EXTENSION.len()
        && bytes[bytes.len() - EXTENSION.len()..].eq_ignore_ascii_case(EXTENSION)
}
46
47 fn is_ignored(&self, path: &str) -> bool {
48 self.ign
49 .as_ref()
50 .map(|ign| ign.matched(path, false).is_ignore())
51 .unwrap_or(false)
52 }
53
54 #[allow(clippy::too_many_arguments)]
55 pub async fn disassemble(
56 &mut self,
57 file_path: &str,
58 unique_id_elements: Option<&str>,
59 strategy: Option<&str>,
60 pre_purge: bool,
61 post_purge: bool,
62 ignore_path: &str,
63 format: &str,
64 multi_level_rule: Option<&MultiLevelRule>,
65 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
66 let strategy = strategy.unwrap_or("unique-id");
67 let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
68 strategy
69 } else {
70 log::warn!(
71 "Unsupported strategy \"{}\", defaulting to \"unique-id\".",
72 strategy
73 );
74 "unique-id"
75 };
76
77 self.load_ignore_rules(ignore_path).await;
78
79 let path = Path::new(file_path);
80 let meta = fs::metadata(path).await?;
81 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
82 let relative_path = path.strip_prefix(&cwd).unwrap_or(path).to_string_lossy();
83 let relative_path = Self::posix_path(&relative_path);
84
85 if meta.is_file() {
86 self.handle_file(
87 file_path,
88 &relative_path,
89 unique_id_elements,
90 strategy,
91 pre_purge,
92 post_purge,
93 format,
94 multi_level_rule,
95 )
96 .await?;
97 } else if meta.is_dir() {
98 self.handle_directory(
99 file_path,
100 unique_id_elements,
101 strategy,
102 pre_purge,
103 post_purge,
104 format,
105 multi_level_rule,
106 )
107 .await?;
108 }
109
110 Ok(())
111 }
112
113 #[allow(clippy::too_many_arguments)]
114 async fn handle_file(
115 &self,
116 file_path: &str,
117 relative_path: &str,
118 unique_id_elements: Option<&str>,
119 strategy: &str,
120 pre_purge: bool,
121 post_purge: bool,
122 format: &str,
123 multi_level_rule: Option<&MultiLevelRule>,
124 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
125 let resolved = Path::new(file_path)
126 .canonicalize()
127 .unwrap_or_else(|_| Path::new(file_path).to_path_buf());
128 let resolved_str = resolved.to_string_lossy();
129
130 if !Self::is_xml_file(&resolved_str) {
131 log::error!(
132 "The file path provided is not an XML file: {}",
133 resolved_str
134 );
135 return Ok(());
136 }
137
138 if self.is_ignored(relative_path) {
139 log::warn!("File ignored by ignore rules: {}", resolved_str);
140 return Ok(());
141 }
142
143 let dir_path = resolved.parent().unwrap_or(Path::new("."));
144 self.process_file(
145 dir_path.to_str().unwrap_or("."),
146 strategy,
147 &resolved_str,
148 unique_id_elements,
149 pre_purge,
150 post_purge,
151 format,
152 multi_level_rule,
153 )
154 .await
155 }
156
157 #[allow(clippy::too_many_arguments)]
158 async fn handle_directory(
159 &self,
160 dir_path: &str,
161 unique_id_elements: Option<&str>,
162 strategy: &str,
163 pre_purge: bool,
164 post_purge: bool,
165 format: &str,
166 multi_level_rule: Option<&MultiLevelRule>,
167 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
168 let mut entries = fs::read_dir(dir_path).await?;
169 let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
170
171 while let Some(entry) = entries.next_entry().await? {
172 let sub_path = entry.path();
173 let sub_file_path = sub_path.to_string_lossy();
174 let relative_sub = sub_path
175 .strip_prefix(&cwd)
176 .unwrap_or(&sub_path)
177 .to_string_lossy();
178 let relative_sub = Self::posix_path(&relative_sub);
179
180 if sub_path.is_file() && Self::is_xml_file(&sub_file_path) {
181 if self.is_ignored(&relative_sub) {
182 log::warn!("File ignored by ignore rules: {}", sub_file_path);
183 } else {
184 self.process_file(
185 dir_path,
186 strategy,
187 &sub_file_path,
188 unique_id_elements,
189 pre_purge,
190 post_purge,
191 format,
192 multi_level_rule,
193 )
194 .await?;
195 }
196 }
197 }
198 Ok(())
199 }
200
201 #[allow(clippy::too_many_arguments)]
202 async fn process_file(
203 &self,
204 dir_path: &str,
205 strategy: &str,
206 file_path: &str,
207 unique_id_elements: Option<&str>,
208 pre_purge: bool,
209 post_purge: bool,
210 format: &str,
211 multi_level_rule: Option<&MultiLevelRule>,
212 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
213 log::debug!("Parsing file to disassemble: {}", file_path);
214
215 let file_name = Path::new(file_path)
216 .file_stem()
217 .and_then(|s| s.to_str())
218 .unwrap_or("output");
219 let base_name = file_name.split('.').next().unwrap_or(file_name);
220 let output_path = Path::new(dir_path).join(base_name);
221
222 if pre_purge && output_path.exists() {
223 fs::remove_dir_all(&output_path).await.ok();
224 }
225
226 build_disassembled_files_unified(BuildDisassembledFilesOptions {
227 file_path,
228 disassembled_path: output_path.to_str().unwrap_or("."),
229 base_name: file_name,
230 post_purge,
231 format,
232 unique_id_elements,
233 strategy,
234 })
235 .await?;
236
237 if let Some(rule) = multi_level_rule {
238 self.recursively_disassemble_multi_level(&output_path, rule, format)
239 .await?;
240 }
241
242 Ok(())
243 }
244
245 async fn recursively_disassemble_multi_level(
248 &self,
249 dir_path: &Path,
250 rule: &MultiLevelRule,
251 format: &str,
252 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
253 let mut config = crate::multi_level::load_multi_level_config(dir_path)
254 .await
255 .unwrap_or_default();
256
257 let mut stack = vec![dir_path.to_path_buf()];
258 while let Some(current) = stack.pop() {
259 let mut entries = Vec::new();
260 let mut read_dir = fs::read_dir(¤t).await?;
261 while let Some(entry) = read_dir.next_entry().await? {
262 entries.push(entry);
263 }
264
265 for entry in entries {
266 let path = entry.path();
267 let path_str = path.to_string_lossy().to_string();
268
269 if path.is_dir() {
270 stack.push(path);
271 } else if path.is_file() {
272 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
273 let path_str_check = path.to_string_lossy();
274 if !name.ends_with(".xml")
275 || (!name.contains(&rule.file_pattern)
276 && !path_str_check.contains(&rule.file_pattern))
277 {
278 continue;
279 }
280
281 let parsed = match parse_xml(&path_str).await {
282 Some(p) => p,
283 None => continue,
284 };
285 let has_element_to_strip = parsed
286 .as_object()
287 .and_then(|o| {
288 let root_key = o.keys().find(|k| *k != "?xml")?;
289 let root_val = o.get(root_key)?.as_object()?;
290 Some(
291 root_key == &rule.root_to_strip
292 || root_val.contains_key(&rule.root_to_strip),
293 )
294 })
295 .unwrap_or(false);
296 if !has_element_to_strip {
297 continue;
298 }
299
300 let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();
301
302 let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip)
303 {
304 Some(xml) => xml,
305 None => continue,
306 };
307
308 fs::write(&path, stripped_xml).await?;
309
310 let file_stem = path
311 .file_stem()
312 .and_then(|s| s.to_str())
313 .unwrap_or("output");
314 let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
315 let parent = path.parent().unwrap_or(dir_path);
316 let second_level_output = parent.join(output_dir_name);
317
318 build_disassembled_files_unified(BuildDisassembledFilesOptions {
319 file_path: &path_str,
320 disassembled_path: second_level_output.to_str().unwrap_or("."),
321 base_name: output_dir_name,
322 post_purge: true,
323 format,
324 unique_id_elements: Some(&rule.unique_id_elements),
325 strategy: "unique-id",
326 })
327 .await?;
328
329 if config.rules.is_empty() {
330 let wrap_root = parsed
331 .as_object()
332 .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
333 .unwrap_or_else(|| rule.wrap_root_element.clone());
334 config.rules.push(MultiLevelRule {
335 file_pattern: rule.file_pattern.clone(),
336 root_to_strip: rule.root_to_strip.clone(),
337 unique_id_elements: rule.unique_id_elements.clone(),
338 path_segment: if rule.path_segment.is_empty() {
339 path_segment_from_file_pattern(&rule.file_pattern)
340 } else {
341 rule.path_segment.clone()
342 },
343 wrap_root_element: wrap_root,
346 wrap_xmlns: if rule.wrap_xmlns.is_empty() {
347 wrap_xmlns
348 } else {
349 rule.wrap_xmlns.clone()
350 },
351 });
352 } else if let Some(r) = config.rules.first_mut() {
353 if r.wrap_xmlns.is_empty() {
354 r.wrap_xmlns = wrap_xmlns;
355 }
356 }
357 }
358 }
359 }
360
361 if !config.rules.is_empty() {
362 save_multi_level_config(dir_path, &config).await?;
363 }
364
365 Ok(())
366 }
367}
368
369impl Default for DisassembleXmlFileHandler {
370 fn default() -> Self {
371 Self::new()
372 }
373}