xml_disassembler/handlers/
reassemble.rs1use crate::builders::{build_xml_string, merge_xml_elements, reorder_root_keys};
4use crate::multi_level::{ensure_segment_files_structure, load_multi_level_config};
5use crate::parsers::parse_to_xml_object;
6use crate::types::XmlElement;
7use crate::utils::normalize_path_unix;
8use serde_json::{Map, Value};
9use std::future::Future;
10use std::path::Path;
11use std::pin::Pin;
12use tokio::fs;
13
14fn strip_xmlns_from_value(v: Value) -> Value {
16 let obj = match v.as_object() {
17 Some(o) => o,
18 None => return v,
19 };
20 let mut out = Map::new();
21 for (k, val) in obj {
22 if k != "@xmlns" {
23 out.insert(k.clone(), val.clone());
24 }
25 }
26 Value::Object(out)
27}
28
/// Boxed future returned by the recursive directory walker
/// (`process_files_in_directory`), which cannot be a plain `async fn`
/// because it calls itself recursively.
type ProcessDirFuture<'a> = Pin<
    Box<
        dyn Future<Output = Result<Vec<XmlElement>, Box<dyn std::error::Error + Send + Sync>>>
            + Send
            + 'a,
    >,
>;
36
/// Reassembles a directory of disassembled XML fragments back into a single XML file.
pub struct ReassembleXmlFileHandler;
38
39impl ReassembleXmlFileHandler {
    /// Creates a new, stateless handler.
    pub fn new() -> Self {
        Self
    }
43
    /// Reassembles the disassembled XML directory at `file_path` into a single
    /// XML file placed next to the directory.
    ///
    /// When a multi-level config is present, each rule's segment directory is
    /// processed first: grandchild directories are reassembled, then child
    /// directories (both bottom-up and purged), and the segment's file
    /// structure is normalized. Finally the base directory itself is merged.
    ///
    /// * `file_path` - directory containing the disassembled fragments.
    /// * `file_extension` - output file extension; `None` defaults to "xml".
    /// * `post_purge` - when true, delete the source directory afterwards
    ///   (forced on whenever a multi-level config exists).
    pub async fn reassemble(
        &self,
        file_path: &str,
        file_extension: Option<&str>,
        post_purge: bool,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        let file_path = normalize_path_unix(file_path);
        if !self.validate_directory(&file_path).await? {
            // Not a directory: validate_directory already logged the error.
            return Ok(());
        }

        let path = Path::new(&file_path);
        let config = load_multi_level_config(path).await;
        if let Some(ref config) = config {
            for rule in &config.rules {
                if rule.path_segment.is_empty() {
                    continue;
                }
                let segment_path = path.join(&rule.path_segment);
                if !segment_path.is_dir() {
                    continue;
                }
                // Collect and name-sort entries so reassembly order is
                // deterministic across filesystems.
                let mut entries = Vec::new();
                let mut read_dir = fs::read_dir(&segment_path).await?;
                while let Some(entry) = read_dir.next_entry().await? {
                    entries.push(entry);
                }
                entries.sort_by_key(|e| e.file_name());
                for entry in entries {
                    let process_path = entry.path();
                    if !process_path.is_dir() {
                        continue;
                    }
                    let process_path_str = normalize_path_unix(&process_path.to_string_lossy());
                    let mut sub_entries = Vec::new();
                    let mut sub_read = fs::read_dir(&process_path).await?;
                    while let Some(e) = sub_read.next_entry().await? {
                        sub_entries.push(e);
                    }
                    sub_entries.sort_by_key(|e| e.file_name());
                    // Bottom-up: reassemble grandchild directories before the
                    // child directory itself; both are purged (post_purge=true).
                    for sub_entry in sub_entries {
                        let sub_path = sub_entry.path();
                        if sub_path.is_dir() {
                            let sub_path_str = normalize_path_unix(&sub_path.to_string_lossy());
                            self.reassemble_plain(&sub_path_str, Some("xml"), true, None)
                                .await?;
                        }
                    }
                    self.reassemble_plain(&process_path_str, Some("xml"), true, None)
                        .await?;
                }
                ensure_segment_files_structure(
                    &segment_path,
                    &rule.wrap_root_element,
                    &rule.path_segment,
                    &rule.wrap_xmlns,
                )
                .await?;
            }
        }

        // Only the first rule's segment drives base-level array collection.
        // NOTE(review): presumably configs carry a single primary rule — confirm.
        let base_segment = config.as_ref().and_then(|c| {
            c.rules.first().map(|r| {
                (
                    file_path.clone(),
                    r.path_segment.clone(),
                    true, // extract_inner: unwrap the segment child of each file's root
                )
            })
        });
        // Multi-level reassembly always purges the working directory.
        let post_purge_final = post_purge || config.is_some();
        self.reassemble_plain(&file_path, file_extension, post_purge_final, base_segment)
            .await
    }
121
122 async fn reassemble_plain(
127 &self,
128 file_path: &str,
129 file_extension: Option<&str>,
130 post_purge: bool,
131 base_segment: Option<(String, String, bool)>,
132 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
133 let file_path = normalize_path_unix(file_path);
134 log::debug!("Parsing directory to reassemble: {}", file_path);
135 let parsed_objects = self
136 .process_files_in_directory(file_path.to_string(), base_segment)
137 .await?;
138
139 if parsed_objects.is_empty() {
140 log::error!(
141 "No files under {} were parsed successfully. A reassembled XML file was not created.",
142 file_path
143 );
144 return Ok(());
145 }
146
147 let mut merged = match merge_xml_elements(&parsed_objects) {
148 Some(m) => m,
149 None => return Ok(()),
150 };
151
152 let key_order_path = Path::new(&file_path).join(".key_order.json");
154 if key_order_path.exists() {
155 if let Ok(bytes) = fs::read(&key_order_path).await {
156 if let Ok(key_order) = serde_json::from_slice::<Vec<String>>(&bytes) {
157 if let Some(reordered) = reorder_root_keys(&merged, &key_order) {
158 merged = reordered;
159 }
160 }
161 }
162 }
163
164 let final_xml = build_xml_string(&merged);
165 let output_path = self.get_output_path(&file_path, file_extension);
166
167 fs::write(&output_path, final_xml).await?;
168
169 if post_purge {
170 fs::remove_dir_all(file_path).await.ok();
171 }
172
173 Ok(())
174 }
175
176 fn process_files_in_directory<'a>(
177 &'a self,
178 dir_path: String,
179 base_segment: Option<(String, String, bool)>,
180 ) -> ProcessDirFuture<'a> {
181 Box::pin(async move {
182 let mut parsed = Vec::new();
183 let mut entries = Vec::new();
184 let mut read_dir = fs::read_dir(&dir_path).await?;
185 while let Some(entry) = read_dir.next_entry().await? {
186 entries.push(entry);
187 }
188 entries.sort_by(|a, b| {
190 let a_name = a.file_name().to_string_lossy().to_string();
191 let b_name = b.file_name().to_string_lossy().to_string();
192 a_name.cmp(&b_name)
193 });
194
195 let is_base = base_segment
196 .as_ref()
197 .map(|(base, _, _)| dir_path == *base)
198 .unwrap_or(false);
199 let segment_name = base_segment.as_ref().map(|(_, name, _)| name.as_str());
200 let extract_inner = base_segment.as_ref().map(|(_, _, e)| *e).unwrap_or(false);
201
202 for entry in entries {
203 let path = entry.path();
204 let file_path = normalize_path_unix(&path.to_string_lossy()).to_string();
205
206 if path.is_file() {
207 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
208 if !name.starts_with('.') && self.is_parsable_file(name) {
209 if let Some(parsed_obj) = parse_to_xml_object(&file_path).await {
210 parsed.push(parsed_obj);
211 }
212 }
213 } else if path.is_dir() {
214 let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
215 if is_base && segment_name == Some(dir_name) {
216 let segment_element = self
217 .collect_segment_as_array(
218 &file_path,
219 segment_name.unwrap(),
220 extract_inner,
221 )
222 .await?;
223 if let Some(el) = segment_element {
224 parsed.push(el);
225 }
226 } else {
227 let sub_parsed = self
228 .process_files_in_directory(file_path, base_segment.clone())
229 .await?;
230 parsed.extend(sub_parsed);
231 }
232 }
233 }
234
235 Ok(parsed)
236 })
237 }
238
239 async fn collect_segment_as_array(
243 &self,
244 segment_dir: &str,
245 segment_name: &str,
246 extract_inner: bool,
247 ) -> Result<Option<XmlElement>, Box<dyn std::error::Error + Send + Sync>> {
248 let mut xml_files = Vec::new();
249 let mut read_dir = fs::read_dir(segment_dir).await?;
250 while let Some(entry) = read_dir.next_entry().await? {
251 let path = entry.path();
252 if path.is_file() {
253 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
254 if !name.starts_with('.') && self.is_parsable_file(name) {
255 xml_files.push(normalize_path_unix(&path.to_string_lossy()));
256 }
257 }
258 }
259 xml_files.sort();
260
261 let mut root_contents = Vec::new();
262 let mut first_xml: Option<(String, Option<Value>)> = None;
263 for file_path in &xml_files {
264 let parsed = match parse_to_xml_object(file_path).await {
265 Some(p) => p,
266 None => continue,
267 };
268 let obj = match parsed.as_object() {
269 Some(o) => o,
270 None => continue,
271 };
272 let root_key = match obj.keys().find(|k| *k != "?xml").cloned() {
273 Some(k) => k,
274 None => continue,
275 };
276 let root_val = obj
277 .get(&root_key)
278 .cloned()
279 .unwrap_or(Value::Object(serde_json::Map::new()));
280 let mut content = if extract_inner {
281 root_val
282 .get(segment_name)
283 .cloned()
284 .unwrap_or_else(|| Value::Object(serde_json::Map::new()))
285 } else {
286 root_val
287 };
288 if extract_inner {
290 content = strip_xmlns_from_value(content);
291 }
292 root_contents.push(content);
293 if first_xml.is_none() {
294 first_xml = Some((root_key, obj.get("?xml").cloned()));
295 }
296 }
297 if root_contents.is_empty() {
298 return Ok(None);
299 }
300 let (root_key, decl_opt) = first_xml.unwrap();
301 let mut content = serde_json::Map::new();
302 content.insert(segment_name.to_string(), Value::Array(root_contents));
303 let mut top = serde_json::Map::new();
304 if let Some(decl) = decl_opt {
305 top.insert("?xml".to_string(), decl);
306 } else {
307 let mut d = serde_json::Map::new();
308 d.insert("@version".to_string(), Value::String("1.0".to_string()));
309 d.insert("@encoding".to_string(), Value::String("UTF-8".to_string()));
310 top.insert("?xml".to_string(), Value::Object(d));
311 }
312 top.insert(root_key, Value::Object(content));
313 Ok(Some(Value::Object(top)))
314 }
315
316 fn is_parsable_file(&self, file_name: &str) -> bool {
317 let lower = file_name.to_lowercase();
318 lower.ends_with(".xml")
319 || lower.ends_with(".json")
320 || lower.ends_with(".json5")
321 || lower.ends_with(".yaml")
322 || lower.ends_with(".yml")
323 }
324
325 async fn validate_directory(
326 &self,
327 path: &str,
328 ) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
329 let meta = fs::metadata(path).await?;
330 if !meta.is_dir() {
331 log::error!(
332 "The provided path to reassemble is not a directory: {}",
333 path
334 );
335 return Ok(false);
336 }
337 Ok(true)
338 }
339
340 fn get_output_path(&self, dir_path: &str, extension: Option<&str>) -> String {
341 let path = Path::new(dir_path);
342 let parent = path.parent().unwrap_or(Path::new("."));
343 let base_name = path
344 .file_name()
345 .and_then(|n| n.to_str())
346 .unwrap_or("output");
347 let ext = extension.unwrap_or("xml");
348 parent
349 .join(format!("{}.{}", base_name, ext))
350 .to_string_lossy()
351 .to_string()
352 }
353}
354
/// `Default` simply delegates to [`ReassembleXmlFileHandler::new`].
impl Default for ReassembleXmlFileHandler {
    fn default() -> Self {
        Self::new()
    }
}
360
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: the `Default` impl constructs the unit handler without panicking.
    #[test]
    #[allow(clippy::default_constructed_unit_structs)]
    fn reassemble_handler_default_equals_new() {
        let _ = ReassembleXmlFileHandler::default();
    }
}
370}