xml_disassembler/handlers/
reassemble.rs1use crate::builders::{build_xml_string, merge_xml_elements, reorder_root_keys};
4use crate::multi_level::{ensure_segment_files_structure, load_multi_level_config};
5use crate::parsers::parse_to_xml_object;
6use crate::types::XmlElement;
7use serde_json::{Map, Value};
8use std::future::Future;
9use std::path::Path;
10use std::pin::Pin;
11use tokio::fs;
12
13fn strip_xmlns_from_value(v: Value) -> Value {
15 let obj = match v.as_object() {
16 Some(o) => o,
17 None => return v,
18 };
19 let mut out = Map::new();
20 for (k, val) in obj {
21 if k != "@xmlns" {
22 out.insert(k.clone(), val.clone());
23 }
24 }
25 Value::Object(out)
26}
27
28type ProcessDirFuture<'a> = Pin<
29 Box<
30 dyn Future<Output = Result<Vec<XmlElement>, Box<dyn std::error::Error + Send + Sync>>>
31 + Send
32 + 'a,
33 >,
34>;
35
/// Handler that reassembles a directory of parsed fragment files into a
/// single output file.
///
/// The type is a stateless unit struct, so it is trivially copyable and can be
/// formatted with `{:?}` in logs and assertions.
#[derive(Debug, Clone, Copy)]
pub struct ReassembleXmlFileHandler;
37
38impl ReassembleXmlFileHandler {
39 pub fn new() -> Self {
40 Self
41 }
42
43 pub async fn reassemble(
44 &self,
45 file_path: &str,
46 file_extension: Option<&str>,
47 post_purge: bool,
48 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
49 if !self.validate_directory(file_path).await? {
50 return Ok(());
51 }
52
53 let path = Path::new(file_path);
54 let config = load_multi_level_config(path).await;
55 if let Some(ref config) = config {
56 for rule in &config.rules {
57 if rule.path_segment.is_empty() {
58 continue;
59 }
60 let segment_path = path.join(&rule.path_segment);
61 if !segment_path.is_dir() {
62 continue;
63 }
64 let mut entries = Vec::new();
65 let mut read_dir = fs::read_dir(&segment_path).await?;
66 while let Some(entry) = read_dir.next_entry().await? {
67 entries.push(entry);
68 }
69 for entry in entries {
70 let process_path = entry.path();
71 if !process_path.is_dir() {
72 continue;
73 }
74 let process_path_str = process_path.to_string_lossy().to_string();
75 let mut sub_entries = Vec::new();
76 let mut sub_read = fs::read_dir(&process_path).await?;
77 while let Some(e) = sub_read.next_entry().await? {
78 sub_entries.push(e);
79 }
80 for sub_entry in sub_entries {
81 let sub_path = sub_entry.path();
82 if sub_path.is_dir() {
83 let sub_path_str = sub_path.to_string_lossy().to_string();
84 self.reassemble_plain(&sub_path_str, Some("xml"), true, None)
85 .await?;
86 }
87 }
88 self.reassemble_plain(&process_path_str, Some("xml"), true, None)
89 .await?;
90 }
91 ensure_segment_files_structure(
92 &segment_path,
93 &rule.wrap_root_element,
94 &rule.path_segment,
95 &rule.wrap_xmlns,
96 )
97 .await?;
98 }
99 }
100
101 let base_segment = config.as_ref().and_then(|c| {
102 c.rules.first().map(|r| {
103 (
104 file_path.to_string(),
105 r.path_segment.clone(),
106 true, )
108 })
109 });
110 let post_purge_final = post_purge || config.is_some();
112 self.reassemble_plain(file_path, file_extension, post_purge_final, base_segment)
113 .await
114 }
115
116 async fn reassemble_plain(
121 &self,
122 file_path: &str,
123 file_extension: Option<&str>,
124 post_purge: bool,
125 base_segment: Option<(String, String, bool)>,
126 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
127 log::debug!("Parsing directory to reassemble: {}", file_path);
128 let parsed_objects = self
129 .process_files_in_directory(file_path.to_string(), base_segment)
130 .await?;
131
132 if parsed_objects.is_empty() {
133 log::error!(
134 "No files under {} were parsed successfully. A reassembled XML file was not created.",
135 file_path
136 );
137 return Ok(());
138 }
139
140 let mut merged = match merge_xml_elements(&parsed_objects) {
141 Some(m) => m,
142 None => return Ok(()),
143 };
144
145 let key_order_path = Path::new(file_path).join(".key_order.json");
147 if key_order_path.exists() {
148 if let Ok(bytes) = fs::read(&key_order_path).await {
149 if let Ok(key_order) = serde_json::from_slice::<Vec<String>>(&bytes) {
150 if let Some(reordered) = reorder_root_keys(&merged, &key_order) {
151 merged = reordered;
152 }
153 }
154 }
155 }
156
157 let final_xml = build_xml_string(&merged);
158 let output_path = self.get_output_path(file_path, file_extension);
159
160 fs::write(&output_path, final_xml).await?;
161
162 if post_purge {
163 fs::remove_dir_all(file_path).await.ok();
164 }
165
166 Ok(())
167 }
168
169 fn process_files_in_directory<'a>(
170 &'a self,
171 dir_path: String,
172 base_segment: Option<(String, String, bool)>,
173 ) -> ProcessDirFuture<'a> {
174 Box::pin(async move {
175 let mut parsed = Vec::new();
176 let mut entries = Vec::new();
177 let mut read_dir = fs::read_dir(&dir_path).await?;
178 while let Some(entry) = read_dir.next_entry().await? {
179 entries.push(entry);
180 }
181 entries.sort_by(|a, b| {
182 let a_base: String = a
183 .file_name()
184 .to_str()
185 .unwrap_or("")
186 .split('.')
187 .next()
188 .unwrap_or("")
189 .to_string();
190 let b_base: String = b
191 .file_name()
192 .to_str()
193 .unwrap_or("")
194 .split('.')
195 .next()
196 .unwrap_or("")
197 .to_string();
198 a_base.cmp(&b_base)
199 });
200
201 let is_base = base_segment
202 .as_ref()
203 .map(|(base, _, _)| dir_path == *base)
204 .unwrap_or(false);
205 let segment_name = base_segment.as_ref().map(|(_, name, _)| name.as_str());
206 let extract_inner = base_segment.as_ref().map(|(_, _, e)| *e).unwrap_or(false);
207
208 for entry in entries {
209 let path = entry.path();
210 let file_path = path.to_string_lossy().to_string();
211
212 if path.is_file() {
213 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
214 if !name.starts_with('.') && self.is_parsable_file(name) {
215 if let Some(parsed_obj) = parse_to_xml_object(&file_path).await {
216 parsed.push(parsed_obj);
217 }
218 }
219 } else if path.is_dir() {
220 let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
221 if is_base && segment_name == Some(dir_name) {
222 let segment_element = self
223 .collect_segment_as_array(
224 &file_path,
225 segment_name.unwrap(),
226 extract_inner,
227 )
228 .await?;
229 if let Some(el) = segment_element {
230 parsed.push(el);
231 }
232 } else {
233 let sub_parsed = self
234 .process_files_in_directory(file_path, base_segment.clone())
235 .await?;
236 parsed.extend(sub_parsed);
237 }
238 }
239 }
240
241 Ok(parsed)
242 })
243 }
244
245 async fn collect_segment_as_array(
249 &self,
250 segment_dir: &str,
251 segment_name: &str,
252 extract_inner: bool,
253 ) -> Result<Option<XmlElement>, Box<dyn std::error::Error + Send + Sync>> {
254 let mut xml_files = Vec::new();
255 let mut read_dir = fs::read_dir(segment_dir).await?;
256 while let Some(entry) = read_dir.next_entry().await? {
257 let path = entry.path();
258 if path.is_file() {
259 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
260 if !name.starts_with('.') && self.is_parsable_file(name) {
261 xml_files.push(path.to_string_lossy().to_string());
262 }
263 }
264 }
265 xml_files.sort();
266
267 let mut root_contents = Vec::new();
268 let mut first_xml: Option<(String, Option<Value>)> = None;
269 for file_path in &xml_files {
270 let parsed = match parse_to_xml_object(file_path).await {
271 Some(p) => p,
272 None => continue,
273 };
274 let obj = match parsed.as_object() {
275 Some(o) => o,
276 None => continue,
277 };
278 let root_key = match obj.keys().find(|k| *k != "?xml").cloned() {
279 Some(k) => k,
280 None => continue,
281 };
282 let root_val = obj
283 .get(&root_key)
284 .cloned()
285 .unwrap_or(Value::Object(serde_json::Map::new()));
286 let mut content = if extract_inner {
287 root_val
288 .get(segment_name)
289 .cloned()
290 .unwrap_or_else(|| Value::Object(serde_json::Map::new()))
291 } else {
292 root_val
293 };
294 if extract_inner {
296 content = strip_xmlns_from_value(content);
297 }
298 root_contents.push(content);
299 if first_xml.is_none() {
300 first_xml = Some((root_key, obj.get("?xml").cloned()));
301 }
302 }
303 if root_contents.is_empty() {
304 return Ok(None);
305 }
306 let (root_key, decl_opt) = first_xml.unwrap();
307 let mut content = serde_json::Map::new();
308 content.insert(segment_name.to_string(), Value::Array(root_contents));
309 let mut top = serde_json::Map::new();
310 if let Some(decl) = decl_opt {
311 top.insert("?xml".to_string(), decl);
312 } else {
313 let mut d = serde_json::Map::new();
314 d.insert("@version".to_string(), Value::String("1.0".to_string()));
315 d.insert("@encoding".to_string(), Value::String("UTF-8".to_string()));
316 top.insert("?xml".to_string(), Value::Object(d));
317 }
318 top.insert(root_key, Value::Object(content));
319 Ok(Some(Value::Object(top)))
320 }
321
322 fn is_parsable_file(&self, file_name: &str) -> bool {
323 let lower = file_name.to_lowercase();
324 lower.ends_with(".xml")
325 || lower.ends_with(".json")
326 || lower.ends_with(".json5")
327 || lower.ends_with(".yaml")
328 || lower.ends_with(".yml")
329 }
330
331 async fn validate_directory(
332 &self,
333 path: &str,
334 ) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
335 let meta = fs::metadata(path).await?;
336 if !meta.is_dir() {
337 log::error!(
338 "The provided path to reassemble is not a directory: {}",
339 path
340 );
341 return Ok(false);
342 }
343 Ok(true)
344 }
345
346 fn get_output_path(&self, dir_path: &str, extension: Option<&str>) -> String {
347 let path = Path::new(dir_path);
348 let parent = path.parent().unwrap_or(Path::new("."));
349 let base_name = path
350 .file_name()
351 .and_then(|n| n.to_str())
352 .unwrap_or("output");
353 let ext = extension.unwrap_or("xml");
354 parent
355 .join(format!("{}.{}", base_name, ext))
356 .to_string_lossy()
357 .to_string()
358 }
359}
360
361impl Default for ReassembleXmlFileHandler {
362 fn default() -> Self {
363 Self::new()
364 }
365}