xml_disassembler/handlers/
reassemble.rs1use crate::builders::{build_xml_string, merge_xml_elements, reorder_root_keys};
4use crate::multi_level::{ensure_segment_files_structure, load_multi_level_config};
5use crate::parsers::parse_to_xml_object;
6use crate::types::XmlElement;
7use serde_json::{Map, Value};
8use std::future::Future;
9use std::path::Path;
10use std::pin::Pin;
11use tokio::fs;
12
13fn strip_xmlns_from_value(v: Value) -> Value {
15 let obj = match v.as_object() {
16 Some(o) => o,
17 None => return v,
18 };
19 let mut out = Map::new();
20 for (k, val) in obj {
21 if k != "@xmlns" {
22 out.insert(k.clone(), val.clone());
23 }
24 }
25 Value::Object(out)
26}
27
/// Boxed, `Send` future returned by `process_files_in_directory`.
///
/// That method awaits itself for nested directories, so its future is handed
/// back behind `Pin<Box<dyn Future>>` rather than as a plain `async fn`.
/// It resolves to the parsed elements or a boxed error.
type ProcessDirFuture<'a> = Pin<
    Box<
        dyn Future<Output = Result<Vec<XmlElement>, Box<dyn std::error::Error + Send + Sync>>>
            + Send
            + 'a,
    >,
>;
35
/// Stateless handler that reassembles a directory of parsable files
/// (XML, JSON, JSON5, YAML) into a single output file.
pub struct ReassembleXmlFileHandler;
37
38impl ReassembleXmlFileHandler {
39 pub fn new() -> Self {
40 Self
41 }
42
43 pub async fn reassemble(
44 &self,
45 file_path: &str,
46 file_extension: Option<&str>,
47 post_purge: bool,
48 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
49 if !self.validate_directory(file_path).await? {
50 return Ok(());
51 }
52
53 let path = Path::new(file_path);
54 let config = load_multi_level_config(path).await;
55 if let Some(ref config) = config {
56 for rule in &config.rules {
57 if rule.path_segment.is_empty() {
58 continue;
59 }
60 let segment_path = path.join(&rule.path_segment);
61 if !segment_path.is_dir() {
62 continue;
63 }
64 let mut entries = Vec::new();
65 let mut read_dir = fs::read_dir(&segment_path).await?;
66 while let Some(entry) = read_dir.next_entry().await? {
67 entries.push(entry);
68 }
69 entries.sort_by_key(|e| e.file_name());
71 for entry in entries {
72 let process_path = entry.path();
73 if !process_path.is_dir() {
74 continue;
75 }
76 let process_path_str = process_path.to_string_lossy().to_string();
77 let mut sub_entries = Vec::new();
78 let mut sub_read = fs::read_dir(&process_path).await?;
79 while let Some(e) = sub_read.next_entry().await? {
80 sub_entries.push(e);
81 }
82 sub_entries.sort_by_key(|e| e.file_name());
84 for sub_entry in sub_entries {
85 let sub_path = sub_entry.path();
86 if sub_path.is_dir() {
87 let sub_path_str = sub_path.to_string_lossy().to_string();
88 self.reassemble_plain(&sub_path_str, Some("xml"), true, None)
89 .await?;
90 }
91 }
92 self.reassemble_plain(&process_path_str, Some("xml"), true, None)
93 .await?;
94 }
95 ensure_segment_files_structure(
96 &segment_path,
97 &rule.wrap_root_element,
98 &rule.path_segment,
99 &rule.wrap_xmlns,
100 )
101 .await?;
102 }
103 }
104
105 let base_segment = config.as_ref().and_then(|c| {
106 c.rules.first().map(|r| {
107 (
108 file_path.to_string(),
109 r.path_segment.clone(),
110 true, )
112 })
113 });
114 let post_purge_final = post_purge || config.is_some();
116 self.reassemble_plain(file_path, file_extension, post_purge_final, base_segment)
117 .await
118 }
119
120 async fn reassemble_plain(
125 &self,
126 file_path: &str,
127 file_extension: Option<&str>,
128 post_purge: bool,
129 base_segment: Option<(String, String, bool)>,
130 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
131 log::debug!("Parsing directory to reassemble: {}", file_path);
132 let parsed_objects = self
133 .process_files_in_directory(file_path.to_string(), base_segment)
134 .await?;
135
136 if parsed_objects.is_empty() {
137 log::error!(
138 "No files under {} were parsed successfully. A reassembled XML file was not created.",
139 file_path
140 );
141 return Ok(());
142 }
143
144 let mut merged = match merge_xml_elements(&parsed_objects) {
145 Some(m) => m,
146 None => return Ok(()),
147 };
148
149 let key_order_path = Path::new(file_path).join(".key_order.json");
151 if key_order_path.exists() {
152 if let Ok(bytes) = fs::read(&key_order_path).await {
153 if let Ok(key_order) = serde_json::from_slice::<Vec<String>>(&bytes) {
154 if let Some(reordered) = reorder_root_keys(&merged, &key_order) {
155 merged = reordered;
156 }
157 }
158 }
159 }
160
161 let final_xml = build_xml_string(&merged);
162 let output_path = self.get_output_path(file_path, file_extension);
163
164 fs::write(&output_path, final_xml).await?;
165
166 if post_purge {
167 fs::remove_dir_all(file_path).await.ok();
168 }
169
170 Ok(())
171 }
172
173 fn process_files_in_directory<'a>(
174 &'a self,
175 dir_path: String,
176 base_segment: Option<(String, String, bool)>,
177 ) -> ProcessDirFuture<'a> {
178 Box::pin(async move {
179 let mut parsed = Vec::new();
180 let mut entries = Vec::new();
181 let mut read_dir = fs::read_dir(&dir_path).await?;
182 while let Some(entry) = read_dir.next_entry().await? {
183 entries.push(entry);
184 }
185 entries.sort_by(|a, b| {
187 let a_name = a.file_name().to_string_lossy().to_string();
188 let b_name = b.file_name().to_string_lossy().to_string();
189 a_name.cmp(&b_name)
190 });
191
192 let is_base = base_segment
193 .as_ref()
194 .map(|(base, _, _)| dir_path == *base)
195 .unwrap_or(false);
196 let segment_name = base_segment.as_ref().map(|(_, name, _)| name.as_str());
197 let extract_inner = base_segment.as_ref().map(|(_, _, e)| *e).unwrap_or(false);
198
199 for entry in entries {
200 let path = entry.path();
201 let file_path = path.to_string_lossy().to_string();
202
203 if path.is_file() {
204 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
205 if !name.starts_with('.') && self.is_parsable_file(name) {
206 if let Some(parsed_obj) = parse_to_xml_object(&file_path).await {
207 parsed.push(parsed_obj);
208 }
209 }
210 } else if path.is_dir() {
211 let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
212 if is_base && segment_name == Some(dir_name) {
213 let segment_element = self
214 .collect_segment_as_array(
215 &file_path,
216 segment_name.unwrap(),
217 extract_inner,
218 )
219 .await?;
220 if let Some(el) = segment_element {
221 parsed.push(el);
222 }
223 } else {
224 let sub_parsed = self
225 .process_files_in_directory(file_path, base_segment.clone())
226 .await?;
227 parsed.extend(sub_parsed);
228 }
229 }
230 }
231
232 Ok(parsed)
233 })
234 }
235
236 async fn collect_segment_as_array(
240 &self,
241 segment_dir: &str,
242 segment_name: &str,
243 extract_inner: bool,
244 ) -> Result<Option<XmlElement>, Box<dyn std::error::Error + Send + Sync>> {
245 let mut xml_files = Vec::new();
246 let mut read_dir = fs::read_dir(segment_dir).await?;
247 while let Some(entry) = read_dir.next_entry().await? {
248 let path = entry.path();
249 if path.is_file() {
250 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
251 if !name.starts_with('.') && self.is_parsable_file(name) {
252 xml_files.push(path.to_string_lossy().to_string());
253 }
254 }
255 }
256 xml_files.sort();
257
258 let mut root_contents = Vec::new();
259 let mut first_xml: Option<(String, Option<Value>)> = None;
260 for file_path in &xml_files {
261 let parsed = match parse_to_xml_object(file_path).await {
262 Some(p) => p,
263 None => continue,
264 };
265 let obj = match parsed.as_object() {
266 Some(o) => o,
267 None => continue,
268 };
269 let root_key = match obj.keys().find(|k| *k != "?xml").cloned() {
270 Some(k) => k,
271 None => continue,
272 };
273 let root_val = obj
274 .get(&root_key)
275 .cloned()
276 .unwrap_or(Value::Object(serde_json::Map::new()));
277 let mut content = if extract_inner {
278 root_val
279 .get(segment_name)
280 .cloned()
281 .unwrap_or_else(|| Value::Object(serde_json::Map::new()))
282 } else {
283 root_val
284 };
285 if extract_inner {
287 content = strip_xmlns_from_value(content);
288 }
289 root_contents.push(content);
290 if first_xml.is_none() {
291 first_xml = Some((root_key, obj.get("?xml").cloned()));
292 }
293 }
294 if root_contents.is_empty() {
295 return Ok(None);
296 }
297 let (root_key, decl_opt) = first_xml.unwrap();
298 let mut content = serde_json::Map::new();
299 content.insert(segment_name.to_string(), Value::Array(root_contents));
300 let mut top = serde_json::Map::new();
301 if let Some(decl) = decl_opt {
302 top.insert("?xml".to_string(), decl);
303 } else {
304 let mut d = serde_json::Map::new();
305 d.insert("@version".to_string(), Value::String("1.0".to_string()));
306 d.insert("@encoding".to_string(), Value::String("UTF-8".to_string()));
307 top.insert("?xml".to_string(), Value::Object(d));
308 }
309 top.insert(root_key, Value::Object(content));
310 Ok(Some(Value::Object(top)))
311 }
312
313 fn is_parsable_file(&self, file_name: &str) -> bool {
314 let lower = file_name.to_lowercase();
315 lower.ends_with(".xml")
316 || lower.ends_with(".json")
317 || lower.ends_with(".json5")
318 || lower.ends_with(".yaml")
319 || lower.ends_with(".yml")
320 }
321
322 async fn validate_directory(
323 &self,
324 path: &str,
325 ) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
326 let meta = fs::metadata(path).await?;
327 if !meta.is_dir() {
328 log::error!(
329 "The provided path to reassemble is not a directory: {}",
330 path
331 );
332 return Ok(false);
333 }
334 Ok(true)
335 }
336
337 fn get_output_path(&self, dir_path: &str, extension: Option<&str>) -> String {
338 let path = Path::new(dir_path);
339 let parent = path.parent().unwrap_or(Path::new("."));
340 let base_name = path
341 .file_name()
342 .and_then(|n| n.to_str())
343 .unwrap_or("output");
344 let ext = extension.unwrap_or("xml");
345 parent
346 .join(format!("{}.{}", base_name, ext))
347 .to_string_lossy()
348 .to_string()
349 }
350}
351
352impl Default for ReassembleXmlFileHandler {
353 fn default() -> Self {
354 Self::new()
355 }
356}