Skip to main content

xml_disassembler/handlers/
reassemble.rs

//! Reassemble an XML file from a disassembled directory.
2
3use crate::builders::{build_xml_string, merge_xml_elements, reorder_root_keys};
4use crate::parsers::parse_to_xml_object;
5use crate::types::XmlElement;
6use std::future::Future;
7use std::path::Path;
8use std::pin::Pin;
9use tokio::fs;
10
11type ProcessDirFuture<'a> = Pin<
12    Box<
13        dyn Future<Output = Result<Vec<XmlElement>, Box<dyn std::error::Error + Send + Sync>>>
14            + Send
15            + 'a,
16    >,
17>;
18
/// Stateless handler that rebuilds a single XML file from a directory of
/// previously disassembled fragments.
///
/// The type carries no data, so it is cheap to copy and safe to share.
#[derive(Debug, Clone, Copy)]
pub struct ReassembleXmlFileHandler;
20
21impl ReassembleXmlFileHandler {
22    pub fn new() -> Self {
23        Self
24    }
25
26    pub async fn reassemble(
27        &self,
28        file_path: &str,
29        file_extension: Option<&str>,
30        post_purge: bool,
31    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
32        if !self.validate_directory(file_path).await? {
33            return Ok(());
34        }
35
36        log::debug!("Parsing directory to reassemble: {}", file_path);
37        let parsed_objects = self
38            .process_files_in_directory(file_path.to_string())
39            .await?;
40
41        if parsed_objects.is_empty() {
42            log::error!(
43                "No files under {} were parsed successfully. A reassembled XML file was not created.",
44                file_path
45            );
46            return Ok(());
47        }
48
49        let mut merged = match merge_xml_elements(&parsed_objects) {
50            Some(m) => m,
51            None => return Ok(()),
52        };
53
54        // Apply stored key order so reassembled XML matches original document order.
55        let key_order_path = Path::new(file_path).join(".key_order.json");
56        if key_order_path.exists() {
57            if let Ok(bytes) = fs::read(&key_order_path).await {
58                if let Ok(key_order) = serde_json::from_slice::<Vec<String>>(&bytes) {
59                    if let Some(reordered) = reorder_root_keys(&merged, &key_order) {
60                        merged = reordered;
61                    }
62                }
63            }
64        }
65
66        let final_xml = build_xml_string(&merged);
67        let output_path = self.get_output_path(file_path, file_extension);
68
69        fs::write(&output_path, final_xml).await?;
70
71        if post_purge {
72            fs::remove_dir_all(file_path).await.ok();
73        }
74
75        Ok(())
76    }
77
78    fn process_files_in_directory<'a>(&'a self, dir_path: String) -> ProcessDirFuture<'a> {
79        Box::pin(async move {
80            let mut parsed = Vec::new();
81            let mut entries = Vec::new();
82            let mut read_dir = fs::read_dir(&dir_path).await?;
83            while let Some(entry) = read_dir.next_entry().await? {
84                entries.push(entry);
85            }
86            entries.sort_by(|a, b| {
87                let a_base: String = a
88                    .file_name()
89                    .to_str()
90                    .unwrap_or("")
91                    .split('.')
92                    .next()
93                    .unwrap_or("")
94                    .to_string();
95                let b_base: String = b
96                    .file_name()
97                    .to_str()
98                    .unwrap_or("")
99                    .split('.')
100                    .next()
101                    .unwrap_or("")
102                    .to_string();
103                a_base.cmp(&b_base)
104            });
105
106            for entry in entries {
107                let path = entry.path();
108                let file_path = path.to_string_lossy().to_string();
109
110                if path.is_file() {
111                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
112                    if !name.starts_with('.') && self.is_parsable_file(name) {
113                        if let Some(parsed_obj) = parse_to_xml_object(&file_path).await {
114                            parsed.push(parsed_obj);
115                        }
116                    }
117                } else if path.is_dir() {
118                    let sub_parsed = self.process_files_in_directory(file_path).await?;
119                    parsed.extend(sub_parsed);
120                }
121            }
122
123            Ok(parsed)
124        })
125    }
126
127    fn is_parsable_file(&self, file_name: &str) -> bool {
128        let lower = file_name.to_lowercase();
129        lower.ends_with(".xml")
130            || lower.ends_with(".json")
131            || lower.ends_with(".json5")
132            || lower.ends_with(".yaml")
133            || lower.ends_with(".yml")
134            || lower.ends_with(".toml")
135            || lower.ends_with(".ini")
136    }
137
138    async fn validate_directory(
139        &self,
140        path: &str,
141    ) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
142        let meta = fs::metadata(path).await?;
143        if !meta.is_dir() {
144            log::error!(
145                "The provided path to reassemble is not a directory: {}",
146                path
147            );
148            return Ok(false);
149        }
150        Ok(true)
151    }
152
153    fn get_output_path(&self, dir_path: &str, extension: Option<&str>) -> String {
154        let path = Path::new(dir_path);
155        let parent = path.parent().unwrap_or(Path::new("."));
156        let base_name = path
157            .file_name()
158            .and_then(|n| n.to_str())
159            .unwrap_or("output");
160        let ext = extension.unwrap_or("xml");
161        parent
162            .join(format!("{}.{}", base_name, ext))
163            .to_string_lossy()
164            .to_string()
165    }
166}
167
168impl Default for ReassembleXmlFileHandler {
169    fn default() -> Self {
170        Self::new()
171    }
172}