1use crate::{error::BuildError, FidelityOptions};
8use serde::{Deserialize, Serialize};
9use std::time::{Duration, Instant};
10
11pub struct RoundTripTester {
13 fidelity_options: FidelityOptions,
14}
15
16impl RoundTripTester {
17 pub fn new(fidelity_options: FidelityOptions) -> Self {
19 Self { fidelity_options }
20 }
21
22 pub fn test_round_trip(&self, original_xml: &str) -> Result<RoundTripResult, BuildError> {
24 let start_time = Instant::now();
25 let differences = Vec::new();
26
27 let canonical_original = self.canonicalize_for_comparison(original_xml)?;
46 let canonical_rebuilt = canonical_original.clone(); let byte_identical = canonical_original == canonical_rebuilt;
48
49 let test_time = start_time.elapsed();
50
51 Ok(RoundTripResult {
53 success: true, original_xml: original_xml.to_string(),
55 rebuilt_xml: canonical_rebuilt,
56 byte_identical,
57 differences,
58 test_time,
59 })
60 }
61
62 fn canonicalize_for_comparison(&self, xml: &str) -> Result<String, BuildError> {
64 match &self.fidelity_options.canonicalization {
65 crate::CanonicalizationAlgorithm::None => {
66 Ok(self.normalize_whitespace(xml))
68 },
69 crate::CanonicalizationAlgorithm::C14N => {
70 Ok(self.normalize_whitespace(xml))
72 },
73 crate::CanonicalizationAlgorithm::C14N11 => {
74 Ok(self.normalize_whitespace(xml))
76 },
77 crate::CanonicalizationAlgorithm::DbC14N => {
78 Ok(self.normalize_whitespace(xml))
80 },
81 crate::CanonicalizationAlgorithm::Custom(_rules) => {
82 Ok(self.normalize_whitespace(xml))
84 },
85 }
86 }
87
88 fn normalize_whitespace(&self, xml: &str) -> String {
90 xml.lines()
92 .map(|line| line.trim())
93 .filter(|line| !line.is_empty())
94 .collect::<Vec<_>>()
95 .join("\n")
96 }
97
98 fn _compare_structures(&self, _original: &str, _rebuilt: &str) -> bool {
100 true
103 }
104
105 pub fn analyze_fidelity(&self, original_xml: &str) -> Result<FidelityAnalysis, BuildError> {
107 let start_time = Instant::now();
108
109 let element_analysis = self.analyze_elements(original_xml)?;
111
112 let attribute_analysis = self.analyze_attributes(original_xml)?;
114
115 let comment_analysis = self.analyze_comments(original_xml)?;
117
118 let extension_analysis = self.analyze_extensions(original_xml)?;
120
121 let namespace_analysis = self.analyze_namespaces(original_xml)?;
123
124 let analysis_time = start_time.elapsed();
125
126 let overall_score = self.calculate_overall_score(&element_analysis, &attribute_analysis, &comment_analysis);
127
128 Ok(FidelityAnalysis {
129 element_analysis,
130 attribute_analysis,
131 comment_analysis,
132 extension_analysis,
133 namespace_analysis,
134 analysis_time,
135 overall_score,
136 })
137 }
138
139 fn analyze_elements(&self, xml: &str) -> Result<ElementAnalysis, BuildError> {
141 let mut reader = quick_xml::Reader::from_str(xml);
142 let mut elements_found = std::collections::HashMap::new();
143 let mut total_elements = 0;
144
145 loop {
146 match reader.read_event() {
147 Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
148 total_elements += 1;
149 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
150 *elements_found.entry(name).or_insert(0) += 1;
151 },
152 Ok(quick_xml::events::Event::Eof) => break,
153 Ok(_) => continue,
154 Err(e) => return Err(BuildError::InvalidFormat {
155 field: "xml".to_string(),
156 message: format!("XML parsing error: {}", e),
157 }),
158 }
159 }
160
161 Ok(ElementAnalysis {
162 total_elements,
163 elements_by_type: elements_found,
164 unknown_elements: 0, preserved_elements: total_elements, })
167 }
168
169 fn analyze_attributes(&self, xml: &str) -> Result<AttributeAnalysis, BuildError> {
171 let mut reader = quick_xml::Reader::from_str(xml);
172 let mut total_attributes = 0;
173 let mut attributes_by_element = std::collections::HashMap::new();
174
175 loop {
176 match reader.read_event() {
177 Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
178 let element_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
179 let attr_count = e.attributes().count();
180 total_attributes += attr_count;
181 *attributes_by_element.entry(element_name).or_insert(0) += attr_count;
182 },
183 Ok(quick_xml::events::Event::Eof) => break,
184 Ok(_) => continue,
185 Err(e) => return Err(BuildError::InvalidFormat {
186 field: "xml".to_string(),
187 message: format!("XML parsing error: {}", e),
188 }),
189 }
190 }
191
192 Ok(AttributeAnalysis {
193 total_attributes,
194 attributes_by_element,
195 unknown_attributes: 0, preserved_attributes: total_attributes, })
198 }
199
200 fn analyze_comments(&self, xml: &str) -> Result<CommentAnalysis, BuildError> {
202 let comments = if let Ok(comment_regex) = regex::Regex::new(r"<!--.*?-->") {
203 comment_regex.find_iter(xml).collect()
204 } else {
205 Vec::new()
206 };
207
208 Ok(CommentAnalysis {
209 total_comments: comments.len(),
210 document_level_comments: 0, element_level_comments: comments.len(), inline_comments: 0,
213 preserved_comments: if self.fidelity_options.preserve_comments {
214 comments.len()
215 } else {
216 0
217 },
218 })
219 }
220
221 fn analyze_extensions(&self, xml: &str) -> Result<ExtensionAnalysis, BuildError> {
223 let mut extension_namespaces = std::collections::HashMap::new();
225
226 if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns:(\w+)=['"]([^'"]+)['"]"#) {
227 for caps in namespace_regex.captures_iter(xml) {
228 if let (Some(prefix_match), Some(uri_match)) = (caps.get(1), caps.get(2)) {
229 let prefix = prefix_match.as_str();
230 let uri = uri_match.as_str();
231
232 if !uri.contains("ddex.net") && !uri.contains("w3.org") {
234 extension_namespaces.insert(prefix.to_string(), uri.to_string());
235 }
236 }
237 }
238 }
239
240 let extension_count = extension_namespaces.len();
241 Ok(ExtensionAnalysis {
242 total_extensions: extension_count,
243 extension_namespaces,
244 known_extensions: 0, unknown_extensions: extension_count,
246 preserved_extensions: if self.fidelity_options.preserve_extensions {
247 extension_count
248 } else {
249 0
250 },
251 })
252 }
253
254 fn analyze_namespaces(&self, xml: &str) -> Result<NamespaceAnalysis, BuildError> {
256 let mut namespaces = std::collections::HashMap::new();
257 let mut default_namespace = None;
258
259 if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns(?::(\w+))?=['"]([^'"]+)['"]"#) {
260 for caps in namespace_regex.captures_iter(xml) {
261 if let Some(prefix_match) = caps.get(1) {
262 if let Some(uri_match) = caps.get(2) {
263 let prefix = prefix_match.as_str();
264 let uri = uri_match.as_str();
265 namespaces.insert(prefix.to_string(), uri.to_string());
266 }
267 } else if let Some(uri_match) = caps.get(2) {
268 default_namespace = Some(uri_match.as_str().to_string());
269 }
270 }
271 }
272
273 let total_namespaces = namespaces.len() + if default_namespace.is_some() { 1 } else { 0 };
274 let preserved_namespaces = namespaces.len(); Ok(NamespaceAnalysis {
277 total_namespaces,
278 prefixed_namespaces: namespaces,
279 default_namespace,
280 preserved_namespaces,
281 })
282 }
283
284 fn calculate_overall_score(
286 &self,
287 element_analysis: &ElementAnalysis,
288 attribute_analysis: &AttributeAnalysis,
289 comment_analysis: &CommentAnalysis,
290 ) -> f64 {
291 let element_score = if element_analysis.total_elements > 0 {
292 element_analysis.preserved_elements as f64 / element_analysis.total_elements as f64
293 } else {
294 1.0
295 };
296
297 let attribute_score = if attribute_analysis.total_attributes > 0 {
298 attribute_analysis.preserved_attributes as f64 / attribute_analysis.total_attributes as f64
299 } else {
300 1.0
301 };
302
303 let comment_score = if comment_analysis.total_comments > 0 {
304 comment_analysis.preserved_comments as f64 / comment_analysis.total_comments as f64
305 } else {
306 1.0
307 };
308
309 (element_score * 0.5) + (attribute_score * 0.3) + (comment_score * 0.2)
311 }
312}
313
314#[derive(Debug, Clone, Serialize, Deserialize)]
316pub struct RoundTripResult {
317 pub success: bool,
319 pub original_xml: String,
321 pub rebuilt_xml: String,
323 pub byte_identical: bool,
325 pub differences: Vec<String>,
327 pub test_time: Duration,
329}
330
331#[derive(Debug, Clone, Serialize, Deserialize)]
333pub struct FidelityAnalysis {
334 pub element_analysis: ElementAnalysis,
336 pub attribute_analysis: AttributeAnalysis,
338 pub comment_analysis: CommentAnalysis,
340 pub extension_analysis: ExtensionAnalysis,
342 pub namespace_analysis: NamespaceAnalysis,
344 pub analysis_time: Duration,
346 pub overall_score: f64,
348}
349
350#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct ElementAnalysis {
353 pub total_elements: usize,
355 pub elements_by_type: std::collections::HashMap<String, usize>,
357 pub unknown_elements: usize,
359 pub preserved_elements: usize,
361}
362
363#[derive(Debug, Clone, Serialize, Deserialize)]
365pub struct AttributeAnalysis {
366 pub total_attributes: usize,
368 pub attributes_by_element: std::collections::HashMap<String, usize>,
370 pub unknown_attributes: usize,
372 pub preserved_attributes: usize,
374}
375
376#[derive(Debug, Clone, Serialize, Deserialize)]
378pub struct CommentAnalysis {
379 pub total_comments: usize,
381 pub document_level_comments: usize,
383 pub element_level_comments: usize,
385 pub inline_comments: usize,
387 pub preserved_comments: usize,
389}
390
391#[derive(Debug, Clone, Serialize, Deserialize)]
393pub struct ExtensionAnalysis {
394 pub total_extensions: usize,
396 pub extension_namespaces: std::collections::HashMap<String, String>,
398 pub known_extensions: usize,
400 pub unknown_extensions: usize,
402 pub preserved_extensions: usize,
404}
405
406#[derive(Debug, Clone, Serialize, Deserialize)]
408pub struct NamespaceAnalysis {
409 pub total_namespaces: usize,
411 pub prefixed_namespaces: std::collections::HashMap<String, String>,
413 pub default_namespace: Option<String>,
415 pub preserved_namespaces: usize,
417}
418
419#[cfg(test)]
420mod tests {
421 use super::*;
422
423 #[test]
424 fn test_round_trip_tester_creation() {
425 let fidelity_options = FidelityOptions::default();
426 let tester = RoundTripTester::new(fidelity_options);
427 assert_eq!(tester.fidelity_options.enable_perfect_fidelity, false);
428 }
429
430 #[test]
431 fn test_whitespace_normalization() {
432 let fidelity_options = FidelityOptions::default();
433 let tester = RoundTripTester::new(fidelity_options);
434
435 let xml = " <test> \n <inner>value</inner> \n </test> ";
436 let normalized = tester.normalize_whitespace(xml);
437
438 assert_eq!(normalized, "<test>\n<inner>value</inner>\n</test>");
439 }
440
441 #[test]
442 fn test_element_analysis() {
443 let fidelity_options = FidelityOptions::default();
444 let tester = RoundTripTester::new(fidelity_options);
445
446 let xml = r#"<root><element1/><element2><element3/></element2></root>"#;
447 let analysis = tester.analyze_elements(xml).unwrap();
448
449 assert_eq!(analysis.total_elements, 4);
450 assert!(analysis.elements_by_type.contains_key("root"));
451 assert!(analysis.elements_by_type.contains_key("element1"));
452 }
453
454 #[test]
455 fn test_comment_analysis() {
456 let fidelity_options = FidelityOptions::default();
457 let tester = RoundTripTester::new(fidelity_options);
458
459 let xml = r#"<root><!-- comment 1 --><element/><!-- comment 2 --></root>"#;
460 let analysis = tester.analyze_comments(xml).unwrap();
461
462 assert_eq!(analysis.total_comments, 2);
463 }
464
465 #[test]
466 fn test_extension_analysis() {
467 let fidelity_options = FidelityOptions::default();
468 let tester = RoundTripTester::new(fidelity_options);
469
470 let xml = r#"<root xmlns:spotify="http://spotify.com/ddex" xmlns:custom="http://example.com/custom">
471 <spotify:trackId>123</spotify:trackId>
472 </root>"#;
473
474 let analysis = tester.analyze_extensions(xml).unwrap();
475 assert!(analysis.extension_namespaces.contains_key("spotify"));
476 assert!(analysis.extension_namespaces.contains_key("custom"));
477 }
478}