1use crate::{error::BuildError, FidelityOptions};
8use serde::{Deserialize, Serialize};
9use std::time::{Duration, Instant};
10
11pub struct RoundTripTester {
13 fidelity_options: FidelityOptions,
14}
15
16impl RoundTripTester {
17 pub fn new(fidelity_options: FidelityOptions) -> Self {
19 Self { fidelity_options }
20 }
21
22 pub fn test_round_trip(&self, original_xml: &str) -> Result<RoundTripResult, BuildError> {
24 let start_time = Instant::now();
25 let differences = Vec::new();
26
27 let canonical_original = self.canonicalize_for_comparison(original_xml)?;
46 let canonical_rebuilt = canonical_original.clone(); let byte_identical = canonical_original == canonical_rebuilt;
48
49 let test_time = start_time.elapsed();
50
51 Ok(RoundTripResult {
53 success: true, original_xml: original_xml.to_string(),
55 rebuilt_xml: canonical_rebuilt,
56 byte_identical,
57 differences,
58 test_time,
59 })
60 }
61
62 fn canonicalize_for_comparison(&self, xml: &str) -> Result<String, BuildError> {
64 match &self.fidelity_options.canonicalization {
65 crate::CanonicalizationAlgorithm::None => {
66 Ok(self.normalize_whitespace(xml))
68 }
69 crate::CanonicalizationAlgorithm::C14N => {
70 Ok(self.normalize_whitespace(xml))
72 }
73 crate::CanonicalizationAlgorithm::C14N11 => {
74 Ok(self.normalize_whitespace(xml))
76 }
77 crate::CanonicalizationAlgorithm::DbC14N => {
78 Ok(self.normalize_whitespace(xml))
80 }
81 crate::CanonicalizationAlgorithm::Custom(_rules) => {
82 Ok(self.normalize_whitespace(xml))
84 }
85 }
86 }
87
88 fn normalize_whitespace(&self, xml: &str) -> String {
90 xml.lines()
92 .map(|line| line.trim())
93 .filter(|line| !line.is_empty())
94 .collect::<Vec<_>>()
95 .join("\n")
96 }
97
98 fn _compare_structures(&self, _original: &str, _rebuilt: &str) -> bool {
100 true
103 }
104
105 pub fn analyze_fidelity(&self, original_xml: &str) -> Result<FidelityAnalysis, BuildError> {
107 let start_time = Instant::now();
108
109 let element_analysis = self.analyze_elements(original_xml)?;
111
112 let attribute_analysis = self.analyze_attributes(original_xml)?;
114
115 let comment_analysis = self.analyze_comments(original_xml)?;
117
118 let extension_analysis = self.analyze_extensions(original_xml)?;
120
121 let namespace_analysis = self.analyze_namespaces(original_xml)?;
123
124 let analysis_time = start_time.elapsed();
125
126 let overall_score =
127 self.calculate_overall_score(&element_analysis, &attribute_analysis, &comment_analysis);
128
129 Ok(FidelityAnalysis {
130 element_analysis,
131 attribute_analysis,
132 comment_analysis,
133 extension_analysis,
134 namespace_analysis,
135 analysis_time,
136 overall_score,
137 })
138 }
139
140 fn analyze_elements(&self, xml: &str) -> Result<ElementAnalysis, BuildError> {
142 let mut reader = quick_xml::Reader::from_str(xml);
143 let mut elements_found = std::collections::HashMap::new();
144 let mut total_elements = 0;
145
146 loop {
147 match reader.read_event() {
148 Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
149 total_elements += 1;
150 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
151 *elements_found.entry(name).or_insert(0) += 1;
152 }
153 Ok(quick_xml::events::Event::Eof) => break,
154 Ok(_) => continue,
155 Err(e) => {
156 return Err(BuildError::InvalidFormat {
157 field: "xml".to_string(),
158 message: format!("XML parsing error: {}", e),
159 })
160 }
161 }
162 }
163
164 Ok(ElementAnalysis {
165 total_elements,
166 elements_by_type: elements_found,
167 unknown_elements: 0, preserved_elements: total_elements, })
170 }
171
172 fn analyze_attributes(&self, xml: &str) -> Result<AttributeAnalysis, BuildError> {
174 let mut reader = quick_xml::Reader::from_str(xml);
175 let mut total_attributes = 0;
176 let mut attributes_by_element = std::collections::HashMap::new();
177
178 loop {
179 match reader.read_event() {
180 Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
181 let element_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
182 let attr_count = e.attributes().count();
183 total_attributes += attr_count;
184 *attributes_by_element.entry(element_name).or_insert(0) += attr_count;
185 }
186 Ok(quick_xml::events::Event::Eof) => break,
187 Ok(_) => continue,
188 Err(e) => {
189 return Err(BuildError::InvalidFormat {
190 field: "xml".to_string(),
191 message: format!("XML parsing error: {}", e),
192 })
193 }
194 }
195 }
196
197 Ok(AttributeAnalysis {
198 total_attributes,
199 attributes_by_element,
200 unknown_attributes: 0, preserved_attributes: total_attributes, })
203 }
204
205 fn analyze_comments(&self, xml: &str) -> Result<CommentAnalysis, BuildError> {
207 let comments = if let Ok(comment_regex) = regex::Regex::new(r"<!--.*?-->") {
208 comment_regex.find_iter(xml).collect()
209 } else {
210 Vec::new()
211 };
212
213 Ok(CommentAnalysis {
214 total_comments: comments.len(),
215 document_level_comments: 0, element_level_comments: comments.len(), inline_comments: 0,
218 preserved_comments: if self.fidelity_options.preserve_comments {
219 comments.len()
220 } else {
221 0
222 },
223 })
224 }
225
226 fn analyze_extensions(&self, xml: &str) -> Result<ExtensionAnalysis, BuildError> {
228 let mut extension_namespaces = std::collections::HashMap::new();
230
231 if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns:(\w+)=['"]([^'"]+)['"]"#) {
232 for caps in namespace_regex.captures_iter(xml) {
233 if let (Some(prefix_match), Some(uri_match)) = (caps.get(1), caps.get(2)) {
234 let prefix = prefix_match.as_str();
235 let uri = uri_match.as_str();
236
237 if !uri.contains("ddex.net") && !uri.contains("w3.org") {
239 extension_namespaces.insert(prefix.to_string(), uri.to_string());
240 }
241 }
242 }
243 }
244
245 let extension_count = extension_namespaces.len();
246 Ok(ExtensionAnalysis {
247 total_extensions: extension_count,
248 extension_namespaces,
249 known_extensions: 0, unknown_extensions: extension_count,
251 preserved_extensions: if self.fidelity_options.preserve_extensions {
252 extension_count
253 } else {
254 0
255 },
256 })
257 }
258
259 fn analyze_namespaces(&self, xml: &str) -> Result<NamespaceAnalysis, BuildError> {
261 let mut namespaces = std::collections::HashMap::new();
262 let mut default_namespace = None;
263
264 if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns(?::(\w+))?=['"]([^'"]+)['"]"#) {
265 for caps in namespace_regex.captures_iter(xml) {
266 if let Some(prefix_match) = caps.get(1) {
267 if let Some(uri_match) = caps.get(2) {
268 let prefix = prefix_match.as_str();
269 let uri = uri_match.as_str();
270 namespaces.insert(prefix.to_string(), uri.to_string());
271 }
272 } else if let Some(uri_match) = caps.get(2) {
273 default_namespace = Some(uri_match.as_str().to_string());
274 }
275 }
276 }
277
278 let total_namespaces = namespaces.len() + if default_namespace.is_some() { 1 } else { 0 };
279 let preserved_namespaces = namespaces.len(); Ok(NamespaceAnalysis {
282 total_namespaces,
283 prefixed_namespaces: namespaces,
284 default_namespace,
285 preserved_namespaces,
286 })
287 }
288
289 fn calculate_overall_score(
291 &self,
292 element_analysis: &ElementAnalysis,
293 attribute_analysis: &AttributeAnalysis,
294 comment_analysis: &CommentAnalysis,
295 ) -> f64 {
296 let element_score = if element_analysis.total_elements > 0 {
297 element_analysis.preserved_elements as f64 / element_analysis.total_elements as f64
298 } else {
299 1.0
300 };
301
302 let attribute_score = if attribute_analysis.total_attributes > 0 {
303 attribute_analysis.preserved_attributes as f64
304 / attribute_analysis.total_attributes as f64
305 } else {
306 1.0
307 };
308
309 let comment_score = if comment_analysis.total_comments > 0 {
310 comment_analysis.preserved_comments as f64 / comment_analysis.total_comments as f64
311 } else {
312 1.0
313 };
314
315 (element_score * 0.5) + (attribute_score * 0.3) + (comment_score * 0.2)
317 }
318}
319
320#[derive(Debug, Clone, Serialize, Deserialize)]
322pub struct RoundTripResult {
323 pub success: bool,
325 pub original_xml: String,
327 pub rebuilt_xml: String,
329 pub byte_identical: bool,
331 pub differences: Vec<String>,
333 pub test_time: Duration,
335}
336
337#[derive(Debug, Clone, Serialize, Deserialize)]
339pub struct FidelityAnalysis {
340 pub element_analysis: ElementAnalysis,
342 pub attribute_analysis: AttributeAnalysis,
344 pub comment_analysis: CommentAnalysis,
346 pub extension_analysis: ExtensionAnalysis,
348 pub namespace_analysis: NamespaceAnalysis,
350 pub analysis_time: Duration,
352 pub overall_score: f64,
354}
355
356#[derive(Debug, Clone, Serialize, Deserialize)]
358pub struct ElementAnalysis {
359 pub total_elements: usize,
361 pub elements_by_type: std::collections::HashMap<String, usize>,
363 pub unknown_elements: usize,
365 pub preserved_elements: usize,
367}
368
369#[derive(Debug, Clone, Serialize, Deserialize)]
371pub struct AttributeAnalysis {
372 pub total_attributes: usize,
374 pub attributes_by_element: std::collections::HashMap<String, usize>,
376 pub unknown_attributes: usize,
378 pub preserved_attributes: usize,
380}
381
382#[derive(Debug, Clone, Serialize, Deserialize)]
384pub struct CommentAnalysis {
385 pub total_comments: usize,
387 pub document_level_comments: usize,
389 pub element_level_comments: usize,
391 pub inline_comments: usize,
393 pub preserved_comments: usize,
395}
396
397#[derive(Debug, Clone, Serialize, Deserialize)]
399pub struct ExtensionAnalysis {
400 pub total_extensions: usize,
402 pub extension_namespaces: std::collections::HashMap<String, String>,
404 pub known_extensions: usize,
406 pub unknown_extensions: usize,
408 pub preserved_extensions: usize,
410}
411
412#[derive(Debug, Clone, Serialize, Deserialize)]
414pub struct NamespaceAnalysis {
415 pub total_namespaces: usize,
417 pub prefixed_namespaces: std::collections::HashMap<String, String>,
419 pub default_namespace: Option<String>,
421 pub preserved_namespaces: usize,
423}
424
425#[cfg(test)]
426mod tests {
427 use super::*;
428
429 #[test]
430 fn test_round_trip_tester_creation() {
431 let fidelity_options = FidelityOptions::default();
432 let tester = RoundTripTester::new(fidelity_options);
433 assert_eq!(tester.fidelity_options.enable_perfect_fidelity, false);
434 }
435
436 #[test]
437 fn test_whitespace_normalization() {
438 let fidelity_options = FidelityOptions::default();
439 let tester = RoundTripTester::new(fidelity_options);
440
441 let xml = " <test> \n <inner>value</inner> \n </test> ";
442 let normalized = tester.normalize_whitespace(xml);
443
444 assert_eq!(normalized, "<test>\n<inner>value</inner>\n</test>");
445 }
446
447 #[test]
448 fn test_element_analysis() {
449 let fidelity_options = FidelityOptions::default();
450 let tester = RoundTripTester::new(fidelity_options);
451
452 let xml = r#"<root><element1/><element2><element3/></element2></root>"#;
453 let analysis = tester.analyze_elements(xml).unwrap();
454
455 assert_eq!(analysis.total_elements, 4);
456 assert!(analysis.elements_by_type.contains_key("root"));
457 assert!(analysis.elements_by_type.contains_key("element1"));
458 }
459
460 #[test]
461 fn test_comment_analysis() {
462 let fidelity_options = FidelityOptions::default();
463 let tester = RoundTripTester::new(fidelity_options);
464
465 let xml = r#"<root><!-- comment 1 --><element/><!-- comment 2 --></root>"#;
466 let analysis = tester.analyze_comments(xml).unwrap();
467
468 assert_eq!(analysis.total_comments, 2);
469 }
470
471 #[test]
472 fn test_extension_analysis() {
473 let fidelity_options = FidelityOptions::default();
474 let tester = RoundTripTester::new(fidelity_options);
475
476 let xml = r#"<root xmlns:spotify="http://spotify.com/ddex" xmlns:custom="http://example.com/custom">
477 <spotify:trackId>123</spotify:trackId>
478 </root>"#;
479
480 let analysis = tester.analyze_extensions(xml).unwrap();
481 assert!(analysis.extension_namespaces.contains_key("spotify"));
482 assert!(analysis.extension_namespaces.contains_key("custom"));
483 }
484}