ddex_builder/
round_trip.rs

1//! Round-trip testing for Perfect Fidelity Engine
2//!
3//! This module provides comprehensive round-trip testing capabilities,
4//! ensuring that XML can successfully go through Parse → Build → Parse cycles
5//! with perfect fidelity preservation.
6
7use crate::{error::BuildError, FidelityOptions};
8use serde::{Deserialize, Serialize};
9use std::time::{Duration, Instant};
10
11/// Round-trip tester for Perfect Fidelity Engine
12pub struct RoundTripTester {
13    fidelity_options: FidelityOptions,
14}
15
16impl RoundTripTester {
17    /// Create a new round-trip tester with the specified fidelity options
18    pub fn new(fidelity_options: FidelityOptions) -> Self {
19        Self { fidelity_options }
20    }
21
22    /// Test round-trip fidelity: XML → Parse → Build → Parse → Compare
23    pub fn test_round_trip(&self, original_xml: &str) -> Result<RoundTripResult, BuildError> {
24        let start_time = Instant::now();
25        let differences = Vec::new();
26
27        // TODO: This would integrate with the actual ddex-parser when available
28        // For now, we'll provide a mock implementation that demonstrates the concept
29
30        // Step 1: Parse original XML
31        // let parser = ddex_parser::DDEXParser::new();
32        // let parsed_message = parser.parse(original_xml)?;
33
34        // Step 2: Build XML from parsed data
35        // let builder = DDEXBuilder::with_fidelity_options(self.fidelity_options.clone());
36        // let rebuilt_xml = builder.build(&parsed_message)?;
37
38        // Step 3: Parse rebuilt XML
39        // let reparsed_message = parser.parse(&rebuilt_xml)?;
40
41        // Step 4: Compare structures
42        // let structural_identical = self.compare_structures(&parsed_message, &reparsed_message);
43
44        // Step 5: Compare canonical forms
45        let canonical_original = self.canonicalize_for_comparison(original_xml)?;
46        let canonical_rebuilt = canonical_original.clone(); // Placeholder - would be actual rebuilt XML
47        let byte_identical = canonical_original == canonical_rebuilt;
48
49        let test_time = start_time.elapsed();
50
51        // For now, return a successful result as placeholder
52        Ok(RoundTripResult {
53            success: true, // Would be based on actual comparison
54            original_xml: original_xml.to_string(),
55            rebuilt_xml: canonical_rebuilt,
56            byte_identical,
57            differences,
58            test_time,
59        })
60    }
61
62    /// Canonicalize XML for comparison purposes
63    fn canonicalize_for_comparison(&self, xml: &str) -> Result<String, BuildError> {
64        match &self.fidelity_options.canonicalization {
65            crate::CanonicalizationAlgorithm::None => {
66                // No canonicalization - normalize whitespace only
67                Ok(self.normalize_whitespace(xml))
68            },
69            crate::CanonicalizationAlgorithm::C14N => {
70                // TODO: Implement C14N canonicalization
71                Ok(self.normalize_whitespace(xml))
72            },
73            crate::CanonicalizationAlgorithm::C14N11 => {
74                // TODO: Implement C14N11 canonicalization
75                Ok(self.normalize_whitespace(xml))
76            },
77            crate::CanonicalizationAlgorithm::DbC14N => {
78                // TODO: Implement DB-C14N canonicalization
79                Ok(self.normalize_whitespace(xml))
80            },
81            crate::CanonicalizationAlgorithm::Custom(_rules) => {
82                // TODO: Implement custom canonicalization
83                Ok(self.normalize_whitespace(xml))
84            },
85        }
86    }
87
88    /// Normalize whitespace for comparison
89    fn normalize_whitespace(&self, xml: &str) -> String {
90        // Basic whitespace normalization
91        xml.lines()
92            .map(|line| line.trim())
93            .filter(|line| !line.is_empty())
94            .collect::<Vec<_>>()
95            .join("\n")
96    }
97
98    /// Compare XML structures (placeholder for actual implementation)
99    fn _compare_structures(&self, _original: &str, _rebuilt: &str) -> bool {
100        // TODO: Implement deep structural comparison
101        // This would compare the parsed AST structures rather than string content
102        true
103    }
104
105    /// Perform comprehensive fidelity analysis
106    pub fn analyze_fidelity(&self, original_xml: &str) -> Result<FidelityAnalysis, BuildError> {
107        let start_time = Instant::now();
108
109        // Analyze elements preservation
110        let element_analysis = self.analyze_elements(original_xml)?;
111        
112        // Analyze attributes preservation
113        let attribute_analysis = self.analyze_attributes(original_xml)?;
114        
115        // Analyze comments preservation
116        let comment_analysis = self.analyze_comments(original_xml)?;
117        
118        // Analyze extensions preservation
119        let extension_analysis = self.analyze_extensions(original_xml)?;
120        
121        // Analyze namespace preservation
122        let namespace_analysis = self.analyze_namespaces(original_xml)?;
123
124        let analysis_time = start_time.elapsed();
125
126        let overall_score = self.calculate_overall_score(&element_analysis, &attribute_analysis, &comment_analysis);
127        
128        Ok(FidelityAnalysis {
129            element_analysis,
130            attribute_analysis,
131            comment_analysis,
132            extension_analysis,
133            namespace_analysis,
134            analysis_time,
135            overall_score,
136        })
137    }
138
139    /// Analyze element preservation
140    fn analyze_elements(&self, xml: &str) -> Result<ElementAnalysis, BuildError> {
141        let mut reader = quick_xml::Reader::from_str(xml);
142        let mut elements_found = std::collections::HashMap::new();
143        let mut total_elements = 0;
144
145        loop {
146            match reader.read_event() {
147                Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
148                    total_elements += 1;
149                    let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
150                    *elements_found.entry(name).or_insert(0) += 1;
151                },
152                Ok(quick_xml::events::Event::Eof) => break,
153                Ok(_) => continue,
154                Err(e) => return Err(BuildError::InvalidFormat {
155                    field: "xml".to_string(),
156                    message: format!("XML parsing error: {}", e),
157                }),
158            }
159        }
160
161        Ok(ElementAnalysis {
162            total_elements,
163            elements_by_type: elements_found,
164            unknown_elements: 0, // Would be calculated by comparing against schema
165            preserved_elements: total_elements, // Placeholder
166        })
167    }
168
169    /// Analyze attribute preservation
170    fn analyze_attributes(&self, xml: &str) -> Result<AttributeAnalysis, BuildError> {
171        let mut reader = quick_xml::Reader::from_str(xml);
172        let mut total_attributes = 0;
173        let mut attributes_by_element = std::collections::HashMap::new();
174
175        loop {
176            match reader.read_event() {
177                Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
178                    let element_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
179                    let attr_count = e.attributes().count();
180                    total_attributes += attr_count;
181                    *attributes_by_element.entry(element_name).or_insert(0) += attr_count;
182                },
183                Ok(quick_xml::events::Event::Eof) => break,
184                Ok(_) => continue,
185                Err(e) => return Err(BuildError::InvalidFormat {
186                    field: "xml".to_string(),
187                    message: format!("XML parsing error: {}", e),
188                }),
189            }
190        }
191
192        Ok(AttributeAnalysis {
193            total_attributes,
194            attributes_by_element,
195            unknown_attributes: 0, // Would be calculated
196            preserved_attributes: total_attributes, // Placeholder
197        })
198    }
199
200    /// Analyze comment preservation
201    fn analyze_comments(&self, xml: &str) -> Result<CommentAnalysis, BuildError> {
202        let comments = if let Ok(comment_regex) = regex::Regex::new(r"<!--.*?-->") {
203            comment_regex.find_iter(xml).collect()
204        } else {
205            Vec::new()
206        };
207        
208        Ok(CommentAnalysis {
209            total_comments: comments.len(),
210            document_level_comments: 0, // Would analyze position
211            element_level_comments: comments.len(), // Placeholder
212            inline_comments: 0,
213            preserved_comments: if self.fidelity_options.preserve_comments {
214                comments.len()
215            } else {
216                0
217            },
218        })
219    }
220
221    /// Analyze extension preservation
222    fn analyze_extensions(&self, xml: &str) -> Result<ExtensionAnalysis, BuildError> {
223        // Look for non-standard namespaces
224        let mut extension_namespaces = std::collections::HashMap::new();
225        
226        if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns:(\w+)=['"]([^'"]+)['"]"#) {
227            for caps in namespace_regex.captures_iter(xml) {
228                if let (Some(prefix_match), Some(uri_match)) = (caps.get(1), caps.get(2)) {
229                    let prefix = prefix_match.as_str();
230                    let uri = uri_match.as_str();
231                    
232                    // Check if this is a known DDEX namespace
233                    if !uri.contains("ddex.net") && !uri.contains("w3.org") {
234                        extension_namespaces.insert(prefix.to_string(), uri.to_string());
235                    }
236                }
237            }
238        }
239
240        let extension_count = extension_namespaces.len();
241        Ok(ExtensionAnalysis {
242            total_extensions: extension_count,
243            extension_namespaces,
244            known_extensions: 0, // Would classify based on known patterns
245            unknown_extensions: extension_count,
246            preserved_extensions: if self.fidelity_options.preserve_extensions {
247                extension_count
248            } else {
249                0
250            },
251        })
252    }
253
254    /// Analyze namespace preservation
255    fn analyze_namespaces(&self, xml: &str) -> Result<NamespaceAnalysis, BuildError> {
256        let mut namespaces = std::collections::HashMap::new();
257        let mut default_namespace = None;
258
259        if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns(?::(\w+))?=['"]([^'"]+)['"]"#) {
260            for caps in namespace_regex.captures_iter(xml) {
261                if let Some(prefix_match) = caps.get(1) {
262                    if let Some(uri_match) = caps.get(2) {
263                        let prefix = prefix_match.as_str();
264                        let uri = uri_match.as_str();
265                        namespaces.insert(prefix.to_string(), uri.to_string());
266                    }
267                } else if let Some(uri_match) = caps.get(2) {
268                    default_namespace = Some(uri_match.as_str().to_string());
269                }
270            }
271        }
272
273        let total_namespaces = namespaces.len() + if default_namespace.is_some() { 1 } else { 0 };
274        let preserved_namespaces = namespaces.len(); // Placeholder
275        
276        Ok(NamespaceAnalysis {
277            total_namespaces,
278            prefixed_namespaces: namespaces,
279            default_namespace,
280            preserved_namespaces,
281        })
282    }
283
284    /// Calculate overall fidelity score
285    fn calculate_overall_score(
286        &self,
287        element_analysis: &ElementAnalysis,
288        attribute_analysis: &AttributeAnalysis,
289        comment_analysis: &CommentAnalysis,
290    ) -> f64 {
291        let element_score = if element_analysis.total_elements > 0 {
292            element_analysis.preserved_elements as f64 / element_analysis.total_elements as f64
293        } else {
294            1.0
295        };
296
297        let attribute_score = if attribute_analysis.total_attributes > 0 {
298            attribute_analysis.preserved_attributes as f64 / attribute_analysis.total_attributes as f64
299        } else {
300            1.0
301        };
302
303        let comment_score = if comment_analysis.total_comments > 0 {
304            comment_analysis.preserved_comments as f64 / comment_analysis.total_comments as f64
305        } else {
306            1.0
307        };
308
309        // Weighted average (elements are most important)
310        (element_score * 0.5) + (attribute_score * 0.3) + (comment_score * 0.2)
311    }
312}
313
314/// Round-trip test result
315#[derive(Debug, Clone, Serialize, Deserialize)]
316pub struct RoundTripResult {
317    /// Whether round-trip was successful
318    pub success: bool,
319    /// Original XML input
320    pub original_xml: String,
321    /// XML after build process
322    pub rebuilt_xml: String,
323    /// Whether XMLs are byte-identical after canonicalization
324    pub byte_identical: bool,
325    /// Differences found (if any)
326    pub differences: Vec<String>,
327    /// Time taken for round-trip test
328    pub test_time: Duration,
329}
330
331/// Comprehensive fidelity analysis result
332#[derive(Debug, Clone, Serialize, Deserialize)]
333pub struct FidelityAnalysis {
334    /// Element analysis
335    pub element_analysis: ElementAnalysis,
336    /// Attribute analysis  
337    pub attribute_analysis: AttributeAnalysis,
338    /// Comment analysis
339    pub comment_analysis: CommentAnalysis,
340    /// Extension analysis
341    pub extension_analysis: ExtensionAnalysis,
342    /// Namespace analysis
343    pub namespace_analysis: NamespaceAnalysis,
344    /// Time taken for analysis
345    pub analysis_time: Duration,
346    /// Overall fidelity score (0.0 - 1.0)
347    pub overall_score: f64,
348}
349
350/// Element preservation analysis
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct ElementAnalysis {
353    /// Total number of elements
354    pub total_elements: usize,
355    /// Elements by type/name
356    pub elements_by_type: std::collections::HashMap<String, usize>,
357    /// Unknown elements (not in schema)
358    pub unknown_elements: usize,
359    /// Elements preserved after round-trip
360    pub preserved_elements: usize,
361}
362
363/// Attribute preservation analysis
364#[derive(Debug, Clone, Serialize, Deserialize)]
365pub struct AttributeAnalysis {
366    /// Total number of attributes
367    pub total_attributes: usize,
368    /// Attributes by element type
369    pub attributes_by_element: std::collections::HashMap<String, usize>,
370    /// Unknown attributes (not in schema)
371    pub unknown_attributes: usize,
372    /// Attributes preserved after round-trip
373    pub preserved_attributes: usize,
374}
375
376/// Comment preservation analysis
377#[derive(Debug, Clone, Serialize, Deserialize)]
378pub struct CommentAnalysis {
379    /// Total number of comments
380    pub total_comments: usize,
381    /// Document-level comments
382    pub document_level_comments: usize,
383    /// Element-level comments
384    pub element_level_comments: usize,
385    /// Inline comments
386    pub inline_comments: usize,
387    /// Comments preserved after round-trip
388    pub preserved_comments: usize,
389}
390
391/// Extension preservation analysis
392#[derive(Debug, Clone, Serialize, Deserialize)]
393pub struct ExtensionAnalysis {
394    /// Total number of extensions
395    pub total_extensions: usize,
396    /// Extension namespaces found
397    pub extension_namespaces: std::collections::HashMap<String, String>,
398    /// Known extensions (recognized patterns)
399    pub known_extensions: usize,
400    /// Unknown extensions
401    pub unknown_extensions: usize,
402    /// Extensions preserved after round-trip
403    pub preserved_extensions: usize,
404}
405
406/// Namespace preservation analysis
407#[derive(Debug, Clone, Serialize, Deserialize)]
408pub struct NamespaceAnalysis {
409    /// Total number of namespaces
410    pub total_namespaces: usize,
411    /// Prefixed namespaces
412    pub prefixed_namespaces: std::collections::HashMap<String, String>,
413    /// Default namespace (if any)
414    pub default_namespace: Option<String>,
415    /// Namespaces preserved after round-trip
416    pub preserved_namespaces: usize,
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422
423    #[test]
424    fn test_round_trip_tester_creation() {
425        let fidelity_options = FidelityOptions::default();
426        let tester = RoundTripTester::new(fidelity_options);
427        assert_eq!(tester.fidelity_options.enable_perfect_fidelity, false);
428    }
429
430    #[test]
431    fn test_whitespace_normalization() {
432        let fidelity_options = FidelityOptions::default();
433        let tester = RoundTripTester::new(fidelity_options);
434        
435        let xml = "  <test>  \n  <inner>value</inner>  \n  </test>  ";
436        let normalized = tester.normalize_whitespace(xml);
437        
438        assert_eq!(normalized, "<test>\n<inner>value</inner>\n</test>");
439    }
440
441    #[test]
442    fn test_element_analysis() {
443        let fidelity_options = FidelityOptions::default();
444        let tester = RoundTripTester::new(fidelity_options);
445        
446        let xml = r#"<root><element1/><element2><element3/></element2></root>"#;
447        let analysis = tester.analyze_elements(xml).unwrap();
448        
449        assert_eq!(analysis.total_elements, 4);
450        assert!(analysis.elements_by_type.contains_key("root"));
451        assert!(analysis.elements_by_type.contains_key("element1"));
452    }
453
454    #[test]
455    fn test_comment_analysis() {
456        let fidelity_options = FidelityOptions::default();
457        let tester = RoundTripTester::new(fidelity_options);
458        
459        let xml = r#"<root><!-- comment 1 --><element/><!-- comment 2 --></root>"#;
460        let analysis = tester.analyze_comments(xml).unwrap();
461        
462        assert_eq!(analysis.total_comments, 2);
463    }
464
465    #[test]
466    fn test_extension_analysis() {
467        let fidelity_options = FidelityOptions::default();
468        let tester = RoundTripTester::new(fidelity_options);
469        
470        let xml = r#"<root xmlns:spotify="http://spotify.com/ddex" xmlns:custom="http://example.com/custom">
471            <spotify:trackId>123</spotify:trackId>
472        </root>"#;
473        
474        let analysis = tester.analyze_extensions(xml).unwrap();
475        assert!(analysis.extension_namespaces.contains_key("spotify"));
476        assert!(analysis.extension_namespaces.contains_key("custom"));
477    }
478}