ddex_builder/
round_trip.rs

1//! Round-trip testing for Perfect Fidelity Engine
2//!
3//! This module provides comprehensive round-trip testing capabilities,
4//! ensuring that XML can successfully go through Parse → Build → Parse cycles
5//! with perfect fidelity preservation.
6
7use crate::{error::BuildError, FidelityOptions};
8use serde::{Deserialize, Serialize};
9use std::time::{Duration, Instant};
10
/// Round-trip tester for the Perfect Fidelity Engine.
///
/// Holds the [`FidelityOptions`] that select the canonicalization algorithm
/// used for round-trip comparison and that decide whether comments and
/// extensions are reported as preserved by the fidelity analysis.
pub struct RoundTripTester {
    /// Fidelity configuration consulted by the checks on this tester.
    fidelity_options: FidelityOptions,
}
15
16impl RoundTripTester {
17    /// Create a new round-trip tester with the specified fidelity options
18    pub fn new(fidelity_options: FidelityOptions) -> Self {
19        Self { fidelity_options }
20    }
21
22    /// Test round-trip fidelity: XML → Parse → Build → Parse → Compare
23    pub fn test_round_trip(&self, original_xml: &str) -> Result<RoundTripResult, BuildError> {
24        let start_time = Instant::now();
25        let differences = Vec::new();
26
27        // TODO: This would integrate with the actual ddex-parser when available
28        // For now, we'll provide a mock implementation that demonstrates the concept
29
30        // Step 1: Parse original XML
31        // let parser = ddex_parser::DDEXParser::new();
32        // let parsed_message = parser.parse(original_xml)?;
33
34        // Step 2: Build XML from parsed data
35        // let builder = DDEXBuilder::with_fidelity_options(self.fidelity_options.clone());
36        // let rebuilt_xml = builder.build(&parsed_message)?;
37
38        // Step 3: Parse rebuilt XML
39        // let reparsed_message = parser.parse(&rebuilt_xml)?;
40
41        // Step 4: Compare structures
42        // let structural_identical = self.compare_structures(&parsed_message, &reparsed_message);
43
44        // Step 5: Compare canonical forms
45        let canonical_original = self.canonicalize_for_comparison(original_xml)?;
46        let canonical_rebuilt = canonical_original.clone(); // Placeholder - would be actual rebuilt XML
47        let byte_identical = canonical_original == canonical_rebuilt;
48
49        let test_time = start_time.elapsed();
50
51        // For now, return a successful result as placeholder
52        Ok(RoundTripResult {
53            success: true, // Would be based on actual comparison
54            original_xml: original_xml.to_string(),
55            rebuilt_xml: canonical_rebuilt,
56            byte_identical,
57            differences,
58            test_time,
59        })
60    }
61
62    /// Canonicalize XML for comparison purposes
63    fn canonicalize_for_comparison(&self, xml: &str) -> Result<String, BuildError> {
64        match &self.fidelity_options.canonicalization {
65            crate::CanonicalizationAlgorithm::None => {
66                // No canonicalization - normalize whitespace only
67                Ok(self.normalize_whitespace(xml))
68            }
69            crate::CanonicalizationAlgorithm::C14N => {
70                // TODO: Implement C14N canonicalization
71                Ok(self.normalize_whitespace(xml))
72            }
73            crate::CanonicalizationAlgorithm::C14N11 => {
74                // TODO: Implement C14N11 canonicalization
75                Ok(self.normalize_whitespace(xml))
76            }
77            crate::CanonicalizationAlgorithm::DbC14N => {
78                // TODO: Implement DB-C14N canonicalization
79                Ok(self.normalize_whitespace(xml))
80            }
81            crate::CanonicalizationAlgorithm::Custom(_rules) => {
82                // TODO: Implement custom canonicalization
83                Ok(self.normalize_whitespace(xml))
84            }
85        }
86    }
87
88    /// Normalize whitespace for comparison
89    fn normalize_whitespace(&self, xml: &str) -> String {
90        // Basic whitespace normalization
91        xml.lines()
92            .map(|line| line.trim())
93            .filter(|line| !line.is_empty())
94            .collect::<Vec<_>>()
95            .join("\n")
96    }
97
    /// Compare XML structures (placeholder for the actual implementation).
    ///
    /// Both arguments are currently ignored and the function always returns
    /// `true`. The leading underscore in the name marks it as not yet used.
    fn _compare_structures(&self, _original: &str, _rebuilt: &str) -> bool {
        // TODO: Implement deep structural comparison.
        // The intent is to compare the parsed AST structures rather than the
        // string content.
        true
    }
104
105    /// Perform comprehensive fidelity analysis
106    pub fn analyze_fidelity(&self, original_xml: &str) -> Result<FidelityAnalysis, BuildError> {
107        let start_time = Instant::now();
108
109        // Analyze elements preservation
110        let element_analysis = self.analyze_elements(original_xml)?;
111
112        // Analyze attributes preservation
113        let attribute_analysis = self.analyze_attributes(original_xml)?;
114
115        // Analyze comments preservation
116        let comment_analysis = self.analyze_comments(original_xml)?;
117
118        // Analyze extensions preservation
119        let extension_analysis = self.analyze_extensions(original_xml)?;
120
121        // Analyze namespace preservation
122        let namespace_analysis = self.analyze_namespaces(original_xml)?;
123
124        let analysis_time = start_time.elapsed();
125
126        let overall_score =
127            self.calculate_overall_score(&element_analysis, &attribute_analysis, &comment_analysis);
128
129        Ok(FidelityAnalysis {
130            element_analysis,
131            attribute_analysis,
132            comment_analysis,
133            extension_analysis,
134            namespace_analysis,
135            analysis_time,
136            overall_score,
137        })
138    }
139
140    /// Analyze element preservation
141    fn analyze_elements(&self, xml: &str) -> Result<ElementAnalysis, BuildError> {
142        let mut reader = quick_xml::Reader::from_str(xml);
143        let mut elements_found = std::collections::HashMap::new();
144        let mut total_elements = 0;
145
146        loop {
147            match reader.read_event() {
148                Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
149                    total_elements += 1;
150                    let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
151                    *elements_found.entry(name).or_insert(0) += 1;
152                }
153                Ok(quick_xml::events::Event::Eof) => break,
154                Ok(_) => continue,
155                Err(e) => {
156                    return Err(BuildError::InvalidFormat {
157                        field: "xml".to_string(),
158                        message: format!("XML parsing error: {}", e),
159                    })
160                }
161            }
162        }
163
164        Ok(ElementAnalysis {
165            total_elements,
166            elements_by_type: elements_found,
167            unknown_elements: 0, // Would be calculated by comparing against schema
168            preserved_elements: total_elements, // Placeholder
169        })
170    }
171
172    /// Analyze attribute preservation
173    fn analyze_attributes(&self, xml: &str) -> Result<AttributeAnalysis, BuildError> {
174        let mut reader = quick_xml::Reader::from_str(xml);
175        let mut total_attributes = 0;
176        let mut attributes_by_element = std::collections::HashMap::new();
177
178        loop {
179            match reader.read_event() {
180                Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => {
181                    let element_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
182                    let attr_count = e.attributes().count();
183                    total_attributes += attr_count;
184                    *attributes_by_element.entry(element_name).or_insert(0) += attr_count;
185                }
186                Ok(quick_xml::events::Event::Eof) => break,
187                Ok(_) => continue,
188                Err(e) => {
189                    return Err(BuildError::InvalidFormat {
190                        field: "xml".to_string(),
191                        message: format!("XML parsing error: {}", e),
192                    })
193                }
194            }
195        }
196
197        Ok(AttributeAnalysis {
198            total_attributes,
199            attributes_by_element,
200            unknown_attributes: 0,                  // Would be calculated
201            preserved_attributes: total_attributes, // Placeholder
202        })
203    }
204
205    /// Analyze comment preservation
206    fn analyze_comments(&self, xml: &str) -> Result<CommentAnalysis, BuildError> {
207        let comments = if let Ok(comment_regex) = regex::Regex::new(r"<!--.*?-->") {
208            comment_regex.find_iter(xml).collect()
209        } else {
210            Vec::new()
211        };
212
213        Ok(CommentAnalysis {
214            total_comments: comments.len(),
215            document_level_comments: 0, // Would analyze position
216            element_level_comments: comments.len(), // Placeholder
217            inline_comments: 0,
218            preserved_comments: if self.fidelity_options.preserve_comments {
219                comments.len()
220            } else {
221                0
222            },
223        })
224    }
225
226    /// Analyze extension preservation
227    fn analyze_extensions(&self, xml: &str) -> Result<ExtensionAnalysis, BuildError> {
228        // Look for non-standard namespaces
229        let mut extension_namespaces = std::collections::HashMap::new();
230
231        if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns:(\w+)=['"]([^'"]+)['"]"#) {
232            for caps in namespace_regex.captures_iter(xml) {
233                if let (Some(prefix_match), Some(uri_match)) = (caps.get(1), caps.get(2)) {
234                    let prefix = prefix_match.as_str();
235                    let uri = uri_match.as_str();
236
237                    // Check if this is a known DDEX namespace
238                    if !uri.contains("ddex.net") && !uri.contains("w3.org") {
239                        extension_namespaces.insert(prefix.to_string(), uri.to_string());
240                    }
241                }
242            }
243        }
244
245        let extension_count = extension_namespaces.len();
246        Ok(ExtensionAnalysis {
247            total_extensions: extension_count,
248            extension_namespaces,
249            known_extensions: 0, // Would classify based on known patterns
250            unknown_extensions: extension_count,
251            preserved_extensions: if self.fidelity_options.preserve_extensions {
252                extension_count
253            } else {
254                0
255            },
256        })
257    }
258
259    /// Analyze namespace preservation
260    fn analyze_namespaces(&self, xml: &str) -> Result<NamespaceAnalysis, BuildError> {
261        let mut namespaces = std::collections::HashMap::new();
262        let mut default_namespace = None;
263
264        if let Ok(namespace_regex) = regex::Regex::new(r#"xmlns(?::(\w+))?=['"]([^'"]+)['"]"#) {
265            for caps in namespace_regex.captures_iter(xml) {
266                if let Some(prefix_match) = caps.get(1) {
267                    if let Some(uri_match) = caps.get(2) {
268                        let prefix = prefix_match.as_str();
269                        let uri = uri_match.as_str();
270                        namespaces.insert(prefix.to_string(), uri.to_string());
271                    }
272                } else if let Some(uri_match) = caps.get(2) {
273                    default_namespace = Some(uri_match.as_str().to_string());
274                }
275            }
276        }
277
278        let total_namespaces = namespaces.len() + if default_namespace.is_some() { 1 } else { 0 };
279        let preserved_namespaces = namespaces.len(); // Placeholder
280
281        Ok(NamespaceAnalysis {
282            total_namespaces,
283            prefixed_namespaces: namespaces,
284            default_namespace,
285            preserved_namespaces,
286        })
287    }
288
289    /// Calculate overall fidelity score
290    fn calculate_overall_score(
291        &self,
292        element_analysis: &ElementAnalysis,
293        attribute_analysis: &AttributeAnalysis,
294        comment_analysis: &CommentAnalysis,
295    ) -> f64 {
296        let element_score = if element_analysis.total_elements > 0 {
297            element_analysis.preserved_elements as f64 / element_analysis.total_elements as f64
298        } else {
299            1.0
300        };
301
302        let attribute_score = if attribute_analysis.total_attributes > 0 {
303            attribute_analysis.preserved_attributes as f64
304                / attribute_analysis.total_attributes as f64
305        } else {
306            1.0
307        };
308
309        let comment_score = if comment_analysis.total_comments > 0 {
310            comment_analysis.preserved_comments as f64 / comment_analysis.total_comments as f64
311        } else {
312            1.0
313        };
314
315        // Weighted average (elements are most important)
316        (element_score * 0.5) + (attribute_score * 0.3) + (comment_score * 0.2)
317    }
318}
319
/// Round-trip test result, as returned by `RoundTripTester::test_round_trip`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoundTripResult {
    /// Overall verdict: whether the round-trip was successful.
    pub success: bool,
    /// Original XML input, stored exactly as it was passed in.
    pub original_xml: String,
    /// XML after the build process.
    ///
    /// NOTE: until the parse/build stages are integrated, the tester stores
    /// the canonicalized form of the original input here as a placeholder.
    pub rebuilt_xml: String,
    /// Whether the original and rebuilt XML are byte-identical after canonicalization.
    pub byte_identical: bool,
    /// Descriptions of the differences found; empty when none were detected.
    pub differences: Vec<String>,
    /// Time taken for the round-trip test.
    pub test_time: Duration,
}
336
/// Comprehensive fidelity analysis result, as returned by
/// `RoundTripTester::analyze_fidelity`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FidelityAnalysis {
    /// Element analysis
    pub element_analysis: ElementAnalysis,
    /// Attribute analysis
    pub attribute_analysis: AttributeAnalysis,
    /// Comment analysis
    pub comment_analysis: CommentAnalysis,
    /// Extension analysis
    pub extension_analysis: ExtensionAnalysis,
    /// Namespace analysis
    pub namespace_analysis: NamespaceAnalysis,
    /// Time taken to run the five analysis passes (score calculation excluded).
    pub analysis_time: Duration,
    /// Overall fidelity score (0.0 - 1.0).
    ///
    /// Weighted sum of the element (0.5), attribute (0.3) and comment (0.2)
    /// preservation ratios; extensions and namespaces do not contribute.
    pub overall_score: f64,
}
355
/// Element preservation analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementAnalysis {
    /// Total number of elements (start tags plus self-closing tags).
    pub total_elements: usize,
    /// Number of occurrences of each element, keyed by the element name as
    /// written in the document.
    pub elements_by_type: std::collections::HashMap<String, usize>,
    /// Unknown elements (not in schema).
    ///
    /// NOTE: schema comparison is not implemented yet; the analyzer reports 0.
    pub unknown_elements: usize,
    /// Elements preserved after round-trip.
    ///
    /// NOTE: placeholder; the analyzer currently reports every element as preserved.
    pub preserved_elements: usize,
}
368
/// Attribute preservation analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttributeAnalysis {
    /// Total number of attributes across all elements.
    pub total_attributes: usize,
    /// Number of attributes per element name, summed over every occurrence
    /// of that element. Elements without attributes appear with a count of 0.
    pub attributes_by_element: std::collections::HashMap<String, usize>,
    /// Unknown attributes (not in schema).
    ///
    /// NOTE: schema comparison is not implemented yet; the analyzer reports 0.
    pub unknown_attributes: usize,
    /// Attributes preserved after round-trip.
    ///
    /// NOTE: placeholder; the analyzer currently reports every attribute as preserved.
    pub preserved_attributes: usize,
}
381
/// Comment preservation analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommentAnalysis {
    /// Total number of comments found.
    pub total_comments: usize,
    /// Document-level comments.
    ///
    /// NOTE: positional classification is not implemented yet; the analyzer reports 0.
    pub document_level_comments: usize,
    /// Element-level comments.
    ///
    /// NOTE: placeholder; the analyzer currently assigns every comment to this bucket.
    pub element_level_comments: usize,
    /// Inline comments.
    ///
    /// NOTE: positional classification is not implemented yet; the analyzer reports 0.
    pub inline_comments: usize,
    /// Comments preserved after round-trip: all of them when comment
    /// preservation is enabled in the fidelity options, otherwise none.
    pub preserved_comments: usize,
}
396
/// Extension preservation analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtensionAnalysis {
    /// Total number of extensions (distinct extension namespace prefixes).
    pub total_extensions: usize,
    /// Extension namespaces found, mapping prefix to namespace URI.
    ///
    /// A prefixed namespace is treated as an extension when its URI contains
    /// neither `ddex.net` nor `w3.org`.
    pub extension_namespaces: std::collections::HashMap<String, String>,
    /// Known extensions (recognized patterns).
    ///
    /// NOTE: classification is not implemented yet; the analyzer reports 0.
    pub known_extensions: usize,
    /// Unknown extensions.
    ///
    /// NOTE: placeholder; the analyzer currently reports every extension as unknown.
    pub unknown_extensions: usize,
    /// Extensions preserved after round-trip: all of them when extension
    /// preservation is enabled in the fidelity options, otherwise none.
    pub preserved_extensions: usize,
}
411
/// Namespace preservation analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NamespaceAnalysis {
    /// Total number of namespaces: the prefixed namespaces plus one for the
    /// default namespace when it is declared.
    pub total_namespaces: usize,
    /// Prefixed namespaces, mapping prefix to namespace URI.
    pub prefixed_namespaces: std::collections::HashMap<String, String>,
    /// Default namespace URI (if any).
    pub default_namespace: Option<String>,
    /// Namespaces preserved after round-trip.
    ///
    /// NOTE: placeholder value until a real round-trip comparison is implemented.
    pub preserved_namespaces: usize,
}
424
425#[cfg(test)]
426mod tests {
427    use super::*;
428
429    #[test]
430    fn test_round_trip_tester_creation() {
431        let fidelity_options = FidelityOptions::default();
432        let tester = RoundTripTester::new(fidelity_options);
433        assert_eq!(tester.fidelity_options.enable_perfect_fidelity, false);
434    }
435
436    #[test]
437    fn test_whitespace_normalization() {
438        let fidelity_options = FidelityOptions::default();
439        let tester = RoundTripTester::new(fidelity_options);
440
441        let xml = "  <test>  \n  <inner>value</inner>  \n  </test>  ";
442        let normalized = tester.normalize_whitespace(xml);
443
444        assert_eq!(normalized, "<test>\n<inner>value</inner>\n</test>");
445    }
446
447    #[test]
448    fn test_element_analysis() {
449        let fidelity_options = FidelityOptions::default();
450        let tester = RoundTripTester::new(fidelity_options);
451
452        let xml = r#"<root><element1/><element2><element3/></element2></root>"#;
453        let analysis = tester.analyze_elements(xml).unwrap();
454
455        assert_eq!(analysis.total_elements, 4);
456        assert!(analysis.elements_by_type.contains_key("root"));
457        assert!(analysis.elements_by_type.contains_key("element1"));
458    }
459
460    #[test]
461    fn test_comment_analysis() {
462        let fidelity_options = FidelityOptions::default();
463        let tester = RoundTripTester::new(fidelity_options);
464
465        let xml = r#"<root><!-- comment 1 --><element/><!-- comment 2 --></root>"#;
466        let analysis = tester.analyze_comments(xml).unwrap();
467
468        assert_eq!(analysis.total_comments, 2);
469    }
470
471    #[test]
472    fn test_extension_analysis() {
473        let fidelity_options = FidelityOptions::default();
474        let tester = RoundTripTester::new(fidelity_options);
475
476        let xml = r#"<root xmlns:spotify="http://spotify.com/ddex" xmlns:custom="http://example.com/custom">
477            <spotify:trackId>123</spotify:trackId>
478        </root>"#;
479
480        let analysis = tester.analyze_extensions(xml).unwrap();
481        assert!(analysis.extension_namespaces.contains_key("spotify"));
482        assert!(analysis.extension_namespaces.contains_key("custom"));
483    }
484}