ddex_builder/diff/
mod.rs

//! Semantic diff engine for DDEX messages
//!
//! This module provides intelligent diffing that understands DDEX business semantics,
//! not just XML structure. It can detect meaningful changes while ignoring formatting
//! differences, reference variations, and insignificant ordering changes.
6
7pub mod formatter;
8pub mod types;
9
10#[cfg(test)]
11pub mod test_data;
12
13#[cfg(test)]
14mod diff_tests;
15
16use crate::ast::{Element, Node, AST};
17use crate::error::BuildError;
18use indexmap::{IndexMap, IndexSet};
19use serde::{Deserialize, Serialize};
20use types::{ChangeSet, ChangeType, DiffPath, SemanticChange};
21
22/// Configuration for semantic diffing behavior
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct DiffConfig {
25    /// Ignore formatting differences (whitespace, indentation)
26    pub ignore_formatting: bool,
27
28    /// Ignore reference ID differences if content is same
29    pub ignore_reference_ids: bool,
30
31    /// Ignore insignificant ordering changes
32    pub ignore_order_changes: bool,
33
34    /// DDEX version compatibility mode
35    pub version_compatibility: VersionCompatibility,
36
37    /// Fields to ignore during comparison
38    pub ignored_fields: IndexSet<String>,
39
40    /// Business-critical fields that should be highlighted
41    pub critical_fields: IndexSet<String>,
42
43    /// Tolerance for numeric differences (e.g., 0.01 for currency)
44    pub numeric_tolerance: Option<f64>,
45}
46
47impl Default for DiffConfig {
48    fn default() -> Self {
49        let mut critical_fields = IndexSet::new();
50        critical_fields.insert("CommercialModelType".to_string());
51        critical_fields.insert("TerritoryCode".to_string());
52        critical_fields.insert("ValidityPeriod".to_string());
53        critical_fields.insert("ReleaseDate".to_string());
54        critical_fields.insert("UPC".to_string());
55        critical_fields.insert("ISRC".to_string());
56        critical_fields.insert("Price".to_string());
57
58        let mut ignored_fields = IndexSet::new();
59        ignored_fields.insert("MessageId".to_string());
60        ignored_fields.insert("MessageCreatedDateTime".to_string());
61
62        Self {
63            ignore_formatting: true,
64            ignore_reference_ids: true,
65            ignore_order_changes: true,
66            version_compatibility: VersionCompatibility::Strict,
67            ignored_fields,
68            critical_fields,
69            numeric_tolerance: Some(0.01),
70        }
71    }
72}
73
74/// Version compatibility modes for DDEX diffing
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
76pub enum VersionCompatibility {
77    /// Strict - versions must match exactly
78    Strict,
79    /// Compatible - allow compatible versions (4.2 <-> 4.3)
80    Compatible,
81    /// Lenient - ignore version differences entirely
82    Lenient,
83}
84
85/// Semantic diff engine for DDEX messages
86pub struct DiffEngine {
87    config: DiffConfig,
88    // Cache for reference resolution
89    reference_cache: IndexMap<String, Element>,
90}
91
92impl DiffEngine {
93    /// Create a new diff engine with default configuration
94    pub fn new() -> Self {
95        Self {
96            config: DiffConfig::default(),
97            reference_cache: IndexMap::new(),
98        }
99    }
100
101    /// Create a new diff engine with custom configuration
102    pub fn new_with_config(config: DiffConfig) -> Self {
103        Self {
104            config,
105            reference_cache: IndexMap::new(),
106        }
107    }
108
109    /// Compare two DDEX ASTs and return a semantic diff
110    pub fn diff(&mut self, old: &AST, new: &AST) -> Result<ChangeSet, BuildError> {
111        // Clear reference cache for this comparison
112        self.reference_cache.clear();
113
114        // Build reference maps for both documents
115        self.build_reference_cache(&old.root, "old");
116        self.build_reference_cache(&new.root, "new");
117
118        let mut changeset = ChangeSet::new();
119
120        // Compare root elements
121        self.compare_elements(&old.root, &new.root, DiffPath::root(), &mut changeset)?;
122
123        // Analyze changes for business impact
124        self.analyze_business_impact(&mut changeset);
125
126        Ok(changeset)
127    }
128
129    /// Compare two elements semantically
130    fn compare_elements(
131        &self,
132        old: &Element,
133        new: &Element,
134        path: DiffPath,
135        changeset: &mut ChangeSet,
136    ) -> Result<(), BuildError> {
137        // Check if elements represent the same logical entity
138        if old.name != new.name {
139            changeset.add_change(SemanticChange {
140                path: path.clone(),
141                change_type: ChangeType::ElementRenamed,
142                old_value: Some(old.name.clone()),
143                new_value: Some(new.name.clone()),
144                is_critical: self.is_critical_field(&old.name),
145                description: format!("Element renamed from '{}' to '{}'", old.name, new.name),
146            });
147            return Ok(());
148        }
149
150        // Compare attributes
151        self.compare_attributes(&old.attributes, &new.attributes, &path, changeset);
152
153        // Compare children with semantic understanding
154        self.compare_children(&old.children, &new.children, &path, changeset)?;
155
156        Ok(())
157    }
158
159    /// Compare attributes with semantic understanding
160    fn compare_attributes(
161        &self,
162        old: &IndexMap<String, String>,
163        new: &IndexMap<String, String>,
164        path: &DiffPath,
165        changeset: &mut ChangeSet,
166    ) {
167        // Find added, removed, and modified attributes
168        let old_keys: IndexSet<_> = old.keys().collect();
169        let new_keys: IndexSet<_> = new.keys().collect();
170
171        // Removed attributes
172        for &key in old_keys.difference(&new_keys) {
173            if !self.should_ignore_field(key) {
174                changeset.add_change(SemanticChange {
175                    path: path.with_attribute(key),
176                    change_type: ChangeType::AttributeRemoved,
177                    old_value: old.get(key).cloned(),
178                    new_value: None,
179                    is_critical: self.is_critical_field(key),
180                    description: format!("Attribute '{}' removed", key),
181                });
182            }
183        }
184
185        // Added attributes
186        for &key in new_keys.difference(&old_keys) {
187            if !self.should_ignore_field(key) {
188                changeset.add_change(SemanticChange {
189                    path: path.with_attribute(key),
190                    change_type: ChangeType::AttributeAdded,
191                    old_value: None,
192                    new_value: new.get(key).cloned(),
193                    is_critical: self.is_critical_field(key),
194                    description: format!("Attribute '{}' added", key),
195                });
196            }
197        }
198
199        // Modified attributes
200        for &key in old_keys.intersection(&new_keys) {
201            if !self.should_ignore_field(key) {
202                let old_val = &old[key];
203                let new_val = &new[key];
204
205                if !self.are_values_equivalent(old_val, new_val, key) {
206                    changeset.add_change(SemanticChange {
207                        path: path.with_attribute(key),
208                        change_type: ChangeType::AttributeModified,
209                        old_value: Some(old_val.clone()),
210                        new_value: Some(new_val.clone()),
211                        is_critical: self.is_critical_field(key),
212                        description: format!(
213                            "Attribute '{}' changed from '{}' to '{}'",
214                            key, old_val, new_val
215                        ),
216                    });
217                }
218            }
219        }
220    }
221
222    /// Compare children with semantic understanding
223    fn compare_children(
224        &self,
225        old: &[Node],
226        new: &[Node],
227        path: &DiffPath,
228        changeset: &mut ChangeSet,
229    ) -> Result<(), BuildError> {
230        // Separate elements from text nodes
231        let old_elements: Vec<&Element> = old
232            .iter()
233            .filter_map(|n| {
234                if let Node::Element(e) = n {
235                    Some(e)
236                } else {
237                    None
238                }
239            })
240            .collect();
241        let new_elements: Vec<&Element> = new
242            .iter()
243            .filter_map(|n| {
244                if let Node::Element(e) = n {
245                    Some(e)
246                } else {
247                    None
248                }
249            })
250            .collect();
251
252        // Compare text content
253        let old_text = self.extract_text_content(old);
254        let new_text = self.extract_text_content(new);
255
256        // Only report text changes if the content actually differs after applying normalization
257        if old_text != new_text && (!old_text.trim().is_empty() || !new_text.trim().is_empty()) {
258            changeset.add_change(SemanticChange {
259                path: path.with_text(),
260                change_type: ChangeType::TextModified,
261                old_value: if old_text.trim().is_empty() {
262                    None
263                } else {
264                    Some(old_text)
265                },
266                new_value: if new_text.trim().is_empty() {
267                    None
268                } else {
269                    Some(new_text)
270                },
271                is_critical: false,
272                description: "Text content changed".to_string(),
273            });
274        }
275
276        // Group elements by semantic identity for comparison
277        let old_groups = self.group_elements_by_identity(&old_elements);
278        let new_groups = self.group_elements_by_identity(&new_elements);
279
280        // Compare element groups
281        self.compare_element_groups(&old_groups, &new_groups, path, changeset)?;
282
283        Ok(())
284    }
285
286    /// Group elements by their semantic identity (name + key attributes)
287    fn group_elements_by_identity<'a>(
288        &self,
289        elements: &[&'a Element],
290    ) -> IndexMap<String, Vec<&'a Element>> {
291        let mut groups = IndexMap::new();
292
293        for element in elements {
294            let identity = self.get_element_identity(element);
295            groups
296                .entry(identity)
297                .or_insert_with(Vec::new)
298                .push(*element);
299        }
300
301        groups
302    }
303
304    /// Get semantic identity key for an element
305    fn get_element_identity(&self, element: &Element) -> String {
306        // Use element name and key identifying attributes
307        let mut identity = element.name.clone();
308
309        // Add key attributes that identify this element uniquely
310        let key_attrs = match element.name.as_str() {
311            "Release" => vec!["ReleaseId", "ReleaseReference"],
312            "SoundRecording" | "VideoRecording" => vec!["ResourceId", "ResourceReference"],
313            "Deal" => vec!["DealReference"],
314            "Party" => vec!["PartyId", "PartyReference"],
315            _ => vec!["Id", "Reference"], // Generic fallback
316        };
317
318        for attr in key_attrs {
319            if let Some(value) = element.attributes.get(attr) {
320                identity.push_str(&format!(":{}", value));
321                break; // Use first found key attribute
322            }
323        }
324
325        identity
326    }
327
328    /// Compare groups of elements
329    fn compare_element_groups(
330        &self,
331        old_groups: &IndexMap<String, Vec<&Element>>,
332        new_groups: &IndexMap<String, Vec<&Element>>,
333        path: &DiffPath,
334        changeset: &mut ChangeSet,
335    ) -> Result<(), BuildError> {
336        let old_keys: IndexSet<_> = old_groups.keys().collect();
337        let new_keys: IndexSet<_> = new_groups.keys().collect();
338
339        // Removed element groups
340        for &key in old_keys.difference(&new_keys) {
341            for element in &old_groups[key] {
342                changeset.add_change(SemanticChange {
343                    path: path.with_element(&element.name),
344                    change_type: ChangeType::ElementRemoved,
345                    old_value: Some(self.element_to_string(element)),
346                    new_value: None,
347                    is_critical: self.is_critical_field(&element.name),
348                    description: format!("Element '{}' removed", element.name),
349                });
350            }
351        }
352
353        // Added element groups
354        for &key in new_keys.difference(&old_keys) {
355            for element in &new_groups[key] {
356                changeset.add_change(SemanticChange {
357                    path: path.with_element(&element.name),
358                    change_type: ChangeType::ElementAdded,
359                    old_value: None,
360                    new_value: Some(self.element_to_string(element)),
361                    is_critical: self.is_critical_field(&element.name),
362                    description: format!("Element '{}' added", element.name),
363                });
364            }
365        }
366
367        // Compare matching element groups
368        for &key in old_keys.intersection(&new_keys) {
369            let old_elements = &old_groups[key];
370            let new_elements = &new_groups[key];
371
372            // For now, compare first element of each group
373            // In a more sophisticated implementation, we'd do optimal matching
374            if let (Some(&old_elem), Some(&new_elem)) = (old_elements.first(), new_elements.first())
375            {
376                self.compare_elements(
377                    old_elem,
378                    new_elem,
379                    path.with_element(&old_elem.name),
380                    changeset,
381                )?;
382            }
383        }
384
385        Ok(())
386    }
387
388    /// Extract text content from nodes, ignoring formatting
389    fn extract_text_content(&self, nodes: &[Node]) -> String {
390        let mut text = String::new();
391        for node in nodes {
392            if let Node::Text(t) = node {
393                if self.config.ignore_formatting {
394                    text.push_str(t.trim());
395                } else {
396                    text.push_str(t);
397                }
398            }
399        }
400        text
401    }
402
403    /// Check if two values are semantically equivalent
404    fn are_values_equivalent(&self, old: &str, new: &str, field_name: &str) -> bool {
405        // Reference equivalence - if we're ignoring reference IDs
406        if self.config.ignore_reference_ids && self.is_reference_field(field_name) {
407            return self.are_references_equivalent(old, new);
408        }
409
410        // Numeric tolerance for prices and monetary values
411        if let Some(tolerance) = self.config.numeric_tolerance {
412            if field_name.contains("Price") || field_name.contains("Amount") {
413                if let (Ok(old_num), Ok(new_num)) = (old.parse::<f64>(), new.parse::<f64>()) {
414                    return (old_num - new_num).abs() < tolerance;
415                }
416            }
417        }
418
419        // Formatting equivalence
420        if self.config.ignore_formatting {
421            return old.trim() == new.trim();
422        }
423
424        old == new
425    }
426
427    /// Check if a field represents a reference
428    fn is_reference_field(&self, field_name: &str) -> bool {
429        field_name.ends_with("Reference")
430            || field_name.ends_with("Ref")
431            || field_name == "ResourceId"
432            || field_name == "ReleaseId"
433            || field_name == "DealId"
434    }
435
436    /// Check if two references are equivalent by content
437    fn are_references_equivalent(&self, old_ref: &str, new_ref: &str) -> bool {
438        // If they're the same, they're equivalent
439        if old_ref == new_ref {
440            return true;
441        }
442
443        // Look up referenced content in cache
444        let old_key = format!("old:{}", old_ref);
445        let new_key = format!("new:{}", new_ref);
446
447        if let (Some(old_elem), Some(new_elem)) = (
448            self.reference_cache.get(&old_key),
449            self.reference_cache.get(&new_key),
450        ) {
451            // Compare the referenced elements for semantic equivalence
452            self.elements_semantically_equal(old_elem, new_elem)
453        } else {
454            false
455        }
456    }
457
458    /// Check if two elements are semantically equal
459    fn elements_semantically_equal(&self, old: &Element, new: &Element) -> bool {
460        // This is a simplified check - in practice, you'd want recursive comparison
461        // excluding the reference IDs themselves
462        old.name == new.name && self.text_content_equal(&old.children, &new.children)
463    }
464
465    /// Compare text content of children for equality
466    fn text_content_equal(&self, old: &[Node], new: &[Node]) -> bool {
467        self.extract_text_content(old) == self.extract_text_content(new)
468    }
469
470    /// Build reference cache for resolving reference equivalence
471    fn build_reference_cache(&mut self, element: &Element, prefix: &str) {
472        // Store elements that can be referenced
473        if let Some(ref_id) = self.get_reference_id(element) {
474            let cache_key = format!("{}:{}", prefix, ref_id);
475            self.reference_cache.insert(cache_key, element.clone());
476        }
477
478        // Recursively build cache for children
479        for child in &element.children {
480            if let Node::Element(child_elem) = child {
481                self.build_reference_cache(child_elem, prefix);
482            }
483        }
484    }
485
486    /// Get reference ID from element if it has one
487    fn get_reference_id(&self, element: &Element) -> Option<String> {
488        // Look for common reference attributes
489        let ref_attrs = [
490            "ResourceReference",
491            "ReleaseReference",
492            "DealReference",
493            "PartyReference",
494            "Reference",
495            "ResourceId",
496            "ReleaseId",
497        ];
498
499        for attr in &ref_attrs {
500            if let Some(value) = element.attributes.get(*attr) {
501                return Some(value.clone());
502            }
503        }
504
505        None
506    }
507
508    /// Check if a field should be ignored during comparison
509    fn should_ignore_field(&self, field_name: &str) -> bool {
510        self.config.ignored_fields.contains(field_name)
511    }
512
513    /// Check if a field is business-critical
514    fn is_critical_field(&self, field_name: &str) -> bool {
515        self.config.critical_fields.contains(field_name)
516    }
517
518    /// Convert element to string representation
519    fn element_to_string(&self, element: &Element) -> String {
520        // Simplified string representation - in practice you'd want proper XML serialization
521        format!("<{}>", element.name)
522    }
523
524    /// Analyze changes for business impact
525    fn analyze_business_impact(&self, changeset: &mut ChangeSet) {
526        // Count critical changes
527        let critical_changes = changeset.changes.iter().filter(|c| c.is_critical).count();
528
529        changeset
530            .metadata
531            .insert("critical_changes".to_string(), critical_changes.to_string());
532
533        // Determine overall impact level
534        let impact = if critical_changes > 0 {
535            "HIGH"
536        } else if changeset.changes.len() > 10 {
537            "MEDIUM"
538        } else {
539            "LOW"
540        };
541
542        changeset
543            .metadata
544            .insert("impact_level".to_string(), impact.to_string());
545    }
546}
547
548impl Default for DiffEngine {
549    fn default() -> Self {
550        Self::new()
551    }
552}
553
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::Element;

    /// Builds a minimal AST whose root is a `Root` element containing `text`.
    fn ast_with_root_text(text: &str) -> AST {
        AST {
            root: Element::new("Root").with_text(text),
            namespaces: IndexMap::new(),
            schema_location: None,
        }
    }

    /// Different root text must yield at least one recorded change.
    #[test]
    fn test_basic_diff() {
        let old_ast = ast_with_root_text("old content");
        let new_ast = ast_with_root_text("new content");

        let mut engine = DiffEngine::new();
        let changeset = engine.diff(&old_ast, &new_ast).unwrap();

        assert!(!changeset.changes.is_empty());
    }

    /// With the default configuration, surrounding whitespace alone must not
    /// produce a text change.
    #[test]
    fn test_ignore_formatting() {
        let old_ast = ast_with_root_text("  content  ");
        let new_ast = ast_with_root_text("content");

        let mut engine = DiffEngine::new();
        let changeset = engine.diff(&old_ast, &new_ast).unwrap();

        let has_text_change = changeset
            .changes
            .iter()
            .any(|change| matches!(change.change_type, ChangeType::TextModified));
        assert!(!has_text_change);
    }
}