1pub mod formatter;
8pub mod types;
9
10#[cfg(test)]
11pub mod test_data;
12
13#[cfg(test)]
14mod diff_tests;
15
16use crate::ast::{Element, Node, AST};
17use crate::error::BuildError;
18use indexmap::{IndexMap, IndexSet};
19use serde::{Deserialize, Serialize};
20use types::{ChangeSet, ChangeType, DiffPath, SemanticChange};
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct DiffConfig {
25 pub ignore_formatting: bool,
27
28 pub ignore_reference_ids: bool,
30
31 pub ignore_order_changes: bool,
33
34 pub version_compatibility: VersionCompatibility,
36
37 pub ignored_fields: IndexSet<String>,
39
40 pub critical_fields: IndexSet<String>,
42
43 pub numeric_tolerance: Option<f64>,
45}
46
47impl Default for DiffConfig {
48 fn default() -> Self {
49 let mut critical_fields = IndexSet::new();
50 critical_fields.insert("CommercialModelType".to_string());
51 critical_fields.insert("TerritoryCode".to_string());
52 critical_fields.insert("ValidityPeriod".to_string());
53 critical_fields.insert("ReleaseDate".to_string());
54 critical_fields.insert("UPC".to_string());
55 critical_fields.insert("ISRC".to_string());
56 critical_fields.insert("Price".to_string());
57
58 let mut ignored_fields = IndexSet::new();
59 ignored_fields.insert("MessageId".to_string());
60 ignored_fields.insert("MessageCreatedDateTime".to_string());
61
62 Self {
63 ignore_formatting: true,
64 ignore_reference_ids: true,
65 ignore_order_changes: true,
66 version_compatibility: VersionCompatibility::Strict,
67 ignored_fields,
68 critical_fields,
69 numeric_tolerance: Some(0.01),
70 }
71 }
72}
73
74#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
76pub enum VersionCompatibility {
77 Strict,
79 Compatible,
81 Lenient,
83}
84
/// Semantic diff engine: compares two ASTs and produces a [`ChangeSet`].
pub struct DiffEngine {
    /// Comparison settings consulted by every comparison step.
    config: DiffConfig,
    /// Elements that carry a reference/id attribute, keyed by
    /// `"old:<id>"` or `"new:<id>"`; cleared and rebuilt on each `diff` call.
    reference_cache: IndexMap<String, Element>,
}
91
92impl DiffEngine {
93 pub fn new() -> Self {
95 Self {
96 config: DiffConfig::default(),
97 reference_cache: IndexMap::new(),
98 }
99 }
100
101 pub fn new_with_config(config: DiffConfig) -> Self {
103 Self {
104 config,
105 reference_cache: IndexMap::new(),
106 }
107 }
108
109 pub fn diff(&mut self, old: &AST, new: &AST) -> Result<ChangeSet, BuildError> {
111 self.reference_cache.clear();
113
114 self.build_reference_cache(&old.root, "old");
116 self.build_reference_cache(&new.root, "new");
117
118 let mut changeset = ChangeSet::new();
119
120 self.compare_elements(&old.root, &new.root, DiffPath::root(), &mut changeset)?;
122
123 self.analyze_business_impact(&mut changeset);
125
126 Ok(changeset)
127 }
128
129 fn compare_elements(
131 &self,
132 old: &Element,
133 new: &Element,
134 path: DiffPath,
135 changeset: &mut ChangeSet,
136 ) -> Result<(), BuildError> {
137 if old.name != new.name {
139 changeset.add_change(SemanticChange {
140 path: path.clone(),
141 change_type: ChangeType::ElementRenamed,
142 old_value: Some(old.name.clone()),
143 new_value: Some(new.name.clone()),
144 is_critical: self.is_critical_field(&old.name),
145 description: format!("Element renamed from '{}' to '{}'", old.name, new.name),
146 });
147 return Ok(());
148 }
149
150 self.compare_attributes(&old.attributes, &new.attributes, &path, changeset);
152
153 self.compare_children(&old.children, &new.children, &path, changeset)?;
155
156 Ok(())
157 }
158
159 fn compare_attributes(
161 &self,
162 old: &IndexMap<String, String>,
163 new: &IndexMap<String, String>,
164 path: &DiffPath,
165 changeset: &mut ChangeSet,
166 ) {
167 let old_keys: IndexSet<_> = old.keys().collect();
169 let new_keys: IndexSet<_> = new.keys().collect();
170
171 for &key in old_keys.difference(&new_keys) {
173 if !self.should_ignore_field(key) {
174 changeset.add_change(SemanticChange {
175 path: path.with_attribute(key),
176 change_type: ChangeType::AttributeRemoved,
177 old_value: old.get(key).cloned(),
178 new_value: None,
179 is_critical: self.is_critical_field(key),
180 description: format!("Attribute '{}' removed", key),
181 });
182 }
183 }
184
185 for &key in new_keys.difference(&old_keys) {
187 if !self.should_ignore_field(key) {
188 changeset.add_change(SemanticChange {
189 path: path.with_attribute(key),
190 change_type: ChangeType::AttributeAdded,
191 old_value: None,
192 new_value: new.get(key).cloned(),
193 is_critical: self.is_critical_field(key),
194 description: format!("Attribute '{}' added", key),
195 });
196 }
197 }
198
199 for &key in old_keys.intersection(&new_keys) {
201 if !self.should_ignore_field(key) {
202 let old_val = &old[key];
203 let new_val = &new[key];
204
205 if !self.are_values_equivalent(old_val, new_val, key) {
206 changeset.add_change(SemanticChange {
207 path: path.with_attribute(key),
208 change_type: ChangeType::AttributeModified,
209 old_value: Some(old_val.clone()),
210 new_value: Some(new_val.clone()),
211 is_critical: self.is_critical_field(key),
212 description: format!(
213 "Attribute '{}' changed from '{}' to '{}'",
214 key, old_val, new_val
215 ),
216 });
217 }
218 }
219 }
220 }
221
222 fn compare_children(
224 &self,
225 old: &[Node],
226 new: &[Node],
227 path: &DiffPath,
228 changeset: &mut ChangeSet,
229 ) -> Result<(), BuildError> {
230 let old_elements: Vec<&Element> = old
232 .iter()
233 .filter_map(|n| {
234 if let Node::Element(e) = n {
235 Some(e)
236 } else {
237 None
238 }
239 })
240 .collect();
241 let new_elements: Vec<&Element> = new
242 .iter()
243 .filter_map(|n| {
244 if let Node::Element(e) = n {
245 Some(e)
246 } else {
247 None
248 }
249 })
250 .collect();
251
252 let old_text = self.extract_text_content(old);
254 let new_text = self.extract_text_content(new);
255
256 if old_text != new_text && (!old_text.trim().is_empty() || !new_text.trim().is_empty()) {
258 changeset.add_change(SemanticChange {
259 path: path.with_text(),
260 change_type: ChangeType::TextModified,
261 old_value: if old_text.trim().is_empty() {
262 None
263 } else {
264 Some(old_text)
265 },
266 new_value: if new_text.trim().is_empty() {
267 None
268 } else {
269 Some(new_text)
270 },
271 is_critical: false,
272 description: "Text content changed".to_string(),
273 });
274 }
275
276 let old_groups = self.group_elements_by_identity(&old_elements);
278 let new_groups = self.group_elements_by_identity(&new_elements);
279
280 self.compare_element_groups(&old_groups, &new_groups, path, changeset)?;
282
283 Ok(())
284 }
285
286 fn group_elements_by_identity<'a>(
288 &self,
289 elements: &[&'a Element],
290 ) -> IndexMap<String, Vec<&'a Element>> {
291 let mut groups = IndexMap::new();
292
293 for element in elements {
294 let identity = self.get_element_identity(element);
295 groups
296 .entry(identity)
297 .or_insert_with(Vec::new)
298 .push(*element);
299 }
300
301 groups
302 }
303
304 fn get_element_identity(&self, element: &Element) -> String {
306 let mut identity = element.name.clone();
308
309 let key_attrs = match element.name.as_str() {
311 "Release" => vec!["ReleaseId", "ReleaseReference"],
312 "SoundRecording" | "VideoRecording" => vec!["ResourceId", "ResourceReference"],
313 "Deal" => vec!["DealReference"],
314 "Party" => vec!["PartyId", "PartyReference"],
315 _ => vec!["Id", "Reference"], };
317
318 for attr in key_attrs {
319 if let Some(value) = element.attributes.get(attr) {
320 identity.push_str(&format!(":{}", value));
321 break; }
323 }
324
325 identity
326 }
327
328 fn compare_element_groups(
330 &self,
331 old_groups: &IndexMap<String, Vec<&Element>>,
332 new_groups: &IndexMap<String, Vec<&Element>>,
333 path: &DiffPath,
334 changeset: &mut ChangeSet,
335 ) -> Result<(), BuildError> {
336 let old_keys: IndexSet<_> = old_groups.keys().collect();
337 let new_keys: IndexSet<_> = new_groups.keys().collect();
338
339 for &key in old_keys.difference(&new_keys) {
341 for element in &old_groups[key] {
342 changeset.add_change(SemanticChange {
343 path: path.with_element(&element.name),
344 change_type: ChangeType::ElementRemoved,
345 old_value: Some(self.element_to_string(element)),
346 new_value: None,
347 is_critical: self.is_critical_field(&element.name),
348 description: format!("Element '{}' removed", element.name),
349 });
350 }
351 }
352
353 for &key in new_keys.difference(&old_keys) {
355 for element in &new_groups[key] {
356 changeset.add_change(SemanticChange {
357 path: path.with_element(&element.name),
358 change_type: ChangeType::ElementAdded,
359 old_value: None,
360 new_value: Some(self.element_to_string(element)),
361 is_critical: self.is_critical_field(&element.name),
362 description: format!("Element '{}' added", element.name),
363 });
364 }
365 }
366
367 for &key in old_keys.intersection(&new_keys) {
369 let old_elements = &old_groups[key];
370 let new_elements = &new_groups[key];
371
372 if let (Some(&old_elem), Some(&new_elem)) = (old_elements.first(), new_elements.first())
375 {
376 self.compare_elements(
377 old_elem,
378 new_elem,
379 path.with_element(&old_elem.name),
380 changeset,
381 )?;
382 }
383 }
384
385 Ok(())
386 }
387
388 fn extract_text_content(&self, nodes: &[Node]) -> String {
390 let mut text = String::new();
391 for node in nodes {
392 if let Node::Text(t) = node {
393 if self.config.ignore_formatting {
394 text.push_str(t.trim());
395 } else {
396 text.push_str(t);
397 }
398 }
399 }
400 text
401 }
402
403 fn are_values_equivalent(&self, old: &str, new: &str, field_name: &str) -> bool {
405 if self.config.ignore_reference_ids && self.is_reference_field(field_name) {
407 return self.are_references_equivalent(old, new);
408 }
409
410 if let Some(tolerance) = self.config.numeric_tolerance {
412 if field_name.contains("Price") || field_name.contains("Amount") {
413 if let (Ok(old_num), Ok(new_num)) = (old.parse::<f64>(), new.parse::<f64>()) {
414 return (old_num - new_num).abs() < tolerance;
415 }
416 }
417 }
418
419 if self.config.ignore_formatting {
421 return old.trim() == new.trim();
422 }
423
424 old == new
425 }
426
427 fn is_reference_field(&self, field_name: &str) -> bool {
429 field_name.ends_with("Reference")
430 || field_name.ends_with("Ref")
431 || field_name == "ResourceId"
432 || field_name == "ReleaseId"
433 || field_name == "DealId"
434 }
435
436 fn are_references_equivalent(&self, old_ref: &str, new_ref: &str) -> bool {
438 if old_ref == new_ref {
440 return true;
441 }
442
443 let old_key = format!("old:{}", old_ref);
445 let new_key = format!("new:{}", new_ref);
446
447 if let (Some(old_elem), Some(new_elem)) = (
448 self.reference_cache.get(&old_key),
449 self.reference_cache.get(&new_key),
450 ) {
451 self.elements_semantically_equal(old_elem, new_elem)
453 } else {
454 false
455 }
456 }
457
458 fn elements_semantically_equal(&self, old: &Element, new: &Element) -> bool {
460 old.name == new.name && self.text_content_equal(&old.children, &new.children)
463 }
464
465 fn text_content_equal(&self, old: &[Node], new: &[Node]) -> bool {
467 self.extract_text_content(old) == self.extract_text_content(new)
468 }
469
470 fn build_reference_cache(&mut self, element: &Element, prefix: &str) {
472 if let Some(ref_id) = self.get_reference_id(element) {
474 let cache_key = format!("{}:{}", prefix, ref_id);
475 self.reference_cache.insert(cache_key, element.clone());
476 }
477
478 for child in &element.children {
480 if let Node::Element(child_elem) = child {
481 self.build_reference_cache(child_elem, prefix);
482 }
483 }
484 }
485
486 fn get_reference_id(&self, element: &Element) -> Option<String> {
488 let ref_attrs = [
490 "ResourceReference",
491 "ReleaseReference",
492 "DealReference",
493 "PartyReference",
494 "Reference",
495 "ResourceId",
496 "ReleaseId",
497 ];
498
499 for attr in &ref_attrs {
500 if let Some(value) = element.attributes.get(*attr) {
501 return Some(value.clone());
502 }
503 }
504
505 None
506 }
507
508 fn should_ignore_field(&self, field_name: &str) -> bool {
510 self.config.ignored_fields.contains(field_name)
511 }
512
513 fn is_critical_field(&self, field_name: &str) -> bool {
515 self.config.critical_fields.contains(field_name)
516 }
517
518 fn element_to_string(&self, element: &Element) -> String {
520 format!("<{}>", element.name)
522 }
523
524 fn analyze_business_impact(&self, changeset: &mut ChangeSet) {
526 let critical_changes = changeset.changes.iter().filter(|c| c.is_critical).count();
528
529 changeset
530 .metadata
531 .insert("critical_changes".to_string(), critical_changes.to_string());
532
533 let impact = if critical_changes > 0 {
535 "HIGH"
536 } else if changeset.changes.len() > 10 {
537 "MEDIUM"
538 } else {
539 "LOW"
540 };
541
542 changeset
543 .metadata
544 .insert("impact_level".to_string(), impact.to_string());
545 }
546}
547
548impl Default for DiffEngine {
549 fn default() -> Self {
550 Self::new()
551 }
552}
553
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::Element;

    /// Builds an element via `Element::new(name).with_text(text)`.
    fn create_test_element(name: &str, text: &str) -> Element {
        Element::new(name).with_text(text)
    }

    /// Wraps a `Root` element carrying `text` in an AST with no namespaces
    /// and no schema location.
    fn ast_with_root_text(text: &str) -> AST {
        AST {
            root: create_test_element("Root", text),
            namespaces: IndexMap::new(),
            schema_location: None,
        }
    }

    /// Different root text must yield at least one change.
    #[test]
    fn test_basic_diff() {
        let before = ast_with_root_text("old content");
        let after = ast_with_root_text("new content");

        let mut engine = DiffEngine::new();
        let changeset = engine.diff(&before, &after).unwrap();

        assert!(!changeset.changes.is_empty());
    }

    /// With the default configuration, surrounding whitespace alone must not
    /// produce a text modification.
    #[test]
    fn test_ignore_formatting() {
        let before = ast_with_root_text(" content ");
        let after = ast_with_root_text("content");

        let mut engine = DiffEngine::new();
        let changeset = engine.diff(&before, &after).unwrap();

        let has_text_change = changeset
            .changes
            .iter()
            .any(|change| matches!(change.change_type, ChangeType::TextModified));
        assert!(!has_text_change);
    }
}