Skip to main content

clayers_xml/
diff.rs

1//! Pure XML diff engine.
2//!
3//! Compares two XML strings and produces a list of changes with XPath-like
4//! location paths. No store dependency – works entirely on parsed XML trees.
5
6use std::fmt;
7
8use xot::Xot;
9
10use crate::error::Error;
11
/// Location of a node inside an XML document, expressed as XPath-like steps.
///
/// The [`fmt::Display`] form joins the steps with `/` and prefixes a leading
/// `/`; a path with no steps renders as a single `/`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlPath {
    /// Ordered steps starting at the document root,
    /// e.g. `["root", "section[2]", "title"]`.
    pub segments: Vec<String>,
}

#[cfg(feature = "serde")]
impl serde::Serialize for XmlPath {
    /// Serializes the path as a single string: its [`fmt::Display`] rendering.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let rendered = self.to_string();
        serializer.serialize_str(&rendered)
    }
}

impl XmlPath {
    /// Returns a new path made of this path's steps followed by `segment`.
    ///
    /// `self` is left untouched.
    fn child(&self, segment: &str) -> Self {
        let segments = self
            .segments
            .iter()
            .cloned()
            .chain(std::iter::once(segment.to_owned()))
            .collect();
        Self { segments }
    }
}

impl fmt::Display for XmlPath {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The empty path stands for the document root itself.
        if self.segments.is_empty() {
            return f.write_str("/");
        }
        self.segments
            .iter()
            .try_for_each(|seg| write!(f, "/{seg}"))
    }
}
46
47/// A single change between two XML documents.
48#[derive(Debug)]
49#[cfg_attr(feature = "serde", derive(serde::Serialize))]
50#[cfg_attr(feature = "serde", serde(tag = "type", rename_all = "snake_case"))]
51pub enum XmlChange {
52    /// An element was added.
53    ElementAdded {
54        /// XPath-like location.
55        path: XmlPath,
56        /// Serialized XML content of the added element.
57        content: String,
58    },
59    /// An element was removed.
60    ElementRemoved {
61        /// XPath-like location.
62        path: XmlPath,
63        /// Serialized XML content of the removed element.
64        content: String,
65    },
66    /// An attribute value changed (added, removed, or modified).
67    AttributeChanged {
68        /// XPath-like location of the owning element.
69        path: XmlPath,
70        /// Attribute name.
71        name: String,
72        /// Old value (`None` if attribute was added).
73        old: Option<String>,
74        /// New value (`None` if attribute was removed).
75        new: Option<String>,
76    },
77    /// Text content changed.
78    TextChanged {
79        /// XPath-like location of the parent element.
80        path: XmlPath,
81        /// Old text.
82        old: String,
83        /// New text.
84        new: String,
85    },
86    /// Comment content changed.
87    CommentChanged {
88        /// XPath-like location of the parent element.
89        path: XmlPath,
90        /// Old comment text.
91        old: String,
92        /// New comment text.
93        new: String,
94    },
95}
96
97impl fmt::Display for XmlChange {
98    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99        match self {
100            Self::ElementAdded { path, .. } => write!(f, "  + {path}"),
101            Self::ElementRemoved { path, .. } => write!(f, "  - {path}"),
102            Self::AttributeChanged {
103                path,
104                name,
105                old,
106                new,
107            } => {
108                write!(f, "  ~ {path}/@{name}")?;
109                match (old, new) {
110                    (Some(o), Some(n)) => write!(f, ": \"{o}\" -> \"{n}\""),
111                    (None, Some(n)) => write!(f, ": (added) \"{n}\""),
112                    (Some(o), None) => write!(f, ": \"{o}\" (removed)"),
113                    (None, None) => Ok(()),
114                }
115            }
116            Self::TextChanged { path, old, new } => {
117                writeln!(f, "  ~ {path}")?;
118                write!(f, "    text: \"{old}\" -> \"{new}\"")
119            }
120            Self::CommentChanged { path, old, new } => {
121                writeln!(f, "  ~ {path}")?;
122                write!(f, "    comment: \"{old}\" -> \"{new}\"")
123            }
124        }
125    }
126}
127
128/// The result of diffing two XML documents.
129#[derive(Debug)]
130#[cfg_attr(feature = "serde", derive(serde::Serialize))]
131pub struct XmlDiff {
132    /// The individual changes.
133    pub changes: Vec<XmlChange>,
134}
135
136impl XmlDiff {
137    /// True if there are no changes.
138    #[must_use]
139    pub fn is_empty(&self) -> bool {
140        self.changes.is_empty()
141    }
142}
143
144impl fmt::Display for XmlDiff {
145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146        for change in &self.changes {
147            writeln!(f, "{change}")?;
148        }
149        Ok(())
150    }
151}
152
/// Context passed through the recursive diff to avoid repeated lookups.
///
/// Built once per `diff_xml` call and shared by reference with every helper.
struct DiffCtx {
    /// Interned name of the un-namespaced `id` attribute, used to recognise
    /// elements that carry an identifier.
    id_name: xot::NameId,
}
157
158/// Diff two XML strings and produce a list of changes with XPath-like paths.
159///
160/// Both strings must be well-formed XML documents. The diff is position-based:
161/// children at the same position are compared pairwise.
162///
163/// # Errors
164///
165/// Returns an error if either XML string cannot be parsed.
166pub fn diff_xml(old: &str, new: &str) -> Result<XmlDiff, Error> {
167    let mut xot = Xot::new();
168    let id_name = xot.add_name("id");
169
170    let doc_old = xot
171        .parse(old)
172        .map_err(|e| Error::XmlParse(e.to_string()))?;
173    let doc_new = xot
174        .parse(new)
175        .map_err(|e| Error::XmlParse(e.to_string()))?;
176
177    let root_old = xot
178        .document_element(doc_old)
179        .map_err(|e| Error::XmlParse(e.to_string()))?;
180    let root_new = xot
181        .document_element(doc_new)
182        .map_err(|e| Error::XmlParse(e.to_string()))?;
183
184    let ctx = DiffCtx { id_name };
185    let mut changes = Vec::new();
186
187    let old_name = xot.element(root_old).map(xot::Element::name);
188    let new_name = xot.element(root_new).map(xot::Element::name);
189
190    let seg = element_segment(&xot, root_new, &ctx);
191    let path = XmlPath {
192        segments: vec![seg],
193    };
194
195    if old_name == new_name {
196        diff_elements(&xot, root_old, root_new, &path, &ctx, &mut changes);
197    } else {
198        changes.push(XmlChange::ElementRemoved {
199            path: path.clone(),
200            content: xot.to_string(root_old).unwrap_or_default(),
201        });
202        changes.push(XmlChange::ElementAdded {
203            path,
204            content: xot.to_string(root_new).unwrap_or_default(),
205        });
206    }
207
208    Ok(XmlDiff { changes })
209}
210
211/// Get the path segment for an element (local name, optionally with `@id`).
212fn element_segment(xot: &Xot, node: xot::Node, ctx: &DiffCtx) -> String {
213    let Some(el) = xot.element(node) else {
214        return "?".to_string();
215    };
216    let (local, _) = xot.name_ns_str(el.name());
217
218    if let Some(id_val) = xot.get_attribute(node, ctx.id_name) {
219        format!("{local}[@id=\"{id_val}\"]")
220    } else {
221        local.to_string()
222    }
223}
224
225/// Build a path segment for a child element, with `@id` or positional `[N]`.
226fn child_element_segment(
227    xot: &Xot,
228    parent: xot::Node,
229    child: xot::Node,
230    ctx: &DiffCtx,
231) -> String {
232    let Some(child_el) = xot.element(child) else {
233        return "text()".to_string();
234    };
235    let child_name = child_el.name();
236    let (local, _) = xot.name_ns_str(child_name);
237
238    // Prefer @id if present.
239    if let Some(id_val) = xot.get_attribute(child, ctx.id_name) {
240        return format!("{local}[@id=\"{id_val}\"]");
241    }
242
243    // Count same-name siblings and find position.
244    let mut count = 0usize;
245    let mut position = 0usize;
246    for sib in xot.children(parent) {
247        if let Some(sib_el) = xot.element(sib)
248            && sib_el.name() == child_name
249        {
250            count += 1;
251            if sib == child {
252                position = count;
253            }
254        }
255    }
256
257    if count > 1 {
258        format!("{local}[{position}]")
259    } else {
260        local.to_string()
261    }
262}
263
/// Identity key for matching children across old/new trees.
///
/// Elements with `@id` are keyed by `(name, id)`. Elements without `@id`
/// are keyed by `(name, positional_index_among_same_name_no_id_siblings)`.
/// This prevents a single insertion or deletion from cascading mismatches
/// through all subsequent siblings.
///
/// All indices are zero-based and are assigned by `key_children` in document
/// order.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum ChildKey {
    /// Element with `@id` attribute: element name plus the attribute value.
    ElementById(xot::NameId, String),
    /// Element without `@id`, keyed by name + occurrence index among the
    /// same-name siblings that also lack `@id`.
    ElementByPos(xot::NameId, usize),
    /// Text node keyed by occurrence index among text siblings.
    Text(usize),
    /// Comment node keyed by occurrence index among comment siblings.
    Comment(usize),
    /// Other node types (PIs, etc.) keyed by occurrence index among the
    /// siblings that are neither elements, text, nor comments.
    Other(usize),
}
283
284/// Assign a `ChildKey` to each child node.
285fn key_children(xot: &Xot, parent: xot::Node, ctx: &DiffCtx) -> Vec<(ChildKey, xot::Node)> {
286    let mut result = Vec::new();
287    // Counters per element-name for positional disambiguation.
288    let mut name_counts: std::collections::HashMap<xot::NameId, usize> =
289        std::collections::HashMap::new();
290    let mut text_idx = 0usize;
291    let mut comment_idx = 0usize;
292    let mut other_idx = 0usize;
293
294    for child in xot.children(parent) {
295        if let Some(el) = xot.element(child) {
296            let name = el.name();
297            if let Some(id_val) = xot.get_attribute(child, ctx.id_name) {
298                result.push((ChildKey::ElementById(name, id_val.to_string()), child));
299            } else {
300                let idx = name_counts.entry(name).or_insert(0);
301                result.push((ChildKey::ElementByPos(name, *idx), child));
302                *idx += 1;
303            }
304        } else if xot.text_str(child).is_some() {
305            result.push((ChildKey::Text(text_idx), child));
306            text_idx += 1;
307        } else if xot.comment_str(child).is_some() {
308            result.push((ChildKey::Comment(comment_idx), child));
309            comment_idx += 1;
310        } else {
311            result.push((ChildKey::Other(other_idx), child));
312            other_idx += 1;
313        }
314    }
315    result
316}
317
318/// Compare two elements that are known to have the same expanded name.
319fn diff_elements(
320    xot: &Xot,
321    old: xot::Node,
322    new: xot::Node,
323    path: &XmlPath,
324    ctx: &DiffCtx,
325    changes: &mut Vec<XmlChange>,
326) {
327    diff_attributes(xot, old, new, path, changes);
328
329    let old_keyed = key_children(xot, old, ctx);
330    let new_keyed = key_children(xot, new, ctx);
331
332    // Build lookup from key → node for old children.
333    let old_map: std::collections::HashMap<&ChildKey, xot::Node> =
334        old_keyed.iter().map(|(k, n)| (k, *n)).collect();
335    // Track which old keys were matched.
336    let mut matched_old: std::collections::HashSet<&ChildKey> =
337        std::collections::HashSet::new();
338
339    // Walk new children: match or report added.
340    for (new_key, new_child) in &new_keyed {
341        if let Some(&old_child) = old_map.get(new_key) {
342            matched_old.insert(new_key);
343            diff_matched_pair(xot, old, old_child, new, *new_child, path, ctx, changes);
344        } else {
345            // Added.
346            if xot.element(*new_child).is_some() {
347                let seg = child_element_segment(xot, new, *new_child, ctx);
348                changes.push(XmlChange::ElementAdded {
349                    path: path.child(&seg),
350                    content: xot.to_string(*new_child).unwrap_or_default(),
351                });
352            } else if let Some(text) = xot.text_str(*new_child)
353                && !text.trim().is_empty()
354            {
355                changes.push(XmlChange::TextChanged {
356                    path: path.clone(),
357                    old: String::new(),
358                    new: text.to_string(),
359                });
360            }
361        }
362    }
363
364    // Walk old children: report unmatched as removed.
365    for (old_key, old_child) in &old_keyed {
366        if !matched_old.contains(old_key) {
367            if xot.element(*old_child).is_some() {
368                let seg = child_element_segment(xot, old, *old_child, ctx);
369                changes.push(XmlChange::ElementRemoved {
370                    path: path.child(&seg),
371                    content: xot.to_string(*old_child).unwrap_or_default(),
372                });
373            } else if let Some(text) = xot.text_str(*old_child)
374                && !text.trim().is_empty()
375            {
376                changes.push(XmlChange::TextChanged {
377                    path: path.clone(),
378                    old: text.to_string(),
379                    new: String::new(),
380                });
381            }
382        }
383    }
384}
385
386/// Compare two matched children (same key).
387#[allow(clippy::too_many_arguments)]
388fn diff_matched_pair(
389    xot: &Xot,
390    old_parent: xot::Node,
391    old_child: xot::Node,
392    new_parent: xot::Node,
393    new_child: xot::Node,
394    path: &XmlPath,
395    ctx: &DiffCtx,
396    changes: &mut Vec<XmlChange>,
397) {
398    let old_is_el = xot.element(old_child).is_some();
399    let new_is_el = xot.element(new_child).is_some();
400
401    match (old_is_el, new_is_el) {
402        (true, true) => {
403            let old_name = xot.element(old_child).unwrap().name();
404            let new_name = xot.element(new_child).unwrap().name();
405
406            if old_name == new_name {
407                let seg = child_element_segment(xot, new_parent, new_child, ctx);
408                let child_path = path.child(&seg);
409                diff_elements(xot, old_child, new_child, &child_path, ctx, changes);
410            } else {
411                let old_seg = child_element_segment(xot, old_parent, old_child, ctx);
412                changes.push(XmlChange::ElementRemoved {
413                    path: path.child(&old_seg),
414                    content: xot.to_string(old_child).unwrap_or_default(),
415                });
416                let new_seg = child_element_segment(xot, new_parent, new_child, ctx);
417                changes.push(XmlChange::ElementAdded {
418                    path: path.child(&new_seg),
419                    content: xot.to_string(new_child).unwrap_or_default(),
420                });
421            }
422        }
423        (false, false) => {
424            if let (Some(ot), Some(nt)) = (xot.text_str(old_child), xot.text_str(new_child)) {
425                if ot != nt {
426                    changes.push(XmlChange::TextChanged {
427                        path: path.clone(),
428                        old: ot.to_string(),
429                        new: nt.to_string(),
430                    });
431                }
432            } else if let (Some(oc), Some(nc)) =
433                (xot.comment_str(old_child), xot.comment_str(new_child))
434                && oc != nc
435            {
436                changes.push(XmlChange::CommentChanged {
437                    path: path.clone(),
438                    old: oc.to_string(),
439                    new: nc.to_string(),
440                });
441            }
442        }
443        _ => {
444            // Key matched but types differ (shouldn't happen with proper keying,
445            // but handle gracefully).
446            if old_is_el {
447                let seg = child_element_segment(xot, old_parent, old_child, ctx);
448                changes.push(XmlChange::ElementRemoved {
449                    path: path.child(&seg),
450                    content: xot.to_string(old_child).unwrap_or_default(),
451                });
452            }
453            if new_is_el {
454                let seg = child_element_segment(xot, new_parent, new_child, ctx);
455                changes.push(XmlChange::ElementAdded {
456                    path: path.child(&seg),
457                    content: xot.to_string(new_child).unwrap_or_default(),
458                });
459            }
460        }
461    }
462}
463
464/// Compare attributes between two elements.
465fn diff_attributes(
466    xot: &Xot,
467    old: xot::Node,
468    new: xot::Node,
469    path: &XmlPath,
470    changes: &mut Vec<XmlChange>,
471) {
472    let old_attrs: Vec<(xot::NameId, String)> = xot
473        .attributes(old)
474        .iter()
475        .map(|(name_id, value)| (name_id, value.clone()))
476        .collect();
477    let new_attrs: Vec<(xot::NameId, String)> = xot
478        .attributes(new)
479        .iter()
480        .map(|(name_id, value)| (name_id, value.clone()))
481        .collect();
482
483    // Removed or changed attributes.
484    for (old_name_id, old_value) in &old_attrs {
485        let matching = new_attrs.iter().find(|(n, _)| n == old_name_id);
486        let (local, _) = xot.name_ns_str(*old_name_id);
487        match matching {
488            Some((_, new_value)) if new_value != old_value => {
489                changes.push(XmlChange::AttributeChanged {
490                    path: path.clone(),
491                    name: local.to_string(),
492                    old: Some(old_value.clone()),
493                    new: Some(new_value.clone()),
494                });
495            }
496            None => {
497                changes.push(XmlChange::AttributeChanged {
498                    path: path.clone(),
499                    name: local.to_string(),
500                    old: Some(old_value.clone()),
501                    new: None,
502                });
503            }
504            _ => {}
505        }
506    }
507
508    // Added attributes.
509    for (new_name_id, new_value) in &new_attrs {
510        if !old_attrs.iter().any(|(n, _)| n == new_name_id) {
511            let (local, _) = xot.name_ns_str(*new_name_id);
512            changes.push(XmlChange::AttributeChanged {
513                path: path.clone(),
514                name: local.to_string(),
515                old: None,
516                new: Some(new_value.clone()),
517            });
518        }
519    }
520}
521
522#[cfg(test)]
523mod tests {
524    use super::*;
525
526    /// Count changes of a specific variant.
527    fn count<F>(diff: &XmlDiff, pred: F) -> usize
528    where
529        F: Fn(&XmlChange) -> bool,
530    {
531        diff.changes.iter().filter(|c| pred(c)).count()
532    }
533
534    // -----------------------------------------------------------------
535    // Identity / no-op
536    // -----------------------------------------------------------------
537
538    #[test]
539    fn identical_xml_no_changes() {
540        let xml = "<root><item>hello</item></root>";
541        let diff = diff_xml(xml, xml).unwrap();
542        assert!(diff.is_empty(), "identical XML should produce no changes");
543        assert_eq!(diff.changes.len(), 0);
544    }
545
546    // -----------------------------------------------------------------
547    // Text changes
548    // -----------------------------------------------------------------
549
550    #[test]
551    fn text_content_change_exact() {
552        let old = "<root><item>hello</item></root>";
553        let new = "<root><item>world</item></root>";
554        let diff = diff_xml(old, new).unwrap();
555        assert_eq!(diff.changes.len(), 1, "exactly one change expected");
556        assert!(matches!(
557            &diff.changes[0],
558            XmlChange::TextChanged { old, new, .. }
559                if old == "hello" && new == "world"
560        ));
561    }
562
563    #[test]
564    fn whitespace_text_change() {
565        let old = "<root><item> x </item></root>";
566        let new = "<root><item>x</item></root>";
567        let diff = diff_xml(old, new).unwrap();
568        // Whitespace difference in text nodes is a real change.
569        assert_eq!(
570            count(&diff, |c| matches!(c, XmlChange::TextChanged { .. })),
571            1,
572            "whitespace-significant text difference should be reported"
573        );
574    }
575
576    // -----------------------------------------------------------------
577    // Attribute changes
578    // -----------------------------------------------------------------
579
580    #[test]
581    fn attribute_value_change_exact() {
582        let old = r#"<root><item id="1" class="old">x</item></root>"#;
583        let new = r#"<root><item id="1" class="new">x</item></root>"#;
584        let diff = diff_xml(old, new).unwrap();
585        let attr_changes = count(&diff, |c| matches!(c, XmlChange::AttributeChanged { .. }));
586        assert_eq!(attr_changes, 1, "only the 'class' attr changed");
587        assert!(matches!(
588            &diff.changes[0],
589            XmlChange::AttributeChanged { name, old: Some(o), new: Some(n), .. }
590                if name == "class" && o == "old" && n == "new"
591        ));
592    }
593
594    #[test]
595    fn attribute_added_exact() {
596        let old = "<root><item>x</item></root>";
597        let new = r#"<root><item color="red">x</item></root>"#;
598        let diff = diff_xml(old, new).unwrap();
599        assert_eq!(
600            count(&diff, |c| matches!(c, XmlChange::AttributeChanged { .. })),
601            1
602        );
603        assert!(matches!(
604            &diff.changes[0],
605            XmlChange::AttributeChanged { name, old: None, new: Some(n), .. }
606                if name == "color" && n == "red"
607        ));
608    }
609
610    #[test]
611    fn attribute_removed_exact() {
612        let old = r#"<root><item color="red">x</item></root>"#;
613        let new = "<root><item>x</item></root>";
614        let diff = diff_xml(old, new).unwrap();
615        assert_eq!(
616            count(&diff, |c| matches!(c, XmlChange::AttributeChanged { .. })),
617            1
618        );
619        assert!(matches!(
620            &diff.changes[0],
621            XmlChange::AttributeChanged { name, old: Some(o), new: None, .. }
622                if name == "color" && o == "red"
623        ));
624    }
625
626    // -----------------------------------------------------------------
627    // Element add / remove
628    // -----------------------------------------------------------------
629
630    #[test]
631    fn element_added_exact() {
632        let old = "<root><a>one</a></root>";
633        let new = "<root><a>one</a><b>two</b></root>";
634        let diff = diff_xml(old, new).unwrap();
635        let added = count(&diff, |c| matches!(c, XmlChange::ElementAdded { .. }));
636        assert_eq!(added, 1, "exactly one element added");
637        assert!(matches!(
638            &diff.changes[0],
639            XmlChange::ElementAdded { path, .. }
640                if path.to_string() == "/root/b"
641        ));
642    }
643
644    #[test]
645    fn element_removed_exact() {
646        let old = "<root><a>one</a><b>two</b></root>";
647        let new = "<root><a>one</a></root>";
648        let diff = diff_xml(old, new).unwrap();
649        let removed = count(&diff, |c| matches!(c, XmlChange::ElementRemoved { .. }));
650        assert_eq!(removed, 1, "exactly one element removed");
651        assert!(matches!(
652            &diff.changes[0],
653            XmlChange::ElementRemoved { path, .. }
654                if path.to_string() == "/root/b"
655        ));
656    }
657
658    // -----------------------------------------------------------------
659    // Nested changes
660    // -----------------------------------------------------------------
661
662    #[test]
663    fn nested_change_exact_path() {
664        let old = "<root><section><title>Old</title></section></root>";
665        let new = "<root><section><title>New</title></section></root>";
666        let diff = diff_xml(old, new).unwrap();
667        assert_eq!(diff.changes.len(), 1);
668        assert!(matches!(
669            &diff.changes[0],
670            XmlChange::TextChanged { path, old, new }
671                if path.to_string() == "/root/section/title"
672                    && old == "Old" && new == "New"
673        ));
674    }
675
676    // -----------------------------------------------------------------
677    // @id path enrichment
678    // -----------------------------------------------------------------
679
680    #[test]
681    fn path_includes_id_attribute() {
682        let old = r#"<root><item id="x">old</item></root>"#;
683        let new = r#"<root><item id="x">new</item></root>"#;
684        let diff = diff_xml(old, new).unwrap();
685        assert_eq!(diff.changes.len(), 1);
686        if let XmlChange::TextChanged { path, .. } = &diff.changes[0] {
687            assert_eq!(
688                path.to_string(),
689                r#"/root/item[@id="x"]"#,
690                "path should use @id predicate"
691            );
692        } else {
693            panic!("expected TextChanged, got {:?}", diff.changes[0]);
694        }
695    }
696
697    // -----------------------------------------------------------------
698    // Same-name sibling disambiguation ([N])
699    // -----------------------------------------------------------------
700
701    #[test]
702    fn same_name_siblings_use_positional_index() {
703        let old = "<root><item>a</item><item>b</item><item>c</item></root>";
704        let new = "<root><item>a</item><item>CHANGED</item><item>c</item></root>";
705        let diff = diff_xml(old, new).unwrap();
706        assert_eq!(diff.changes.len(), 1, "only second item changed");
707        if let XmlChange::TextChanged { path, old, new } = &diff.changes[0] {
708            assert_eq!(path.to_string(), "/root/item[2]");
709            assert_eq!(old, "b");
710            assert_eq!(new, "CHANGED");
711        } else {
712            panic!("expected TextChanged, got {:?}", diff.changes[0]);
713        }
714    }
715
716    #[test]
717    fn same_name_siblings_id_preferred_over_position() {
718        let old = r#"<root><item id="a">1</item><item id="b">2</item></root>"#;
719        let new = r#"<root><item id="a">1</item><item id="b">CHANGED</item></root>"#;
720        let diff = diff_xml(old, new).unwrap();
721        assert_eq!(diff.changes.len(), 1);
722        if let XmlChange::TextChanged { path, .. } = &diff.changes[0] {
723            assert!(
724                path.to_string().contains(r#"@id="b""#),
725                "should use @id, not [2]: {path}"
726            );
727        } else {
728            panic!("expected TextChanged");
729        }
730    }
731
732    // -----------------------------------------------------------------
733    // Comment changes
734    // -----------------------------------------------------------------
735
736    #[test]
737    fn comment_change_detected() {
738        let old = "<root><!-- old comment --></root>";
739        let new = "<root><!-- new comment --></root>";
740        let diff = diff_xml(old, new).unwrap();
741        assert_eq!(
742            count(&diff, |c| matches!(c, XmlChange::CommentChanged { .. })),
743            1
744        );
745        assert!(matches!(
746            &diff.changes[0],
747            XmlChange::CommentChanged { old, new, .. }
748                if old.contains("old") && new.contains("new")
749        ));
750    }
751
752    #[test]
753    fn identical_comments_no_change() {
754        let xml = "<root><!-- same --></root>";
755        let diff = diff_xml(xml, xml).unwrap();
756        assert!(diff.is_empty());
757    }
758
759    // -----------------------------------------------------------------
760    // Namespace handling
761    // -----------------------------------------------------------------
762
763    #[test]
764    fn namespace_aware_same_uri_no_change() {
765        // Same namespace URI, same local name → no change even if prefix differs
766        // is NOT tested here because xot normalizes by namespace URI.
767        let old = r#"<ns:root xmlns:ns="urn:test"><ns:item>x</ns:item></ns:root>"#;
768        let new = r#"<ns:root xmlns:ns="urn:test"><ns:item>x</ns:item></ns:root>"#;
769        let diff = diff_xml(old, new).unwrap();
770        assert!(diff.is_empty(), "identical namespaced XML → no changes");
771    }
772
773    #[test]
774    fn namespace_text_change() {
775        let old = r#"<ns:root xmlns:ns="urn:test"><ns:item>old</ns:item></ns:root>"#;
776        let new = r#"<ns:root xmlns:ns="urn:test"><ns:item>new</ns:item></ns:root>"#;
777        let diff = diff_xml(old, new).unwrap();
778        assert_eq!(diff.changes.len(), 1);
779        assert!(matches!(
780            &diff.changes[0],
781            XmlChange::TextChanged { old, new, .. }
782                if old == "old" && new == "new"
783        ));
784    }
785
786    #[test]
787    fn different_namespace_is_different_element() {
788        let old = r#"<root xmlns:a="urn:a"><a:item>x</a:item></root>"#;
789        let new = r#"<root xmlns:b="urn:b"><b:item>x</b:item></root>"#;
790        let diff = diff_xml(old, new).unwrap();
791        // Different namespace URI → different element → removed + added
792        let removed = count(&diff, |c| matches!(c, XmlChange::ElementRemoved { .. }));
793        let added = count(&diff, |c| matches!(c, XmlChange::ElementAdded { .. }));
794        assert!(removed >= 1, "old namespaced element should be removed");
795        assert!(added >= 1, "new namespaced element should be added");
796    }
797
798    // -----------------------------------------------------------------
799    // Different root elements
800    // -----------------------------------------------------------------
801
802    #[test]
803    fn different_root_elements() {
804        let old = "<alpha>content</alpha>";
805        let new = "<beta>content</beta>";
806        let diff = diff_xml(old, new).unwrap();
807        let removed = count(&diff, |c| matches!(c, XmlChange::ElementRemoved { .. }));
808        let added = count(&diff, |c| matches!(c, XmlChange::ElementAdded { .. }));
809        assert_eq!(removed, 1, "old root should be removed");
810        assert_eq!(added, 1, "new root should be added");
811    }
812
813    // -----------------------------------------------------------------
814    // Mixed content
815    // -----------------------------------------------------------------
816
817    #[test]
818    fn mixed_content_text_change() {
819        let old = "<p>Hello <b>world</b> end</p>";
820        let new = "<p>Goodbye <b>world</b> end</p>";
821        let diff = diff_xml(old, new).unwrap();
822        assert_eq!(diff.changes.len(), 1);
823        assert!(matches!(
824            &diff.changes[0],
825            XmlChange::TextChanged { old, new, .. }
826                if old == "Hello " && new == "Goodbye "
827        ));
828    }
829
830    #[test]
831    fn mixed_content_element_and_text() {
832        let old = "<p>text <em>a</em> more</p>";
833        let new = "<p>text <em>a</em> more <strong>new</strong></p>";
834        let diff = diff_xml(old, new).unwrap();
835        let added = count(&diff, |c| matches!(c, XmlChange::ElementAdded { .. }));
836        assert!(added >= 1, "added <strong> element should be detected");
837    }
838
839    // -----------------------------------------------------------------
840    // Empty elements
841    // -----------------------------------------------------------------
842
843    #[test]
844    fn empty_to_content() {
845        let old = "<root><item/></root>";
846        let new = "<root><item>text</item></root>";
847        let diff = diff_xml(old, new).unwrap();
848        // <item/> has no children; <item>text</item> has one text child → added text
849        let text_changes = count(&diff, |c| matches!(c, XmlChange::TextChanged { .. }));
850        assert!(text_changes >= 1, "should detect text added to empty element");
851    }
852
853    #[test]
854    fn content_to_empty() {
855        let old = "<root><item>text</item></root>";
856        let new = "<root><item/></root>";
857        let diff = diff_xml(old, new).unwrap();
858        let text_changes = count(&diff, |c| matches!(c, XmlChange::TextChanged { .. }));
859        assert!(text_changes >= 1, "should detect text removed from element");
860    }
861
862    // -----------------------------------------------------------------
863    // Display formatting
864    // -----------------------------------------------------------------
865
866    #[test]
867    fn display_format() {
868        let old = "<root><item>old</item></root>";
869        let new = "<root><item>new</item></root>";
870        let diff = diff_xml(old, new).unwrap();
871        let formatted = diff.to_string();
872        assert!(formatted.contains('~'), "display should use ~ for changes");
873        assert!(
874            formatted.contains("text:"),
875            "display should show text changes"
876        );
877    }
878
879    #[test]
880    fn display_element_added_uses_plus() {
881        let old = "<root/>";
882        let new = "<root><child>x</child></root>";
883        let diff = diff_xml(old, new).unwrap();
884        let formatted = diff.to_string();
885        assert!(formatted.contains("+ /"), "added element display uses +");
886    }
887
888    #[test]
889    fn display_element_removed_uses_minus() {
890        let old = "<root><child>x</child></root>";
891        let new = "<root/>";
892        let diff = diff_xml(old, new).unwrap();
893        let formatted = diff.to_string();
894        assert!(formatted.contains("- /"), "removed element display uses -");
895    }
896
897    // -----------------------------------------------------------------
898    // Edge cases
899    // -----------------------------------------------------------------
900
901    #[test]
902    fn multiple_changes_in_one_diff() {
903        let old = r#"<root><a>1</a><b x="old">2</b><c>3</c></root>"#;
904        let new = r#"<root><a>CHANGED</a><b x="new">2</b><d>4</d></root>"#;
905        let diff = diff_xml(old, new).unwrap();
906        // a: text changed
907        // b: attr changed
908        // c removed, d added (position-based: c→d is a replacement)
909        assert!(
910            diff.changes.len() >= 3,
911            "should detect text, attr, and element changes: got {}",
912            diff.changes.len()
913        );
914        assert!(
915            count(&diff, |c| matches!(c, XmlChange::TextChanged { .. })) >= 1,
916            "text change in <a>"
917        );
918        assert!(
919            count(&diff, |c| matches!(c, XmlChange::AttributeChanged { .. })) >= 1,
920            "attr change in <b>"
921        );
922    }
923
924    #[test]
925    fn deeply_nested_change() {
926        let old = "<a><b><c><d><e>old</e></d></c></b></a>";
927        let new = "<a><b><c><d><e>new</e></d></c></b></a>";
928        let diff = diff_xml(old, new).unwrap();
929        assert_eq!(diff.changes.len(), 1);
930        if let XmlChange::TextChanged { path, .. } = &diff.changes[0] {
931            assert_eq!(path.to_string(), "/a/b/c/d/e");
932        } else {
933            panic!("expected TextChanged");
934        }
935    }
936
937    #[test]
938    fn parse_error_returns_err() {
939        let result = diff_xml("<valid/>", "not xml at all");
940        assert!(result.is_err(), "malformed XML should return Err");
941    }
942
943    // -----------------------------------------------------------------
944    // Key-based matching (no cascade on insertion/deletion)
945    // -----------------------------------------------------------------
946
947    #[test]
948    fn remove_middle_element_no_cascade() {
949        // Removing B from [A, B, C, D] should report only B removed,
950        // not a cascade of mismatches for C and D.
951        let old = "<root><a>1</a><b>2</b><c>3</c><d>4</d></root>";
952        let new = "<root><a>1</a><c>3</c><d>4</d></root>";
953        let diff = diff_xml(old, new).unwrap();
954        assert_eq!(
955            diff.changes.len(),
956            1,
957            "only one removal, no cascade: {:#?}",
958            diff.changes
959        );
960        assert!(matches!(
961            &diff.changes[0],
962            XmlChange::ElementRemoved { path, .. }
963                if path.to_string() == "/root/b"
964        ));
965    }
966
967    #[test]
968    fn insert_middle_element_no_cascade() {
969        let old = "<root><a>1</a><c>3</c><d>4</d></root>";
970        let new = "<root><a>1</a><b>2</b><c>3</c><d>4</d></root>";
971        let diff = diff_xml(old, new).unwrap();
972        assert_eq!(
973            diff.changes.len(),
974            1,
975            "only one addition, no cascade: {:#?}",
976            diff.changes
977        );
978        assert!(matches!(
979            &diff.changes[0],
980            XmlChange::ElementAdded { path, .. }
981                if path.to_string() == "/root/b"
982        ));
983    }
984
985    #[test]
986    fn remove_id_element_no_cascade() {
987        // Same test but with @id attributes - the primary matching key.
988        let old = r#"<root><item id="a">1</item><item id="b">2</item><item id="c">3</item></root>"#;
989        let new = r#"<root><item id="a">1</item><item id="c">3</item></root>"#;
990        let diff = diff_xml(old, new).unwrap();
991        assert_eq!(
992            diff.changes.len(),
993            1,
994            "only id=b removed, no cascade: {:#?}",
995            diff.changes
996        );
997        assert!(matches!(
998            &diff.changes[0],
999            XmlChange::ElementRemoved { path, .. }
1000                if path.to_string().contains(r#"@id="b""#)
1001        ));
1002    }
1003
1004    #[test]
1005    fn remove_and_modify_no_false_positives() {
1006        // Remove B, modify C's text. Should get exactly 2 changes.
1007        let old = "<root><a>1</a><b>2</b><c>old</c></root>";
1008        let new = "<root><a>1</a><c>new</c></root>";
1009        let diff = diff_xml(old, new).unwrap();
1010        let removed = count(&diff, |c| matches!(c, XmlChange::ElementRemoved { .. }));
1011        let text_changed = count(&diff, |c| matches!(c, XmlChange::TextChanged { .. }));
1012        assert_eq!(removed, 1, "b removed");
1013        assert_eq!(text_changed, 1, "c text changed");
1014        assert_eq!(diff.changes.len(), 2, "exactly 2 changes, no cascade");
1015    }
1016}