natural_xml_diff/
convenience.rs

1use std::ops::Range;
2use xot::{Error, Node, Xot};
3
4use crate::apply::apply_diff;
5use crate::comparison::Comparison;
6use crate::vtree::Status;
7use crate::Edit;
8
9/// A natural XML diff comparison of two documents.
10pub struct NaturalXmlDiff<'a> {
11    xot: Xot<'a>,
12    comparison: Comparison,
13}
14
15impl<'a> NaturalXmlDiff<'a> {
16    /// Construct a comparison between two XML documents from XML strings.
17    pub fn new(xml_a: &str, xml_b: &str) -> Result<Self, Error> {
18        let mut xot = Xot::new();
19        let comparison = Comparison::from_xml(&mut xot, xml_a, xml_b)?;
20        Ok(Self { xot, comparison })
21    }
22
23    /// Returns a diff document for this comparison.
24    ///
25    /// This diff document contains information on how to change document A
26    /// into document B. If this diff document is applied using `apply`,
27    /// the result should be semantically equivalent to document B.
28    pub fn diff(&mut self) -> String {
29        let node = self.comparison.diff(&mut self.xot);
30        self.xot.serialize_to_string(node)
31    }
32
33    /// Verify that a correct diff gets produced in one step.
34    ///
35    /// This is a convenience function that first constructs the diff document
36    /// and then immediately applies it to document A and checks that the
37    /// result is semantically equal to document B. If not, it returns the
38    /// XML for the erroneous document as part of the error.
39    ///
40    /// It's useful for testing purposes if you want to ensure that correct
41    /// diffs are produced.
42    pub fn verify(&mut self) -> Result<(), String> {
43        self.comparison.verify(&mut self.xot)
44    }
45
46    /// Returns a list of edits that transform document A into document B.
47    ///
48    /// This is a low-level API to see the details of the edit operations.
49    /// Normally you'd use the higher level `NaturalXmlDiff::diff` method to
50    /// obtain the diff document.
51    pub fn edits(&mut self) -> Vec<Edit> {
52        self.comparison.edits(&mut self.xot)
53    }
54
55    /// Returns a list of partitions of a and b where nodes are equal.
56    ///
57    /// A low-level API to see the details of the diffing algorithm.
58    pub fn partition(&mut self) -> Vec<(Range<usize>, Range<usize>)> {
59        self.comparison.partition()
60    }
61
62    /// Returns an iterator of nodes in document `a` and `b` and their status.
63    ///
64    /// A low-level API to see the details of the diffing algorithm.
65    pub fn diff_status(
66        &mut self,
67    ) -> (
68        impl Iterator<Item = (Node, Status)> + '_,
69        impl Iterator<Item = (Node, Status)> + '_,
70    ) {
71        self.comparison.diff_status(&self.xot);
72        let status_a = self
73            .comparison
74            .vtree_a
75            .nodes
76            .iter()
77            .map(|vnode| (vnode.node, vnode.status));
78        let status_b = self
79            .comparison
80            .vtree_b
81            .nodes
82            .iter()
83            .map(|vnode| (vnode.node, vnode.status));
84        (status_a, status_b)
85    }
86
87    /// Serialize a node to a string.
88    ///
89    /// You can get a node if you access the edits and have an `InsertContent::XmlNode`
90    pub fn serialize(&mut self, node: Node) -> String {
91        self.xot.serialize_node_to_string(node)
92    }
93
94    /// Access to the low-level [`Xot`] instance - this is useful if you
95    /// need access to XML values and names.
96    ///
97    /// Normally should not be needed if you stick to the high-level API.
98    pub fn xot(&self) -> &Xot {
99        &self.xot
100    }
101}
102
103/// Given XML document A and XML document B produce a diff document.
104///
105/// This document describes how to get from A to B.
106///
107/// The diff document adds a new namespace prefix `diff` for the
108/// `http://paligo.net/nxd` namespace URI.
109///
110/// ## Elements
111///
112/// New elements that are inserted are marked by the `diff:insert` attribute
113/// with an empty value:
114///
115/// ```xml
116/// <section><p diff:insert="">This is a new paragraph.</p></section>
117/// ```
118///
119/// Elements that are deleted are marked by the `diff:delete` attribute with an
120/// empty value:
121///
122/// ```xml
123/// <section><p diff:delete="">This is a paragraph that is deleted.</p></section>
124/// ```
125///
126/// If the document (top) element is replaced, we can't just do a bare
127/// `diff:delete` and `diff:insert` of the elements, because well-formed XML
128/// documents can only have a single document element. Instead, a special
129/// placeholder `diff:root` element is inserted to wrap the insert and delete:
130///
131/// ```xml
132/// <diff:root xmlns:diff="http://paligo.net/nxd"><doc diff:delete="">A</doc><doc diff:insert="">B</doc></diff:root>
133/// ```
134///
135/// ## Text
136///
137/// Text updates are marked with `diff:text-insert` and `diff:text-delete`
138/// elements:
139///
140/// ```xml
141/// <section><p>This is a paragraph with <diff:text-insert>new text</diff:text-insert> and <diff:text-delete>deleted text</diff:text-delete>.</p></section>
142/// ```
143///
144/// ## Attributes
145///
146/// Attributes changes are represented by a new `diff:attributes` elements
147/// inserted as the first child of the element in which the attributes changed.
148/// Each attribute update is represented by a child element that is marked with
149/// the `diff:attr-update`, `diff:attr-insert` or `diff:attr-delete` attributes
150/// (with empty values):
151///
152/// Update attribute `a` to the new value `A!`:
153///
154/// ```xml
155/// <section a="A" b="B"><diff:attributes><a diff:attr-update>A!</a></diff:attributes><p>Txt</p></section>
156/// ```
157///
158/// Insert a new attribute `c` with value `C`:
159///
160/// ```xml
161/// <section a="A" b="B"><diff:attributes><c diff:attr-insert="">C</c></diff:attributes><p>Txt</p></section>
162/// ```
163///
164/// Delete attribute `b`:
165///
166/// ```xml
167/// <section a="A" b="B"><diff:attributes><b diff:attr-delete=""/></diff:attributes><p>Txt</p></section>
168/// ```
169///
170/// ## Processing instructions
171///
172/// Processing instructions that are to be inserted are presented like this:
173///
174/// ```xml
175/// <diff:pi-insert><?my pi?></diff:pi-insert>
176/// ```
177///
178/// And processing instructions that are to be deleted are presented like this:
179///
180/// ```xml
181/// <diff:pi-delete><?my pi?></diff:pi-delete>
182/// ```
183///
184/// ## Comments
185///
186/// Comments that are to be inserted are presented like this:
187///
188/// ```xml
189/// <diff:comment-insert><!-- my comment --></diff:comment-insert>
190/// ```
191///
192/// and for deleted comments:
193///
194/// ```xml
195/// <diff:comment-delete><!-- my comment --></diff:comment-delete>
196/// ```
197pub fn diff(xml_a: &str, xml_b: &str) -> Result<String, Error> {
198    let mut nxd = NaturalXmlDiff::new(xml_a, xml_b)?;
199    Ok(nxd.diff())
200}
201
202/// Given a diff document, apply it.
203///
204/// This returns the XML for the document that results from applying the diff.
205///
206/// This is a low-level API to support the CLI you don't normally need to use.
207/// If you want to verify that a diff document is correct, use
208/// [`NaturalXmlDiff::verify`] instead.
209pub fn apply(diff_xml: &str) -> Result<String, Error> {
210    let mut xot = Xot::new();
211    let doc = xot.parse(diff_xml)?;
212    apply_diff(&mut xot, doc);
213    Ok(xot.serialize_to_string(doc))
214}