//! natural-xml-diff 0.2.0
//!
//! Natural diffing between XML documents.

use std::ops::Range;
use xot::{Error, Node, Xot};

use crate::apply::apply_diff;
use crate::comparison::Comparison;
use crate::vtree::Status;
use crate::Edit;

/// A natural XML diff comparison of two documents.
///
/// Bundles the `Xot` instance with the `Comparison` that was built from the
/// two input documents; the methods on this type hand the `Xot` instance to
/// the comparison when they delegate to it.
pub struct NaturalXmlDiff<'a> {
    /// The `Xot` instance created in `new` and passed to
    /// `Comparison::from_xml` along with both XML strings.
    xot: Xot<'a>,
    /// The comparison of document A against document B that the methods
    /// of this type delegate to.
    comparison: Comparison,
}

impl<'a> NaturalXmlDiff<'a> {
    /// Build a comparison of two XML documents given as XML strings.
    ///
    /// # Errors
    ///
    /// Propagates the error reported by `Comparison::from_xml` when the
    /// comparison cannot be constructed from `xml_a` and `xml_b`.
    pub fn new(xml_a: &str, xml_b: &str) -> Result<Self, Error> {
        let mut store = Xot::new();
        let built = Comparison::from_xml(&mut store, xml_a, xml_b)?;
        Ok(Self {
            xot: store,
            comparison: built,
        })
    }

    /// Produce the diff document for this comparison, serialized as a string.
    ///
    /// The diff document describes how to turn document A into document B.
    /// Applying it with `apply` should yield a document that is semantically
    /// equivalent to document B.
    pub fn diff(&mut self) -> String {
        let diff_root = self.comparison.diff(&mut self.xot);
        self.xot.serialize_to_string(diff_root)
    }

    /// Build the diff and check that it is correct, in a single step.
    ///
    /// Convenience function: it constructs the diff document, immediately
    /// applies it to document A and checks that the outcome is semantically
    /// equal to document B.
    ///
    /// Mainly useful in tests, when you want to make sure correct diffs are
    /// being produced.
    ///
    /// # Errors
    ///
    /// If the check fails, the error contains the XML of the erroneous
    /// document.
    pub fn verify(&mut self) -> Result<(), String> {
        let comparison = &mut self.comparison;
        comparison.verify(&mut self.xot)
    }

    /// List the edits that transform document A into document B.
    ///
    /// Low-level API exposing the individual edit operations. To obtain the
    /// diff document itself you would normally call the higher-level
    /// `NaturalXmlDiff::diff` method instead.
    pub fn edits(&mut self) -> Vec<Edit> {
        let comparison = &mut self.comparison;
        comparison.edits(&mut self.xot)
    }

    /// List the partitions of `a` and `b` in which nodes are equal.
    ///
    /// Each entry is a pair of `Range<usize>` values. This is a low-level
    /// API for inspecting the details of the diffing algorithm.
    pub fn partition(&mut self) -> Vec<(Range<usize>, Range<usize>)> {
        let comparison = &mut self.comparison;
        comparison.partition()
    }

    /// Iterate over the nodes of document `a` and of document `b`, each
    /// paired with its status.
    ///
    /// The first iterator of the returned tuple walks the nodes recorded for
    /// `a`, the second those recorded for `b`. This is a low-level API for
    /// inspecting the details of the diffing algorithm.
    pub fn diff_status(
        &mut self,
    ) -> (
        impl Iterator<Item = (Node, Status)> + '_,
        impl Iterator<Item = (Node, Status)> + '_,
    ) {
        // Run the comparison's own `diff_status` before reading the per-node
        // statuses; presumably this is what populates them. Whatever it
        // returns is not used here.
        self.comparison.diff_status(&self.xot);

        let comparison = &self.comparison;
        (
            comparison
                .vtree_a
                .nodes
                .iter()
                .map(|entry| (entry.node, entry.status)),
            comparison
                .vtree_b
                .nodes
                .iter()
                .map(|entry| (entry.node, entry.status)),
        )
    }

    /// Serialize a single node to a string.
    ///
    /// A node can be obtained by going through the edits and picking up an
    /// `InsertContent::XmlNode`.
    pub fn serialize(&mut self, node: Node) -> String {
        let store = &self.xot;
        store.serialize_node_to_string(node)
    }

    /// Borrow the low-level [`Xot`] instance.
    ///
    /// Useful when you need access to XML values and names. Code that sticks
    /// to the high-level API should normally not need it.
    pub fn xot(&self) -> &Xot {
        &self.xot
    }
}

/// Produce a diff document from XML document A and XML document B.
///
/// The resulting document describes how to get from A to B.
///
/// The diff document introduces the namespace prefix `diff`, bound to the
/// `http://paligo.net/nxd` namespace URI.
///
/// ## Elements
///
/// A newly inserted element is marked with the `diff:insert` attribute,
/// which has an empty value:
///
/// ```xml
/// <section><p diff:insert="">This is a new paragraph.</p></section>
/// ```
///
/// A deleted element is marked with the `diff:delete` attribute, which has
/// an empty value:
///
/// ```xml
/// <section><p diff:delete="">This is a paragraph that is deleted.</p></section>
/// ```
///
/// When the document (top) element itself is replaced, a bare `diff:delete`
/// plus `diff:insert` of the two elements is not possible, because a
/// well-formed XML document can only have a single document element. In
/// that case a special placeholder `diff:root` element is inserted to wrap
/// the insert and the delete:
///
/// ```xml
/// <diff:root xmlns:diff="http://paligo.net/nxd"><doc diff:delete="">A</doc><doc diff:insert="">B</doc></diff:root>
/// ```
///
/// ## Text
///
/// Text updates are marked with `diff:text-insert` and `diff:text-delete`
/// elements:
///
/// ```xml
/// <section><p>This is a paragraph with <diff:text-insert>new text</diff:text-insert> and <diff:text-delete>deleted text</diff:text-delete>.</p></section>
/// ```
///
/// ## Attributes
///
/// Attribute changes are represented by a new `diff:attributes` element
/// inserted as the first child of the element whose attributes changed.
/// Each attribute update is represented by a child element that is marked
/// with one of the `diff:attr-update`, `diff:attr-insert` or
/// `diff:attr-delete` attributes (with empty values).
///
/// Update attribute `a` to the new value `A!`:
///
/// ```xml
/// <section a="A" b="B"><diff:attributes><a diff:attr-update="">A!</a></diff:attributes><p>Txt</p></section>
/// ```
///
/// Insert a new attribute `c` with value `C`:
///
/// ```xml
/// <section a="A" b="B"><diff:attributes><c diff:attr-insert="">C</c></diff:attributes><p>Txt</p></section>
/// ```
///
/// Delete attribute `b`:
///
/// ```xml
/// <section a="A" b="B"><diff:attributes><b diff:attr-delete=""/></diff:attributes><p>Txt</p></section>
/// ```
///
/// ## Processing instructions
///
/// A processing instruction that is to be inserted is presented like this:
///
/// ```xml
/// <diff:pi-insert><?my pi?></diff:pi-insert>
/// ```
///
/// A processing instruction that is to be deleted is presented like this:
///
/// ```xml
/// <diff:pi-delete><?my pi?></diff:pi-delete>
/// ```
///
/// ## Comments
///
/// A comment that is to be inserted is presented like this:
///
/// ```xml
/// <diff:comment-insert><!-- my comment --></diff:comment-insert>
/// ```
///
/// A comment that is to be deleted is presented like this:
///
/// ```xml
/// <diff:comment-delete><!-- my comment --></diff:comment-delete>
/// ```
///
/// # Errors
///
/// Returns the error from [`NaturalXmlDiff::new`] when the comparison
/// cannot be constructed from the two XML strings.
pub fn diff(xml_a: &str, xml_b: &str) -> Result<String, Error> {
    NaturalXmlDiff::new(xml_a, xml_b).map(|mut nxd| nxd.diff())
}

/// Apply a diff document.
///
/// Returns the XML of the document that results from applying the diff.
///
/// This is a low-level API that exists to support the CLI; you don't
/// normally need to use it. If you want to verify that a diff document is
/// correct, use [`NaturalXmlDiff::verify`] instead.
///
/// # Errors
///
/// Returns an error if `diff_xml` cannot be parsed.
pub fn apply(diff_xml: &str) -> Result<String, Error> {
    let mut store = Xot::new();
    let root = store.parse(diff_xml)?;
    // The same node handle is serialized after the call, so `apply_diff`
    // is relied on here for its effect on the parsed document.
    apply_diff(&mut store, root);
    let applied_xml = store.serialize_to_string(root);
    Ok(applied_xml)
}