natural-xml-diff 0.2.0

Natural diffing between XML documents
Documentation
use std::ops::Range;

use crate::comparison::{Comparison, EqualPair};

pub(crate) type Overlap = (Range<usize>, Range<usize>);

impl Comparison {
    /// Partition the vtrees into equal and non-equal nodes
    /// Return the ranges of equal nodes
    ///
    /// This is a low-level API in order to get insight into the
    /// workings of the diff algorithm.
    pub(crate) fn partition(&self) -> Vec<(Range<usize>, Range<usize>)> {
        self.partition_in_range(0..self.vtree_a.nodes.len(), 0..self.vtree_b.nodes.len())
    }

    fn partition_in_range(
        &self,
        range_a: Range<usize>,
        range_b: Range<usize>,
    ) -> Vec<(Range<usize>, Range<usize>)> {
        let mut ranges = vec![(range_a, range_b)];
        let mut result = Vec::new();
        while !ranges.is_empty() {
            let (range_a, range_b) = ranges.pop().unwrap();
            let overlap = self.lcss_in_range(range_a.clone(), range_b.clone());
            let (overlap_a, overlap_b) = overlap.clone();
            if overlap_a.is_empty() {
                // if overlap_a is empty then overlap_b will be empty too
                continue;
            }
            result.push(overlap);
            let before_range_a = range_a.start..overlap_a.start;
            let after_range_a = overlap_a.end..range_a.end;
            let before_range_b = range_b.start..overlap_b.start;
            let after_range_b = overlap_b.end..range_b.end;
            ranges.push((before_range_a, before_range_b));
            ranges.push((after_range_a, after_range_b));
        }
        result.sort_by_key(|(range_a, _)| range_a.start);
        result
    }
}

pub(crate) fn equal_pairs(overlaps: &[Overlap]) -> Vec<EqualPair> {
    overlaps
        .iter()
        .flat_map(|(a_range, b_range)| a_range.clone().into_iter().zip(b_range.clone().into_iter()))
        .map(|(a, b)| EqualPair(a, b))
        .collect::<Vec<_>>()
}

#[cfg(test)]
mod tests {
    use super::*;
    use xot::Xot;

    #[test]
    fn test_partition() {
        let mut xot = Xot::new();
        let doc_a = xot.parse("<container><a/><b/></container>").unwrap();
        let doc_b = xot
            .parse("<container><x/><a/><b/><c/></container>")
            .unwrap();

        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let result = comparison.partition();
        assert_eq!(result, vec![(2..4, 3..5)]);
    }

    #[test]
    fn test_partition_multiple_overlaps() {
        let mut xot = Xot::new();
        let doc_a = xot
            .parse("<container><a/><b/><x/><c/><d/></container>")
            .unwrap();
        let doc_b = xot
            .parse("<container><x/><a/><b/><c/><d/></container>")
            .unwrap();

        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let result = comparison.partition();
        assert_eq!(result, vec![(2..4, 3..5), (5..7, 5..7)]);
    }

    #[test]
    fn test_partition_longer() {
        let mut xot = Xot::new();
        // example taken from the jndiff paper, page 11
        let doc_a = xot
            .parse(concat!(
                "<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
                "<chapter><title>Text 4</title><para>Text 5</para></chapter>",
                "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
                "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
                "<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
            ))
            .unwrap();
        let doc_b = xot
            .parse(concat!(
                "
            <book><chapter><para>Text 2</para></chapter>",
                "<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
                "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
                "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
                "<chapter><para>Text 12</para></chapter></book>"
            ))
            .unwrap();

        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let result = comparison.partition();
        assert_eq!(
            result,
            vec![
                (5..7, 3..5),
                (8..10, 6..8),
                (12..24, 12..24),
                (27..29, 25..27)
            ]
        );
    }

    #[test]
    fn test_partition_problematic() {
        let mut xot = Xot::new();
        let xml_a = r#"<section>
    <title>Section title</title>
    <para>Compare documents with different root element</para>

    <section>
        <title>Sit Ipsum Consectetur Sem Ligula</title>
        <para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
    </section>

    <section>
        <title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
        <para>Morbi leo <code>porta ac consectetur</code>risus,  ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
    </section>
</section>"#;
        let xml_b = r#"<section>
    <title>Section title</title>
    <para>Compare documents with different root element</para>

    <section>
        <title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
        <para>Morbi leo <code>porta ac consectetur</code>risus,  ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
    </section>

    <section>
        <title>Sit Ipsum Consectetur Sem Ligula</title>
        <para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
    </section>
</section>"#;
        let doc_a = xot.parse(xml_a).unwrap();
        let doc_b = xot.parse(xml_b).unwrap();
        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let result = comparison.partition();
        assert_eq!(
            result,
            vec![(2..8, 2..8), (20..35, 8..23), (35..36, 35..36)]
        );
    }
}