natural-xml-diff 0.1.0

Natural diffing between XML documents
Documentation
use std::ops::Range;

use crate::comparison::Comparison;

impl Comparison {
    #[cfg(test)]
    pub(crate) fn lcss(&self) -> (Range<usize>, Range<usize>) {
        self.lcss_in_range(0..self.vtree_a.nodes.len(), 0..self.vtree_b.nodes.len())
    }

    pub(crate) fn lcss_in_range(
        &self,
        range_a: Range<usize>,
        range_b: Range<usize>,
    ) -> (Range<usize>, Range<usize>) {
        let mut max_weight = 0;
        let mut found_a = 0..0;
        let mut found_b = 0..0;
        let end_a = range_a.end;
        for i in range_a {
            let end_b = range_b.end;
            // look up candidates for b in the whole document
            for j in self
                .vtree_b_index
                .get(&self.vtree_a.nodes[i].tree_signature_id)
                .unwrap_or(&vec![])
            {
                // it has to be in range, otherwise don't consider it
                if range_b.contains(j) {
                    let (weight, length) = self.explorer(i..end_a, *j..end_b);
                    if weight > max_weight {
                        max_weight = weight;
                        found_a = i..i + length;
                        found_b = *j..j + length;
                    }
                }
            }
        }
        (found_a, found_b)
    }

    fn explorer(&self, range_a: Range<usize>, range_b: Range<usize>) -> (u32, usize) {
        let mut weight = 0;
        let mut length = 0;
        loop {
            let index_a = range_a.start + length;
            if index_a >= range_a.end {
                break;
            }
            let index_b = range_b.start + length;
            if index_b >= range_b.end {
                break;
            }
            let node_a = self.vtree_a.get(index_a);
            if index_a + node_a.descendant_count as usize >= range_a.end {
                break;
            }
            let node_b = self.vtree_b.get(index_b);
            if index_b + node_b.descendant_count as usize >= range_b.end {
                break;
            }
            if node_a.tree_signature_id != node_b.tree_signature_id {
                break;
            }
            weight += node_a.weight;
            length += node_b.descendant_count as usize + 1;
        }
        (weight, length)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    use xot::Xot;

    #[test]
    fn test_lcss_elements() {
        let mut xot = Xot::new();
        let doc_a = xot.parse("<container><a/><b/><c/></container>").unwrap();
        let doc_b = xot.parse("<container><a/><b/><d/></container>").unwrap();
        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let (range_a, range_b) = comparison.lcss();
        assert_eq!(range_a, 2..4);
        assert_eq!(range_b, 2..4);
    }

    #[test]
    fn test_lcss_text() {
        let mut xot = Xot::new();
        let doc_a = xot
            .parse("<container><a>AAA</a><b>BBBBB</b></container>")
            .unwrap();
        let doc_b = xot.parse("<container><b>BBBBB</b></container>").unwrap();
        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let (range_a, range_b) = comparison.lcss();
        assert_eq!(range_a, 4..6);
        assert_eq!(range_b, 2..4);
    }

    #[test]
    fn test_lcss_style_markers() {
        let mut xot = Xot::new();
        let doc_a = xot.parse("<p>Hello <i>world</i>!</p>").unwrap();
        let doc_b = xot.parse("<p>Hello <i>world</i>?</p>").unwrap();
        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let (range_a, range_b) = comparison.lcss();
        assert_eq!(range_a, 2..5);
        assert_eq!(range_b, 2..5);
    }

    #[test]
    fn test_lcss_outer_range() {
        let mut xot = Xot::new();
        let doc_a = xot.parse("<container><a/><b/><c/></container>").unwrap();
        let doc_b = xot.parse("<container><a/><b/><c/></container>").unwrap();
        let comparison = Comparison::new(&xot, doc_a, doc_b);
        let (range_a, range_b) = comparison.lcss_in_range(
            3..comparison.vtree_a.nodes.len(),
            3..comparison.vtree_b.nodes.len(),
        );
        assert_eq!(range_a, 3..5);
        assert_eq!(range_b, 3..5);
    }
}