use std::ops::Range;
use crate::comparison::{Comparison, EqualPair};
pub(crate) type Overlap = (Range<usize>, Range<usize>);
impl Comparison {
pub(crate) fn partition(&self) -> Vec<(Range<usize>, Range<usize>)> {
self.partition_in_range(0..self.vtree_a.nodes.len(), 0..self.vtree_b.nodes.len())
}
fn partition_in_range(
&self,
range_a: Range<usize>,
range_b: Range<usize>,
) -> Vec<(Range<usize>, Range<usize>)> {
let mut ranges = vec![(range_a, range_b)];
let mut result = Vec::new();
while !ranges.is_empty() {
let (range_a, range_b) = ranges.pop().unwrap();
let overlap = self.lcss_in_range(range_a.clone(), range_b.clone());
let (overlap_a, overlap_b) = overlap.clone();
if overlap_a.is_empty() {
continue;
}
result.push(overlap);
let before_range_a = range_a.start..overlap_a.start;
let after_range_a = overlap_a.end..range_a.end;
let before_range_b = range_b.start..overlap_b.start;
let after_range_b = overlap_b.end..range_b.end;
ranges.push((before_range_a, before_range_b));
ranges.push((after_range_a, after_range_b));
}
result.sort_by_key(|(range_a, _)| range_a.start);
result
}
}
pub(crate) fn equal_pairs(overlaps: &[Overlap]) -> Vec<EqualPair> {
overlaps
.iter()
.flat_map(|(a_range, b_range)| a_range.clone().into_iter().zip(b_range.clone().into_iter()))
.map(|(a, b)| EqualPair(a, b))
.collect::<Vec<_>>()
}
#[cfg(test)]
mod tests {
use super::*;
use xot::Xot;
#[test]
fn test_partition() {
let mut xot = Xot::new();
let doc_a = xot.parse("<container><a/><b/></container>").unwrap();
let doc_b = xot
.parse("<container><x/><a/><b/><c/></container>")
.unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let result = comparison.partition();
assert_eq!(result, vec![(2..4, 3..5)]);
}
#[test]
fn test_partition_multiple_overlaps() {
let mut xot = Xot::new();
let doc_a = xot
.parse("<container><a/><b/><x/><c/><d/></container>")
.unwrap();
let doc_b = xot
.parse("<container><x/><a/><b/><c/><d/></container>")
.unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let result = comparison.partition();
assert_eq!(result, vec![(2..4, 3..5), (5..7, 5..7)]);
}
#[test]
fn test_partition_longer() {
let mut xot = Xot::new();
let doc_a = xot
.parse(concat!(
"<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 5</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
))
.unwrap();
let doc_b = xot
.parse(concat!(
"
<book><chapter><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 12</para></chapter></book>"
))
.unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let result = comparison.partition();
assert_eq!(
result,
vec![
(5..7, 3..5),
(8..10, 6..8),
(12..24, 12..24),
(27..29, 25..27)
]
);
}
#[test]
fn test_partition_problematic() {
let mut xot = Xot::new();
let xml_a = r#"<section>
<title>Section title</title>
<para>Compare documents with different root element</para>
<section>
<title>Sit Ipsum Consectetur Sem Ligula</title>
<para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
<section>
<title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
<para>Morbi leo <code>porta ac consectetur</code>risus, ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
</section>"#;
let xml_b = r#"<section>
<title>Section title</title>
<para>Compare documents with different root element</para>
<section>
<title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
<para>Morbi leo <code>porta ac consectetur</code>risus, ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
<section>
<title>Sit Ipsum Consectetur Sem Ligula</title>
<para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
</section>"#;
let doc_a = xot.parse(xml_a).unwrap();
let doc_b = xot.parse(xml_b).unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let result = comparison.partition();
assert_eq!(
result,
vec![(2..8, 2..8), (20..35, 8..23), (35..36, 35..36)]
);
}
}