use dissimilar::{diff, Chunk};
use xot::{Element, NameId, Node, Text, Value, Xot};
use crate::comparison::Comparison;
use crate::vtree::{Status, Vtree};
/// A single edit produced by `Comparison::edits`.
///
/// The `usize` payloads of `Delete`, `TextUpdate` and `AttributeUpdate` are
/// node ids in tree A (indices into `vtree_a.nodes`).
#[derive(Debug, PartialEq, Eq)]
pub enum Edit {
/// Insert the given content at the given position.
Insert(InsertPosition, InsertContent),
/// Delete the node with this id in tree A.
Delete(usize),
/// Replace the text of the text node with this id in tree A; the changes
/// are the diff chunks between the old and the new text.
TextUpdate(usize, Vec<TextChange>),
/// Change the attributes of the element with this id in tree A.
AttributeUpdate(usize, Vec<AttributeChange>),
}
/// Where an `Edit::Insert` places its content.
#[derive(Debug, PartialEq, Eq)]
pub struct InsertPosition {
/// Id in tree A of the node that was matched (equal or update) with the
/// parent of the inserted node in tree B.
pub parent_node_id: usize,
/// Index of the inserted node among the children of its parent in tree B.
pub child_position: u32,
/// One plus, for every sibling preceding the inserted node in tree B,
/// one plus that sibling's descendant count. It is added to
/// `parent_node_id` to form the sort key of the insert in
/// `Comparison::edits`.
pub intervening: u32,
}
/// What an `Edit::Insert` inserts.
#[derive(Debug, PartialEq, Eq)]
pub enum InsertContent {
/// An element node of document B, referenced by its `xot` node handle.
XmlNode(Node),
/// An owned copy of the text of a text node of document B.
Text(String),
}
/// One chunk of a text diff; an owned counterpart of `dissimilar::Chunk`.
#[derive(Debug, PartialEq, Eq)]
pub enum TextChange {
/// Text present in both the old and the new text.
Equal(String),
/// Text present only in the old text.
Delete(String),
/// Text present only in the new text.
Insert(String),
}
/// One attribute-level change between two matched elements.
#[derive(Debug, PartialEq, Eq)]
pub enum AttributeChange {
/// The attribute exists on both elements with different values; carries
/// the value found on the element of document B.
Update(NameId, String),
/// The attribute exists only on the element of document A.
Delete(NameId),
/// The attribute exists only on the element of document B; carries its
/// value.
Insert(NameId, String),
}
impl Comparison {
    /// Computes the edits that turn document A into document B.
    ///
    /// Runs `diff_status` first, then gathers deletes, inserts and updates
    /// (in that order) and finally sorts them:
    ///
    /// * deletes and updates sort by the id of the affected node in tree A;
    /// * inserts sort by `parent_node_id + intervening`, and on an equal
    ///   position come after a delete or update.
    ///
    /// The sort is stable, so edits with an identical key keep the order in
    /// which they were gathered.
    ///
    /// # Panics
    ///
    /// Panics when an inserted node is neither an element nor a text node,
    /// or when an updated pair is not text/text or element/element.
    pub(crate) fn edits(&mut self, xot: &mut Xot) -> Vec<Edit> {
        /// Ordering key of an edit; the second component breaks ties so
        /// inserts follow deletes and updates at the same position.
        fn sort_key(edit: &Edit) -> (usize, u8) {
            match edit {
                Edit::Delete(id) | Edit::TextUpdate(id, _) | Edit::AttributeUpdate(id, _) => {
                    (*id, 0)
                }
                Edit::Insert(position, _) => {
                    (position.parent_node_id + position.intervening as usize, 1)
                }
            }
        }

        self.diff_status(xot);

        let mut edits: Vec<Edit> = self.deletes().map(Edit::Delete).collect();

        edits.extend(self.inserts().map(
            |(node_id, parent_node_id, (child_position, intervening))| {
                let insert_node = self.get_node_b(node_id);
                let content = match xot.value(insert_node) {
                    Value::Element(_) => InsertContent::XmlNode(insert_node),
                    Value::Text(text) => InsertContent::Text(text.get().to_owned()),
                    _ => panic!("Unsupported node type"),
                };
                let position = InsertPosition {
                    parent_node_id,
                    child_position,
                    intervening,
                };
                Edit::Insert(position, content)
            },
        ));

        edits.extend(self.updates().map(|(node_id_a, node_id_b)| {
            let node_a = self.get_node_a(node_id_a);
            let node_b = self.get_node_b(node_id_b);
            match (xot.value(node_a), xot.value(node_b)) {
                (Value::Text(old_text), Value::Text(new_text)) => {
                    Edit::TextUpdate(node_id_a, get_text_changes(old_text, new_text))
                }
                (Value::Element(old_element), Value::Element(new_element)) => {
                    Edit::AttributeUpdate(
                        node_id_a,
                        get_attribute_changes(old_element, new_element),
                    )
                }
                _ => panic!("Unsupported node values for update"),
            }
        }));

        // `sort_by_key` is stable; the relative order of equal keys matters.
        edits.sort_by_key(sort_key);
        edits
    }

    /// Yields the ids of the tree A nodes that have to be deleted.
    ///
    /// A node qualifies when its own status is `Different` while its parent
    /// is `Equal` or `Update`: only the top of a removed subtree is
    /// reported. Nodes without a parent are never reported.
    fn deletes(&self) -> impl Iterator<Item = usize> + '_ {
        let nodes = &self.vtree_a.nodes;
        nodes.iter().enumerate().filter_map(move |(id, vnode)| {
            if !matches!(vnode.status, Status::Different) {
                return None;
            }
            let parent = &nodes[vnode.parent_id?];
            if matches!(parent.status, Status::Different) {
                None
            } else {
                Some(id)
            }
        })
    }

    /// Yields the tree B nodes that have to be inserted.
    ///
    /// Each item is `(id in tree B, id in tree A of the node matched with
    /// the parent, (child position, intervening))`. A node qualifies when
    /// its own status is `Different` while its parent is `Equal` or
    /// `Update`; nodes without a parent are never reported.
    fn inserts(&self) -> impl Iterator<Item = (usize, usize, (u32, u32))> + '_ {
        let vtree_b = &self.vtree_b;
        vtree_b
            .nodes
            .iter()
            .enumerate()
            .filter_map(move |(id, vnode)| {
                if !matches!(vnode.status, Status::Different) {
                    return None;
                }
                let parent_id = vnode.parent_id?;
                match vtree_b.nodes[parent_id].status {
                    Status::Equal(id_a) | Status::Update(id_a) => Some((
                        id,
                        id_a as usize,
                        get_child_position(vtree_b, parent_id, id),
                    )),
                    Status::Different => None,
                }
            })
    }

    /// Yields `(id in tree A, id in tree B)` for every tree A node whose
    /// status is `Update`.
    fn updates(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
        self.vtree_a
            .nodes
            .iter()
            .enumerate()
            .filter_map(|(id_a, vnode)| {
                if let Status::Update(id_b) = vnode.status {
                    Some((id_a, id_b as usize))
                } else {
                    None
                }
            })
    }
}
/// Locates `child` among the children of `parent` in `vtree`.
///
/// Returns `(index, intervening)`: `index` is the position of the child in
/// its parent's child list, and `intervening` starts at 1 and grows by one
/// plus the descendant count of every sibling that precedes the child.
///
/// The child is identified by address, not by value.
///
/// # Panics
///
/// Panics when `child` is not one of the children of `parent`.
fn get_child_position(vtree: &Vtree, parent: usize, child: usize) -> (u32, u32) {
    let target = &vtree.nodes[child];
    let mut offset = 1;
    for (index, sibling) in vtree.children(&vtree.nodes[parent]).enumerate() {
        if std::ptr::eq(sibling, target) {
            return (index as u32, offset);
        }
        // Skip over the sibling itself and everything below it.
        offset += 1 + sibling.descendant_count;
    }
    unreachable!("child not found in parent");
}
/// Diffs the content of two text nodes and returns the chunks as owned
/// `TextChange` values, in the order produced by `dissimilar::diff`.
fn get_text_changes(text_a: &Text, text_b: &Text) -> Vec<TextChange> {
    let chunks = diff(text_a.get(), text_b.get());
    chunks
        .into_iter()
        .map(|chunk| match chunk {
            Chunk::Equal(same) => TextChange::Equal(same.to_owned()),
            Chunk::Delete(removed) => TextChange::Delete(removed.to_owned()),
            Chunk::Insert(added) => TextChange::Insert(added.to_owned()),
        })
        .collect()
}
/// Lists the attribute changes that turn `element_a` into `element_b`.
///
/// Updates and deletes come first, in the iteration order of the attributes
/// of `element_a`; inserts follow, in the iteration order of the attributes
/// of `element_b`. Attributes with an identical value on both elements
/// produce no change.
fn get_attribute_changes(element_a: &Element, element_b: &Element) -> Vec<AttributeChange> {
    let attributes_a = element_a.attributes();
    let attributes_b = element_b.attributes();

    // First pass: attributes of A that changed value or disappeared.
    let changed_or_removed =
        attributes_a
            .iter()
            .filter_map(|(name, value_a)| match attributes_b.get(name) {
                Some(value_b) if value_a == value_b => None,
                Some(value_b) => Some(AttributeChange::Update(*name, value_b.clone())),
                None => Some(AttributeChange::Delete(*name)),
            });

    // Second pass: attributes that exist only on B.
    let added = attributes_b.iter().filter_map(|(name, value_b)| {
        if attributes_a.contains_key(name) {
            None
        } else {
            Some(AttributeChange::Insert(*name, value_b.clone()))
        }
    });

    changed_or_removed.chain(added).collect()
}
// Unit tests for the edit computation. The numeric ids in the assertions are
// indices into the vtree node lists (see `deletes`, `inserts` and `updates`).
#[cfg(test)]
mod tests {
use super::*;
use xot::Xot;
// `deletes` on the book fixture: document B lacks the first chapter's title
// and the last chapter's "Text 11" para.
#[test]
fn test_deletes() {
let mut xot = Xot::new();
let doc_a = xot
.parse(concat!(
"<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 5</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
))
.unwrap();
let doc_b = xot
.parse(concat!(
"<book><chapter><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 12</para></chapter></book>"
))
.unwrap();
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
// The statuses must be computed before the private iterators are used.
comparison.diff_status(&xot);
let deletes = comparison.deletes().collect::<Vec<_>>();
assert_eq!(deletes, vec![3, 25]);
}
// `inserts` on the same book fixture: document B has an extra
// "Text 11" para in its second chapter.
#[test]
fn test_inserts() {
let mut xot = Xot::new();
let doc_a = xot
.parse(concat!(
"<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 5</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
))
.unwrap();
let doc_b = xot
.parse(concat!(
"<book><chapter><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 12</para></chapter></book>"
))
.unwrap();
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
comparison.diff_status(&xot);
let inserts = comparison.inserts().collect::<Vec<_>>();
// Item layout: (id in tree B, parent id in tree A, (child position, intervening)).
assert_eq!(inserts, vec![(10, 7, (2, 5))]);
}
// `updates` on the same book fixture: "Text 5" in document A became
// "Text 25" in document B.
#[test]
fn test_updates() {
let mut xot = Xot::new();
let doc_a = xot
.parse(concat!(
"<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 5</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
))
.unwrap();
let doc_b = xot
.parse(concat!(
"<book><chapter><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 12</para></chapter></book>"
))
.unwrap();
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
comparison.diff_status(&xot);
let updates = comparison.updates().collect::<Vec<_>>();
// Item layout: (id in tree A, id in tree B).
assert_eq!(updates, vec![(11, 9)]);
}
// Full `edits` output on the book fixture, checking the sorted order of
// deletes, the text update and the insert.
#[test]
fn test_edits() {
let mut xot = Xot::new();
let xml_a = concat!(
"<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 5</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
);
let xml_b = concat!(
"<book><chapter><para>Text 2</para></chapter>",
"<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
"<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
"<chapter><title>Text 9</title><para>Text 10</para></chapter>",
"<chapter><para>Text 12</para></chapter></book>"
);
let doc_a = xot.parse(xml_a).unwrap();
let doc_b = xot.parse(xml_b).unwrap();
// The expected insert carries the node handle from document B, so look it
// up by its text content.
let inserted_node = xot
.descendants(doc_b)
.find(|node| xot.text_content_str(*node) == Some("Text 11"))
.unwrap();
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
let edits = comparison.edits(&mut xot);
assert_eq!(
edits,
vec![
Edit::Delete(3),
Edit::TextUpdate(
11,
vec![
(TextChange::Equal("Text ".to_string())),
(TextChange::Insert("2".to_string())),
(TextChange::Equal("5".to_string()))
]
),
Edit::Insert(
InsertPosition {
parent_node_id: 7,
child_position: 2,
intervening: 5
},
InsertContent::XmlNode(inserted_node)
),
Edit::Delete(25)
]
);
}
// Two sibling sections swap places; the whitespace text between elements
// is part of the documents and shows up as a text insert.
#[test]
fn test_edits_problematic() {
let mut xot = Xot::new();
let xml_a = r#"<section>
<title>T</title>
<para>P</para>
<section>First longer</section>
<section>Second</section>
</section>"#;
let xml_b = r#"<section>
<title>T</title>
<para>P</para>
<section>Second</section>
<section>First longer</section>
</section>"#;
let doc_a = xot.parse(xml_a).unwrap();
let doc_b = xot.parse(xml_b).unwrap();
let inserted_node = xot
.descendants(doc_b)
.find(|node| xot.text_content_str(*node) == Some("Second"))
.unwrap();
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
let edits = comparison.edits(&mut xot);
// Inserts and deletes are interleaved by their sort position.
assert_eq!(
edits,
vec![
Edit::Insert(
InsertPosition {
parent_node_id: 1,
child_position: 5,
intervening: 8
},
InsertContent::XmlNode(inserted_node)
),
Edit::Delete(11),
Edit::Insert(
InsertPosition {
parent_node_id: 1,
child_position: 6,
intervening: 10
},
InsertContent::Text("\n\n ".to_owned())
),
Edit::Delete(12)
]
);
}
// Same kind of swap with larger sections that contain mixed content; only
// the ids reported by `deletes` are checked.
#[test]
fn test_deletes_problematic() {
let mut xot = Xot::new();
let xml_a = r#"<section>
<title>Section title</title>
<para>Compare documents with different root element</para>
<section>
<title>Sit Ipsum Consectetur Sem Ligula</title>
<para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
<section>
<title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
<para>Morbi leo <code>porta ac consectetur</code>risus, ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
</section>"#;
let xml_b = r#"<section>
<title>Section title</title>
<para>Compare documents with different root element</para>
<section>
<title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
<para>Morbi leo <code>porta ac consectetur</code>risus, ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
<section>
<title>Sit Ipsum Consectetur Sem Ligula</title>
<para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
</section>
</section>"#;
let doc_a = xot.parse(xml_a).unwrap();
let doc_b = xot.parse(xml_b).unwrap();
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
comparison.diff_status(&xot);
let deletes = comparison.deletes().collect::<Vec<_>>();
assert_eq!(deletes, vec![8, 9]);
}
// An element that only gains an attribute is reported by `updates`.
#[test]
fn test_attribute_update() {
let mut xot = Xot::new();
let xml_a = r#"<r><a/></r>"#;
let xml_b = r#"<r><a id="1"/></r>"#;
let doc_a = xot.parse(xml_a).unwrap();
let doc_b = xot.parse(xml_b).unwrap();
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
comparison.diff_status(&xot);
let updates = comparison.updates().collect::<Vec<_>>();
assert_eq!(updates, vec![(2, 2)]);
}
// The same attribute addition, checked through the full `edits` output.
#[test]
fn test_attribute_update_edits() -> Result<(), xot::Error> {
let mut xot = Xot::new();
let xml_a = r#"<r><a/></r>"#;
let xml_b = r#"<r><a id="1"/></r>"#;
let doc_a = xot.parse(xml_a)?;
let doc_b = xot.parse(xml_b)?;
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
let edits = comparison.edits(&mut xot);
// Obtain the name id of "id" to build the expected change.
let name_id = xot.add_name("id");
assert_eq!(
edits,
vec![Edit::AttributeUpdate(
2,
vec![AttributeChange::Insert(name_id, "1".to_string())]
)]
);
Ok(())
}
// An attribute added to an element that has text content must yield only
// the attribute update and no edit for the unchanged text.
#[test]
fn test_attribute_propagation_edits() -> Result<(), xot::Error> {
let mut xot = Xot::new();
let xml_a = r#"<r><a>Text</a></r>"#;
let xml_b = r#"<r><a id="1">Text</a></r>"#;
let doc_a = xot.parse(xml_a)?;
let doc_b = xot.parse(xml_b)?;
let mut comparison = Comparison::new(&xot, doc_a, doc_b);
let edits = comparison.edits(&mut xot);
let name_id = xot.add_name("id");
assert_eq!(
edits,
vec![Edit::AttributeUpdate(
2,
vec![AttributeChange::Insert(name_id, "1".to_string())]
)]
);
Ok(())
}
}