// natural-xml-diff 0.1.0
//
// Natural diffing between XML documents
// Documentation
use xot::{NameId, Node, Xot};

use crate::diff::DIFF_NS_URI;
use crate::edits::AttributeChange;

/// Applies a diff document in place.
///
/// `root` holds a document annotated with diff markup in the `DIFF_NS_URI`
/// namespace. Every descendant of `root` is visited and the markup is
/// resolved as follows, in this order of priority:
///
/// * a `text-insert` element is unwrapped;
/// * a `text-delete` element is removed;
/// * an element carrying the `insert` attribute loses that attribute;
/// * an element carrying the `delete` attribute is removed;
/// * an element whose first child is an `attributes` element has the
///   attribute changes listed there applied, and that child is removed.
///
/// Finally the `diff` prefix is removed from the document element. The
/// outcome is meant to be the target `b` document, which makes this
/// primarily useful for verifying that the diffing algorithm is correct.
///
/// # Panics
///
/// Panics if the document does not have the expected shape, for example
/// when an element inside `attributes` carries none of the `attr-delete`,
/// `attr-insert` or `attr-update` markers, when `root` has no document
/// element, or when the `diff` prefix is unknown to `xot`.
pub(crate) fn apply_diff(xot: &mut Xot, root: Node) {
    let ns = xot.add_namespace(DIFF_NS_URI);
    let text_insert_name = xot.add_name_ns("text-insert", ns);
    let text_delete_name = xot.add_name_ns("text-delete", ns);
    let insert_name = xot.add_name_ns("insert", ns);
    let delete_name = xot.add_name_ns("delete", ns);
    let attr_delete_name = xot.add_name_ns("attr-delete", ns);
    let attr_insert_name = xot.add_name_ns("attr-insert", ns);
    let attr_update_name = xot.add_name_ns("attr-update", ns);
    let attributes_name = xot.add_name_ns("attributes", ns);

    // Snapshot the traversal first: the loop below mutates the tree.
    let descendants: Vec<Node> = xot.descendants(root).collect();

    // Attribute changes are only recorded during the walk and applied after it.
    let mut pending = Vec::new();

    for node in descendants {
        // An earlier step (such as text consolidation) may have removed it.
        if xot.is_removed(node) {
            continue;
        }

        // Read everything we need from the element up front so that the
        // branches below are free to mutate the tree.
        let (name, inserted, deleted) = match xot.element(node) {
            Some(element) => (
                element.name(),
                element.get_attribute(insert_name).is_some(),
                element.get_attribute(delete_name).is_some(),
            ),
            None => continue,
        };

        if name == text_insert_name {
            // inserted text: keep the content, drop the wrapper
            xot.element_unwrap(node).unwrap();
        } else if name == text_delete_name {
            // deleted text
            xot.remove(node).unwrap();
        } else if inserted {
            // inserted element: only the marker attribute has to go
            xot.element_mut(node).unwrap().remove_attribute(insert_name);
        } else if deleted {
            // deleted element
            xot.remove(node).unwrap();
        } else if let Some(attributes) = attributes_node(xot, node, attributes_name) {
            // Each child element is named after the attribute it describes;
            // its marker attribute selects the kind of change and its text
            // content supplies the value.
            let changes = xot.children(attributes).filter_map(|child| {
                let entry = xot.element(child)?;
                let attr = entry.name();
                let change = if entry.get_attribute(attr_delete_name).is_some() {
                    AttributeChange::Delete(attr)
                } else if entry.get_attribute(attr_insert_name).is_some() {
                    let value = xot.text_content_str(child).unwrap();
                    AttributeChange::Insert(attr, value.to_string())
                } else if entry.get_attribute(attr_update_name).is_some() {
                    let value = xot.text_content_str(child).unwrap();
                    AttributeChange::Update(attr, value.to_string())
                } else {
                    unreachable!()
                };
                Some((node, change))
            });
            pending.extend(changes);
            // all information has been collected, so the carrier can go
            xot.remove(attributes).unwrap();
        }
    }

    // Apply the recorded attribute changes to their owning elements.
    for (node, change) in pending {
        let element = xot.element_mut(node).unwrap();
        match change {
            AttributeChange::Delete(name) => {
                element.remove_attribute(name);
            }
            AttributeChange::Insert(name, value) | AttributeChange::Update(name, value) => {
                element.set_attribute(name, value);
            }
        }
    }

    // The diff prefix declared on the document element is no longer needed.
    let document_element = xot.document_element(root).unwrap();
    let diff_prefix = xot.prefix("diff").unwrap();
    xot.element_mut(document_element)
        .unwrap()
        .remove_prefix(diff_prefix);
}

/// Returns the first child of `node` when that child is an element named
/// `attributes_name`, and `None` in every other case (no children, a first
/// child that is not an element, or an element with a different name).
fn attributes_node(xot: &Xot, node: Node, attributes_name: NameId) -> Option<Node> {
    let first = xot.first_child(node)?;
    let element = xot.element(first)?;
    if element.name() == attributes_name {
        Some(first)
    } else {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::diff::apply_edits;
    use crate::edits::Edit;
    use crate::edits::{InsertContent, InsertPosition};

    /// Turns `edits` into diff markup on `root` and then resolves that markup
    /// again, so `root` ends up as the edited document.
    pub(crate) fn apply_doc(xot: &mut Xot, root: Node, edits: &[Edit]) {
        apply_edits(xot, root, edits);
        apply_diff(xot, root);
    }

    /// Parses `xml` as its own document and returns its document element.
    fn parse_element(xot: &mut Xot, xml: &str) -> Node {
        let document = xot.parse(xml).unwrap();
        xot.document_element(document).unwrap()
    }

    // Deleting a nested element.
    #[test]
    fn test_simple_delete() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><b><c/></b></a>").unwrap();

        apply_doc(&mut xot, root, &[Edit::Delete(3)]);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><b/></a>");
    }

    // Inserting an element into an empty parent.
    #[test]
    fn test_simple_insert() {
        let mut xot = Xot::new();
        let root = xot.parse("<a></a>").unwrap();
        let new_element = parse_element(&mut xot, "<b/>");

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 0,
            intervening: 0,
        };
        let edits = [Edit::Insert(position, InsertContent::XmlNode(new_element))];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><b/></a>");
    }

    // Inserting text into an empty parent.
    #[test]
    fn test_simple_insert_text() {
        let mut xot = Xot::new();
        let root = xot.parse("<a></a>").unwrap();

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 0,
            intervening: 0,
        };
        let edits = [Edit::Insert(
            position,
            InsertContent::Text("Text!".to_owned()),
        )];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a>Text!</a>");
    }

    // Inserting an element before existing siblings.
    #[test]
    fn test_insert_beginning() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><two/><three/></a>").unwrap();
        let new_element = parse_element(&mut xot, "<one/>");

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 0,
            intervening: 0,
        };
        let edits = [Edit::Insert(position, InsertContent::XmlNode(new_element))];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><one/><two/><three/></a>");
    }

    // Inserting an element between existing siblings.
    #[test]
    fn test_insert_middle() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><one/><three/></a>").unwrap();
        let new_element = parse_element(&mut xot, "<two/>");

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 1,
            intervening: 0,
        };
        let edits = [Edit::Insert(position, InsertContent::XmlNode(new_element))];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><one/><two/><three/></a>");
    }

    // Inserting an element after existing siblings.
    #[test]
    fn test_insert_end() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><one/><two/></a>").unwrap();
        let new_element = parse_element(&mut xot, "<three/>");

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 2,
            intervening: 2,
        };
        let edits = [Edit::Insert(position, InsertContent::XmlNode(new_element))];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><one/><two/><three/></a>");
    }

    // Inserting text before existing siblings.
    #[test]
    fn test_insert_text_beginning() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><two/><three/></a>").unwrap();

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 0,
            intervening: 0,
        };
        let edits = [Edit::Insert(position, InsertContent::Text("One".to_owned()))];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a>One<two/><three/></a>");
    }

    // Inserting text between existing siblings.
    #[test]
    fn test_insert_text_middle() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><one/><three/></a>").unwrap();

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 1,
            intervening: 1,
        };
        let edits = [Edit::Insert(position, InsertContent::Text("Two".to_owned()))];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><one/>Two<three/></a>");
    }

    // Inserting text after existing siblings.
    #[test]
    fn test_insert_text_end() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><one/><two/></a>").unwrap();

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 2,
            intervening: 2,
        };
        let edits = [Edit::Insert(
            position,
            InsertContent::Text("Three".to_owned()),
        )];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><one/><two/>Three</a>");
    }

    // A delete followed by an insert at the start of the same parent.
    #[test]
    fn test_delete_then_insert() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><b/><c/></a>").unwrap();
        let new_element = parse_element(&mut xot, "<d/>");

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 0,
            intervening: 0,
        };
        let edits = [
            Edit::Delete(2),
            Edit::Insert(position, InsertContent::XmlNode(new_element)),
        ];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><d/><c/></a>");
    }

    // A delete followed by an insert further along in the same parent.
    #[test]
    fn test_delete_then_insert_after() {
        let mut xot = Xot::new();
        let root = xot.parse("<a><b/><c/><d/></a>").unwrap();
        let new_element = parse_element(&mut xot, "<x/>");

        let position = InsertPosition {
            parent_node_id: 1,
            child_position: 1,
            intervening: 0,
        };
        let edits = [
            Edit::Delete(2),
            Edit::Insert(position, InsertContent::XmlNode(new_element)),
        ];
        apply_doc(&mut xot, root, &edits);

        let serialized = xot.serialize_to_string(root);
        assert_eq!(serialized, "<a><c/><x/><d/></a>");
    }
}