use ahash::AHashMap;
use std::borrow::Cow;
use xot::{Element, NameId, Node, Value, Xot};
#[derive(Debug, Hash, PartialEq, Eq)]
pub(crate) struct ElementSignature<'a> {
name: NameId,
attributes: Cow<'a, [(NameId, Cow<'a, str>)]>,
}
impl<'a> ElementSignature<'a> {
fn new(element: &'a Element) -> Self {
let mut attributes: Vec<(NameId, Cow<'a, str>)> = element
.attributes()
.iter()
.map(|(name, value)| (*name, value.into()))
.collect();
attributes.sort();
Self {
name: element.name(),
attributes: attributes.into(),
}
}
}
#[derive(Debug, Hash, PartialEq, Eq)]
pub(crate) enum NodeSignature<'a> {
Element(ElementSignature<'a>),
Text(Cow<'a, str>),
Root,
}
pub(crate) struct Signatures<T: Eq + std::hash::Hash> {
map: AHashMap<T, u32>,
next_id: u32,
}
impl<T: Eq + std::hash::Hash> Signatures<T> {
pub(crate) fn new() -> Self {
Self {
map: AHashMap::new(),
next_id: 0,
}
}
fn get_id(&mut self, signature: T) -> u32 {
if let Some(id) = self.map.get(&signature) {
*id
} else {
let id = self.next_id;
self.next_id += 1;
self.map.insert(signature, id);
id
}
}
}
#[derive(Debug, Hash, PartialEq, Eq)]
pub(crate) struct TreeSignature<'a> {
node_signature_id: u32,
tree_signature_ids: Cow<'a, [u32]>,
}
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Status {
Equal(u32),
Update(u32),
Different,
}
#[derive(Debug)]
pub(crate) struct Vnode {
pub(crate) node: Node,
pub(crate) parent_id: Option<usize>,
pub(crate) first_child: Option<usize>,
pub(crate) next_sibling: Option<usize>,
pub(crate) node_signature_id: u32,
pub tree_signature_id: u32,
pub descendant_count: u32,
pub weight: u32,
pub status: Status,
}
type NodeToIndex = AHashMap<Node, usize>;
impl Vnode {
fn new(xot: &Xot, node_to_index: &NodeToIndex, node: Node, node_signature_id: u32) -> Self {
Self {
node,
parent_id: xot.parent(node).map(|n| node_to_index[&n]),
first_child: xot.first_child(node).map(|n| node_to_index[&n]),
next_sibling: xot.next_sibling(node).map(|n| node_to_index[&n]),
node_signature_id,
tree_signature_id: 0,
descendant_count: 0,
weight: 0,
status: Status::Different,
}
}
fn get_children_tree_signature_ids(&self, vnodes: &[Vnode]) -> Vec<u32> {
children(vnodes, self)
.map(|vnode| vnode.tree_signature_id)
.collect::<Vec<_>>()
}
}
pub(crate) struct Vtree {
pub(crate) nodes: Vec<Vnode>,
}
impl Vtree {
pub(crate) fn new<'a>(
xot: &'a Xot,
root: Node,
node_signatures: &mut Signatures<NodeSignature<'a>>,
tree_signatures: &mut Signatures<TreeSignature<'a>>,
) -> Self {
let node_to_index = xot
.descendants(root)
.enumerate()
.map(|(i, n)| (n, i))
.collect::<AHashMap<_, _>>();
let mut vnodes = xot
.descendants(root)
.map(|node| {
Vnode::new(
xot,
&node_to_index,
node,
get_node_signature_id(xot, node, node_signatures),
)
})
.collect::<Vec<_>>();
for i in (0..vnodes.len()).rev() {
let vnode = &vnodes[i];
let ids: Vec<u32> = vnode.get_children_tree_signature_ids(&vnodes);
let tree_signature = TreeSignature {
node_signature_id: vnode.node_signature_id,
tree_signature_ids: ids.into(),
};
let tree_signature_id = tree_signatures.get_id(tree_signature);
let mut weight = vnode.weight;
weight += match xot.value(vnode.node) {
Value::Element(_) => 1,
Value::Text(text) => text.get().len() as u32,
Value::Root => 0,
_ => {
panic!("Node type not supported yet");
}
};
let descendant_count = vnode.descendant_count;
let parent_id = vnode.parent_id;
let vnode = &mut vnodes[i];
vnode.tree_signature_id = tree_signature_id;
vnode.weight = weight;
if let Some(parent_id) = parent_id {
let parent = &mut vnodes[parent_id];
parent.weight += weight;
parent.descendant_count += descendant_count + 1;
}
}
Self { nodes: vnodes }
}
pub(crate) fn get(&self, index: usize) -> &Vnode {
&self.nodes[index]
}
pub(crate) fn children(&self, parent: &Vnode) -> impl Iterator<Item = &Vnode> {
children(&self.nodes, parent)
}
}
fn children<'a>(vnodes: &'a [Vnode], parent: &Vnode) -> impl Iterator<Item = &'a Vnode> {
let mut current = parent.first_child;
std::iter::from_fn(move || {
if let Some(current_node) = current {
let node = &vnodes[current_node];
current = node.next_sibling;
Some(node)
} else {
None
}
})
}
fn get_node_signature_id<'a>(
xot: &'a Xot,
node: Node,
node_signatures: &mut Signatures<NodeSignature<'a>>,
) -> u32 {
match xot.value(node) {
Value::Element(element) => {
node_signatures.get_id(NodeSignature::Element(ElementSignature::new(element)))
}
Value::Text(text) => node_signatures.get_id(NodeSignature::Text(text.get().into())),
Value::Root => node_signatures.get_id(NodeSignature::Root),
_ => {
panic!("Node type not supported yet")
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::comparison::Comparison;
#[test]
fn test_root_element() {
let mut xot = Xot::new();
let doc_a = xot.parse("<container />").unwrap();
let doc_b = xot.parse("<container />").unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_eq!(a.nodes.len(), b.nodes.len());
assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
}
#[test]
fn test_multiple_elements() {
let mut xot = Xot::new();
let doc_a = xot.parse("<container><a/><b/></container>").unwrap();
let doc_b = xot.parse("<container><a/><c/></container>").unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_eq!(a.nodes.len(), b.nodes.len());
assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
assert_ne!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
}
#[test]
fn test_multiple_elements_attribute_difference() {
let mut xot = Xot::new();
let doc_a = xot
.parse("<container><a s='A'/><b s='B'/></container>")
.unwrap();
let doc_b = xot
.parse("<container><a s='A'/><b s='C'/></container>")
.unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_eq!(a.nodes.len(), b.nodes.len());
assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
assert_ne!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
}
#[test]
fn test_text_nodes() {
let mut xot = Xot::new();
let doc_a = xot
.parse("<container><a>A</a><b>B</b></container>")
.unwrap();
let doc_b = xot
.parse("<container><a>A</a><b>C</b></container>")
.unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_eq!(a.nodes.len(), b.nodes.len());
assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
assert_eq!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
assert_eq!(a.nodes[4].node_signature_id, b.nodes[4].node_signature_id);
assert_ne!(a.nodes[5].node_signature_id, b.nodes[5].node_signature_id);
}
#[test]
fn test_simple_tree_signature_ids() {
let mut xot = Xot::new();
let doc_a = xot.parse("<a />").unwrap();
let doc_b = xot.parse("<b />").unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_ne!(a.nodes[0].tree_signature_id, b.nodes[0].tree_signature_id);
assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
assert_ne!(a.nodes[1].tree_signature_id, b.nodes[1].tree_signature_id);
assert_ne!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
}
#[test]
fn test_tree_signature_ids() {
let mut xot = Xot::new();
let doc_a = xot
.parse("<container><a>A</a><a>A</a></container>")
.unwrap();
let doc_b = xot
.parse("<container><a>A</a><a>Aprime</a></container>")
.unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_ne!(a.nodes[0].tree_signature_id, b.nodes[0].tree_signature_id);
assert_ne!(a.nodes[1].tree_signature_id, b.nodes[1].tree_signature_id);
assert_eq!(a.nodes[2].tree_signature_id, b.nodes[2].tree_signature_id);
assert_eq!(a.nodes[2].tree_signature_id, a.nodes[4].tree_signature_id);
assert_ne!(a.nodes[4].tree_signature_id, b.nodes[4].tree_signature_id);
}
#[test]
fn test_element_weights() {
let mut xot = Xot::new();
let doc_a = xot.parse("<a />").unwrap();
let doc_b = xot.parse("<a><b/></a>").unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_eq!(a.nodes[0].weight, 1);
assert_eq!(b.nodes[0].weight, 2);
assert_eq!(a.nodes[1].weight, 1);
assert_eq!(b.nodes[1].weight, 2);
}
#[test]
fn test_weights_with_text() {
let mut xot = Xot::new();
let doc_a = xot.parse("<a>AAAA</a>").unwrap();
let doc_b = xot.parse("<a>BBB</a>").unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_eq!(a.nodes[0].weight, 5);
assert_eq!(b.nodes[0].weight, 4);
assert_eq!(a.nodes[1].weight, 5);
assert_eq!(b.nodes[1].weight, 4);
assert_eq!(a.nodes[2].weight, 4);
assert_eq!(b.nodes[2].weight, 3);
}
#[test]
fn test_descendant_count() {
let mut xot = Xot::new();
let doc_a = xot.parse("<a><b><c/></b></a>").unwrap();
let doc_b = xot.parse("<a><b/></a>").unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, b) = comparison.vtrees();
assert_eq!(a.nodes[0].descendant_count, 3);
assert_eq!(b.nodes[0].descendant_count, 2);
assert_eq!(a.nodes[1].descendant_count, 2);
assert_eq!(b.nodes[1].descendant_count, 1);
}
#[test]
fn test_vtree_children() {
let mut xot = Xot::new();
let doc_a = xot.parse("<a><b><d/></b><c></c></a>").unwrap();
let doc_b = xot.parse("<a><b/></a>").unwrap();
let comparison = Comparison::new(&xot, doc_a, doc_b);
let (a, _b) = comparison.vtrees();
let children_root = a.children(&a.nodes[0]).collect::<Vec<_>>();
assert_eq!(children_root.len(), 1);
let children_doc_el = a.children(children_root[0]).collect::<Vec<_>>();
assert_eq!(children_doc_el.len(), 2);
assert_eq!(a.children(children_doc_el[0]).count(), 1);
assert_eq!(a.children(children_doc_el[1]).count(), 0);
}
}