1use ahash::AHashMap;
2use std::borrow::Cow;
3use xot::{Element, NameId, Node, Value, Xot};
4
5#[derive(Debug, Hash, PartialEq, Eq)]
7pub(crate) struct ElementSignature<'a> {
8 name: NameId,
9 attributes: Cow<'a, [(NameId, Cow<'a, str>)]>,
10}
11
12impl<'a> ElementSignature<'a> {
13 fn new(element: &'a Element) -> Self {
14 let mut attributes: Vec<(NameId, Cow<'a, str>)> = element
15 .attributes()
16 .iter()
17 .map(|(name, value)| (*name, value.into()))
18 .collect();
19 attributes.sort();
21 Self {
22 name: element.name(),
23 attributes: attributes.into(),
24 }
25 }
26}
27
28#[derive(Debug, Hash, PartialEq, Eq)]
29pub(crate) enum NodeSignature<'a> {
30 Element(ElementSignature<'a>),
31 Text(Cow<'a, str>),
32 Root,
33 ProcessingInstruction(Cow<'a, str>, Option<Cow<'a, str>>),
34 Comment(Cow<'a, str>),
35}
36
37pub(crate) struct Signatures<T: Eq + std::hash::Hash> {
38 map: AHashMap<T, u32>,
39 next_id: u32,
40}
41
42impl<T: Eq + std::hash::Hash> Signatures<T> {
43 pub(crate) fn new() -> Self {
44 Self {
45 map: AHashMap::new(),
46 next_id: 0,
47 }
48 }
49
50 fn get_id(&mut self, signature: T) -> u32 {
51 if let Some(id) = self.map.get(&signature) {
52 *id
53 } else {
54 let id = self.next_id;
55 self.next_id += 1;
56 self.map.insert(signature, id);
57 id
58 }
59 }
60}
61
62#[derive(Debug, Hash, PartialEq, Eq)]
66pub(crate) struct TreeSignature<'a> {
67 node_signature_id: u32,
68 tree_signature_ids: Cow<'a, [u32]>,
69}
70
71#[derive(Debug, PartialEq, Eq, Copy, Clone)]
75pub enum Status {
76 Equal(u32),
80 Update(u32),
83 Different,
87}
88
89#[derive(Debug)]
90pub(crate) struct Vnode {
91 pub(crate) node: Node,
92 pub(crate) parent_id: Option<usize>,
94 pub(crate) first_child: Option<usize>,
95 pub(crate) next_sibling: Option<usize>,
96 pub(crate) node_signature_id: u32,
97 pub tree_signature_id: u32,
98 pub descendant_count: u32,
99 pub weight: u32,
100 pub status: Status,
101}
102
103type NodeToIndex = AHashMap<Node, usize>;
104
105impl Vnode {
106 fn new(xot: &Xot, node_to_index: &NodeToIndex, node: Node, node_signature_id: u32) -> Self {
107 Self {
108 node,
109 parent_id: xot.parent(node).map(|n| node_to_index[&n]),
110 first_child: xot.first_child(node).map(|n| node_to_index[&n]),
111 next_sibling: xot.next_sibling(node).map(|n| node_to_index[&n]),
112 node_signature_id,
113 tree_signature_id: 0,
114 descendant_count: 0,
115 weight: 0,
116 status: Status::Different,
117 }
118 }
119
120 fn get_children_tree_signature_ids(&self, vnodes: &[Vnode]) -> Vec<u32> {
121 children(vnodes, self)
122 .map(|vnode| vnode.tree_signature_id)
123 .collect::<Vec<_>>()
124 }
125}
126
127pub(crate) struct Vtree {
128 pub(crate) nodes: Vec<Vnode>,
129}
130
131impl Vtree {
132 pub(crate) fn new<'a>(
133 xot: &'a Xot,
134 root: Node,
135 node_signatures: &mut Signatures<NodeSignature<'a>>,
136 tree_signatures: &mut Signatures<TreeSignature<'a>>,
137 ) -> Self {
138 let node_to_index = xot
140 .descendants(root)
141 .enumerate()
142 .map(|(i, n)| (n, i))
143 .collect::<AHashMap<_, _>>();
144
145 let mut vnodes = xot
147 .descendants(root)
148 .map(|node| {
149 Vnode::new(
150 xot,
151 &node_to_index,
152 node,
153 get_node_signature_id(xot, node, node_signatures),
154 )
155 })
156 .collect::<Vec<_>>();
157
158 for i in (0..vnodes.len()).rev() {
160 let vnode = &vnodes[i];
162
163 let ids: Vec<u32> = vnode.get_children_tree_signature_ids(&vnodes);
166 let tree_signature = TreeSignature {
167 node_signature_id: vnode.node_signature_id,
168 tree_signature_ids: ids.into(),
169 };
170 let tree_signature_id = tree_signatures.get_id(tree_signature);
171
172 let mut weight = vnode.weight;
175 weight += match xot.value(vnode.node) {
176 Value::Element(_) => 1,
181 Value::Text(text) => text.get().len() as u32,
182 Value::Root => 0,
183 Value::ProcessingInstruction(_) => 1,
184 Value::Comment(_) => 1,
185 };
186 let descendant_count = vnode.descendant_count;
188 let parent_id = vnode.parent_id;
189 let vnode = &mut vnodes[i];
191 vnode.tree_signature_id = tree_signature_id;
193 vnode.weight = weight;
194
195 if let Some(parent_id) = parent_id {
197 let parent = &mut vnodes[parent_id];
198 parent.weight += weight;
199 parent.descendant_count += descendant_count + 1;
200 }
201 }
202 Self { nodes: vnodes }
203 }
204
205 pub(crate) fn get(&self, index: usize) -> &Vnode {
206 &self.nodes[index]
207 }
208
209 pub(crate) fn children(&self, parent: &Vnode) -> impl Iterator<Item = &Vnode> {
210 children(&self.nodes, parent)
211 }
212}
213
214fn children<'a>(vnodes: &'a [Vnode], parent: &Vnode) -> impl Iterator<Item = &'a Vnode> {
215 let mut current = parent.first_child;
216 std::iter::from_fn(move || {
217 if let Some(current_node) = current {
218 let node = &vnodes[current_node];
219 current = node.next_sibling;
220 Some(node)
221 } else {
222 None
223 }
224 })
225}
226
227fn get_node_signature_id<'a>(
228 xot: &'a Xot,
229 node: Node,
230 node_signatures: &mut Signatures<NodeSignature<'a>>,
231) -> u32 {
232 match xot.value(node) {
233 Value::Element(element) => {
234 node_signatures.get_id(NodeSignature::Element(ElementSignature::new(element)))
235 }
236 Value::Text(text) => node_signatures.get_id(NodeSignature::Text(text.get().into())),
237 Value::Root => node_signatures.get_id(NodeSignature::Root),
238 Value::ProcessingInstruction(pi) => node_signatures.get_id(
239 NodeSignature::ProcessingInstruction(pi.target().into(), pi.data().map(|v| v.into())),
240 ),
241 Value::Comment(comment) => {
242 node_signatures.get_id(NodeSignature::Comment(comment.get().into()))
243 }
244 }
245}
246
247#[cfg(test)]
248mod tests {
249 use super::*;
250 use crate::comparison::Comparison;
251
252 #[test]
253 fn test_root_element() {
254 let mut xot = Xot::new();
255 let doc_a = xot.parse("<container />").unwrap();
256 let doc_b = xot.parse("<container />").unwrap();
257 let comparison = Comparison::new(&xot, doc_a, doc_b);
258 let (a, b) = comparison.vtrees();
259 assert_eq!(a.nodes.len(), b.nodes.len());
260 assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
262 assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
264 }
265
266 #[test]
267 fn test_multiple_elements() {
268 let mut xot = Xot::new();
269 let doc_a = xot.parse("<container><a/><b/></container>").unwrap();
270 let doc_b = xot.parse("<container><a/><c/></container>").unwrap();
271 let comparison = Comparison::new(&xot, doc_a, doc_b);
272 let (a, b) = comparison.vtrees();
273 assert_eq!(a.nodes.len(), b.nodes.len());
274 assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
276 assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
278 assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
280 assert_ne!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
282 }
283
284 #[test]
285 fn test_multiple_elements_attribute_difference() {
286 let mut xot = Xot::new();
287 let doc_a = xot
288 .parse("<container><a s='A'/><b s='B'/></container>")
289 .unwrap();
290 let doc_b = xot
291 .parse("<container><a s='A'/><b s='C'/></container>")
292 .unwrap();
293 let comparison = Comparison::new(&xot, doc_a, doc_b);
294 let (a, b) = comparison.vtrees();
295 assert_eq!(a.nodes.len(), b.nodes.len());
296 assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
298 assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
300 assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
302 assert_ne!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
304 }
305
306 #[test]
307 fn test_text_nodes() {
308 let mut xot = Xot::new();
309 let doc_a = xot
310 .parse("<container><a>A</a><b>B</b></container>")
311 .unwrap();
312 let doc_b = xot
313 .parse("<container><a>A</a><b>C</b></container>")
314 .unwrap();
315 let comparison = Comparison::new(&xot, doc_a, doc_b);
316 let (a, b) = comparison.vtrees();
317 assert_eq!(a.nodes.len(), b.nodes.len());
318 assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
320 assert_eq!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
322 assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
324 assert_eq!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
326 assert_eq!(a.nodes[4].node_signature_id, b.nodes[4].node_signature_id);
328 assert_ne!(a.nodes[5].node_signature_id, b.nodes[5].node_signature_id);
330 }
331
332 #[test]
333 fn test_simple_tree_signature_ids() {
334 let mut xot = Xot::new();
335 let doc_a = xot.parse("<a />").unwrap();
336 let doc_b = xot.parse("<b />").unwrap();
337 let comparison = Comparison::new(&xot, doc_a, doc_b);
338 let (a, b) = comparison.vtrees();
339 assert_ne!(a.nodes[0].tree_signature_id, b.nodes[0].tree_signature_id);
341 assert_eq!(a.nodes[0].node_signature_id, b.nodes[0].node_signature_id);
342 assert_ne!(a.nodes[1].tree_signature_id, b.nodes[1].tree_signature_id);
344 assert_ne!(a.nodes[1].node_signature_id, b.nodes[1].node_signature_id);
345 }
346
347 #[test]
348 fn test_tree_signature_ids() {
349 let mut xot = Xot::new();
350 let doc_a = xot
351 .parse("<container><a>A</a><a>A</a></container>")
352 .unwrap();
353 let doc_b = xot
354 .parse("<container><a>A</a><a>Aprime</a></container>")
355 .unwrap();
356 let comparison = Comparison::new(&xot, doc_a, doc_b);
357 let (a, b) = comparison.vtrees();
358 assert_ne!(a.nodes[0].tree_signature_id, b.nodes[0].tree_signature_id);
360 assert_ne!(a.nodes[1].tree_signature_id, b.nodes[1].tree_signature_id);
362 assert_eq!(a.nodes[2].tree_signature_id, b.nodes[2].tree_signature_id);
364 assert_eq!(a.nodes[2].tree_signature_id, a.nodes[4].tree_signature_id);
366 assert_ne!(a.nodes[4].tree_signature_id, b.nodes[4].tree_signature_id);
368 }
369
370 #[test]
371 fn test_element_weights() {
372 let mut xot = Xot::new();
373 let doc_a = xot.parse("<a />").unwrap();
374 let doc_b = xot.parse("<a><b/></a>").unwrap();
375 let comparison = Comparison::new(&xot, doc_a, doc_b);
376 let (a, b) = comparison.vtrees();
377 assert_eq!(a.nodes[0].weight, 1);
379 assert_eq!(b.nodes[0].weight, 2);
380 assert_eq!(a.nodes[1].weight, 1);
382 assert_eq!(b.nodes[1].weight, 2);
383 }
384
385 #[test]
386 fn test_weights_with_text() {
387 let mut xot = Xot::new();
388 let doc_a = xot.parse("<a>AAAA</a>").unwrap();
389 let doc_b = xot.parse("<a>BBB</a>").unwrap();
390 let comparison = Comparison::new(&xot, doc_a, doc_b);
391 let (a, b) = comparison.vtrees();
392 assert_eq!(a.nodes[0].weight, 5);
394 assert_eq!(b.nodes[0].weight, 4);
395 assert_eq!(a.nodes[1].weight, 5);
397 assert_eq!(b.nodes[1].weight, 4);
398 assert_eq!(a.nodes[2].weight, 4);
400 assert_eq!(b.nodes[2].weight, 3);
401 }
402
403 #[test]
404 fn test_descendant_count() {
405 let mut xot = Xot::new();
406 let doc_a = xot.parse("<a><b><c/></b></a>").unwrap();
407 let doc_b = xot.parse("<a><b/></a>").unwrap();
408 let comparison = Comparison::new(&xot, doc_a, doc_b);
409 let (a, b) = comparison.vtrees();
410 assert_eq!(a.nodes[0].descendant_count, 3);
412 assert_eq!(b.nodes[0].descendant_count, 2);
413 assert_eq!(a.nodes[1].descendant_count, 2);
415 assert_eq!(b.nodes[1].descendant_count, 1);
416 }
417
418 #[test]
419 fn test_vtree_children() {
420 let mut xot = Xot::new();
421 let doc_a = xot.parse("<a><b><d/></b><c></c></a>").unwrap();
422 let doc_b = xot.parse("<a><b/></a>").unwrap();
423
424 let comparison = Comparison::new(&xot, doc_a, doc_b);
425 let (a, _b) = comparison.vtrees();
426
427 let children_root = a.children(&a.nodes[0]).collect::<Vec<_>>();
428 assert_eq!(children_root.len(), 1);
429 let children_doc_el = a.children(children_root[0]).collect::<Vec<_>>();
430 assert_eq!(children_doc_el.len(), 2);
431 assert_eq!(a.children(children_doc_el[0]).count(), 1);
432 assert_eq!(a.children(children_doc_el[1]).count(), 0);
433 }
434
435 #[test]
436 fn test_vtree_processing_instruction() {
437 let mut xot = Xot::new();
438 let doc_a = xot.parse("<a><?pi?><?pi foo?></a>").unwrap();
439 let doc_b = xot.parse("<a><?pi?><?pi bar?></a>").unwrap();
440
441 let comparison = Comparison::new(&xot, doc_a, doc_b);
442 let (a, b) = comparison.vtrees();
443
444 assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
445 assert_ne!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
446 }
447
448 #[test]
449 fn test_vtree_comment() {
450 let mut xot = Xot::new();
451 let doc_a = xot.parse("<a><!--foo--><!--a--></a>").unwrap();
452 let doc_b = xot.parse("<a><!--foo--><!--b--></a>").unwrap();
453
454 let comparison = Comparison::new(&xot, doc_a, doc_b);
455 let (a, b) = comparison.vtrees();
456
457 assert_eq!(a.nodes[2].node_signature_id, b.nodes[2].node_signature_id);
458 assert_ne!(a.nodes[3].node_signature_id, b.nodes[3].node_signature_id);
459 }
460}