1use std::{
2 collections::{HashMap, HashSet},
3 fmt::Display,
4};
5
6use crate::tree::{XNode, XTree};
7use md5::Digest;
8
9trait Concat {
10 fn concat(self, other: Self) -> Self;
11}
12
13impl Concat for Digest {
14 fn concat(mut self, other: Self) -> Self {
15 for i in 0..self.0.len() {
16 self.0[i] = self.0[i].wrapping_add(other.0[i]);
17 }
18 self
19 }
20}
21
22#[derive(Debug, Clone)]
23pub enum Edit<'a, 'tree1, 'tree2> {
24 Insert {
25 child_node: XNode<'a, 'tree2>,
26 to_node: XNode<'a, 'tree1>,
27 },
28 Delete(XNode<'a, 'tree1>),
29 Update {
30 old: XNode<'a, 'tree1>,
31 new: XNode<'a, 'tree2>,
32 },
33 ReplaceRoot,
34}
35
36impl Display for Edit<'_, '_, '_> {
37 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38 match self {
39 Edit::Insert {
40 child_node,
41 to_node,
42 } => {
43 write!(
44 f,
45 "insert node {} to node {}",
46 child_node.id(),
47 to_node.id()
48 )
49 }
50 Edit::Delete(node) => write!(f, "delete node {}", node.id()),
51 Edit::Update { old, new } => write!(
52 f,
53 "update node {}: {:?} -> {:?}",
54 old.id(),
55 old.value().unwrap().trim(),
56 new.value().unwrap().trim()
57 ),
58 Edit::ReplaceRoot => write!(f, "replace root node"),
59 }
60 }
61}
62
63type Diff<'a, 'tree1, 'tree2> = Vec<Edit<'a, 'tree1, 'tree2>>;
64
65pub fn diff<'a, 'doc1, 'doc2>(
67 tree1: &'doc1 XTree<'doc1>,
68 tree2: &'doc2 XTree<'doc2>,
69) -> Diff<'a, 'doc1, 'doc2> {
70 fn diff_node<'a, 'doc1, 'doc2>(
71 node1: XNode<'a, 'doc1>,
72 ht1: &HashMap<String, Digest>,
73 node2: XNode<'a, 'doc2>,
74 ht2: &HashMap<String, Digest>,
75 ) -> Diff<'a, 'doc1, 'doc2> {
76 if ht1.get(&node1.id().to_string()) == ht2.get(&node2.id().to_string()) {
77 return Vec::new();
78 }
79
80 if (node1.is_attribute() && node2.is_attribute()) || (node1.is_text() && node2.is_text()) {
82 return vec![Edit::Update {
83 old: node1,
84 new: node2,
85 }];
86 }
87
88 let mut iht1: HashMap<_, _> = node1
89 .children()
90 .iter()
91 .map(|n| (*ht1.get(&n.id().to_string()).unwrap(), *n))
92 .collect();
93 let mut iht2: HashMap<_, _> = node2
94 .children()
95 .iter()
96 .map(|n| (*ht2.get(&n.id().to_string()).unwrap(), *n))
97 .collect();
98 let children_hashes1: HashSet<_> = iht1.keys().copied().collect();
99 let children_hashes2: HashSet<_> = iht2.keys().copied().collect();
100 let same_hashes: HashSet<_> = children_hashes1.intersection(&children_hashes2).collect();
101 iht1.retain(|k, _| !same_hashes.contains(&k));
102 iht2.retain(|k, _| !same_hashes.contains(&k));
103 let mut remaining_children1: HashSet<_> = iht1.into_values().collect();
104 let mut remaining_children2: HashSet<_> = iht2.into_values().collect();
105 let mut diff_pairs = Vec::new();
106 for n1 in &remaining_children1 {
107 for n2 in &remaining_children2 {
108 if n1.signature() == n2.signature() {
109 diff_pairs.push((*n1, *n2, diff_node(*n1, ht1, *n2, ht2)));
110 }
111 }
112 }
113 diff_pairs.sort_by_key(|item| item.2.len());
114 let mut diff = Vec::new();
115 for (n1, n2, mut d) in diff_pairs {
116 if remaining_children1.contains(&n1) && remaining_children2.contains(&n2) {
117 diff.append(&mut d);
118 remaining_children1.remove(&n1);
119 remaining_children2.remove(&n2);
120 }
121 }
122 for n1 in remaining_children1 {
123 diff.push(Edit::Delete(n1));
124 }
125 for n2 in remaining_children2 {
126 diff.push(Edit::Insert {
127 child_node: n2,
128 to_node: node1,
129 });
130 }
131 diff
132 }
133 if tree1.root().signature() != tree2.root().signature() {
134 return vec![Edit::ReplaceRoot];
135 }
136 let ht1 = calculate_hash_table(tree1);
137 let ht2 = calculate_hash_table(tree2);
138 diff_node(tree1.root(), &ht1, tree2.root(), &ht2)
139}
140
141fn calculate_hash_table(tree: &XTree) -> HashMap<String, Digest> {
142 fn hash_of_node(node: XNode, ht: &mut HashMap<String, Digest>) -> Digest {
143 let hash = if node.children().is_empty() {
144 node.hash()
145 } else {
146 let mut acc = node.hash();
147 for child in node.children() {
148 acc = acc.concat(hash_of_node(child, ht));
149 }
150 acc
151 };
152 ht.insert(node.id().to_string(), hash);
153 hash
154 }
155 let mut hash_table = HashMap::new();
156 hash_of_node(tree.root(), &mut hash_table);
157 hash_table
158}
159
160#[cfg(test)]
161mod test {
162 #[cfg(feature = "print")]
163 use crate::tree::print::{PrintTreeOptions, print_tree};
164
165 use super::*;
166 use std::fs;
167
168 #[test]
169 fn test_calculate_hash_table_same_tree() {
170 let text1 = fs::read_to_string("test/file1.xml").unwrap();
171 let tree1 = XTree::parse(&text1).unwrap();
172 let ht1 = calculate_hash_table(&tree1);
173
174 let text2 = fs::read_to_string("test/file1.xml").unwrap();
175 let tree2 = XTree::parse(&text2).unwrap();
176 let ht2 = calculate_hash_table(&tree2);
177
178 assert_eq!(
179 ht1.get(&tree1.root().id().to_string()),
180 ht2.get(&tree2.root().id().to_string())
181 );
182 }
183
184 #[test]
185 fn test_calculate_hash_table_different_tree() {
186 let text1 = fs::read_to_string("test/file1.xml").unwrap();
187 let tree1 = XTree::parse(&text1).unwrap();
188 let ht1 = calculate_hash_table(&tree1);
189
190 let text2 = fs::read_to_string("test/file2.xml").unwrap();
191 let tree2 = XTree::parse(&text2).unwrap();
192 let ht2 = calculate_hash_table(&tree2);
193
194 assert_ne!(
195 ht1.get(&tree1.root().id().to_string()),
196 ht2.get(&tree2.root().id().to_string())
197 );
198 }
199
200 #[test]
201 fn test_diff() {
202 let text1 = fs::read_to_string("test/file1.xml").unwrap();
203 let tree1 = XTree::parse(&text1).unwrap();
204
205 let text2 = fs::read_to_string("test/file2.xml").unwrap();
206 let tree2 = XTree::parse(&text2).unwrap();
207
208 #[cfg(feature = "print")]
209 {
210 print_tree(&tree1, PrintTreeOptions::default().with_node_id());
211 print_tree(&tree2, PrintTreeOptions::default().with_node_id());
212 }
213
214 let diff = diff(&tree1, &tree2);
215 diff.iter().any(|d| {
216 regex::Regex::new(r#"update node \d+: \"George\" -> \"Fred\""#)
217 .unwrap()
218 .is_match(&d.to_string())
219 });
220 diff.iter().any(|d| {
221 regex::Regex::new(r#"insert node \d+[Attr] to node \d+"#)
222 .unwrap()
223 .is_match(&d.to_string())
224 });
225 diff.iter().any(|d| {
226 regex::Regex::new(r#"insert node \d+[Foo] to node \d+"#)
227 .unwrap()
228 .is_match(&d.to_string())
229 });
230 for e in diff {
231 println!("{}", e);
232 }
233 }
234}