1use std::{
91 collections::{HashMap, HashSet},
92 fmt::Display,
93};
94
95use md5::Digest;
96use tree::{XNode, XNodeId, XTree};
97
98pub mod tree;
100
101trait Concat {
102 fn concat(self, other: Self) -> Self;
103}
104
105impl Concat for Digest {
106 fn concat(mut self, other: Self) -> Self {
107 for i in 0..self.0.len() {
108 self.0[i] = self.0[i].wrapping_add(other.0[i]);
109 }
110 self
111 }
112}
113
114#[derive(Debug, Clone)]
115pub enum Edit<'tree1, 'tree2> {
116 Insert {
117 child_node: XNodeId<'tree2>,
118 to: XNodeId<'tree1>,
119 },
120 Delete(XNodeId<'tree1>),
121 Update {
122 node_id: XNodeId<'tree1>,
123 old_value: String,
124 new_value: String,
125 },
126 ReplaceRoot,
127}
128
129impl Display for Edit<'_, '_> {
130 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131 match self {
132 Edit::Insert { child_node, to } => {
133 write!(f, "insert node {} to node {}", child_node, to)
134 }
135 Edit::Delete(node_id) => write!(f, "delete node {}", node_id),
136 Edit::Update {
137 node_id,
138 old_value,
139 new_value,
140 } => write!(
141 f,
142 "update node {}: {:?} -> {:?}",
143 node_id, old_value, new_value
144 ),
145 Edit::ReplaceRoot => write!(f, "replace root node"),
146 }
147 }
148}
149
150type Diff<'tree1, 'tree2> = Vec<Edit<'tree1, 'tree2>>;
151
152pub fn diff<'doc1, 'doc2>(
154 tree1: &'doc1 XTree<'doc1>,
155 tree2: &'doc2 XTree<'doc2>,
156) -> Diff<'doc1, 'doc2> {
157 fn diff_node<'doc1, 'doc2>(
158 node1: XNode<'_, 'doc1>,
159 ht1: &HashMap<XNodeId<'doc1>, Digest>,
160 node2: XNode<'_, 'doc2>,
161 ht2: &HashMap<XNodeId<'doc2>, Digest>,
162 ) -> Diff<'doc1, 'doc2> {
163 if ht1.get(&node1.id()) == ht2.get(&node2.id()) {
164 return Vec::new();
165 }
166
167 if (node1.is_attribute() && node2.is_attribute()) || (node1.is_text() && node2.is_text()) {
169 return vec![Edit::Update {
170 node_id: node1.id(),
171 old_value: node1.value().unwrap_or_default().trim().to_string(),
172 new_value: node2.value().unwrap_or_default().trim().to_string(),
173 }];
174 }
175
176 let mut iht1: HashMap<_, _> = node1
177 .children()
178 .iter()
179 .map(|n| (*ht1.get(&n.id()).unwrap(), *n))
180 .collect();
181 let mut iht2: HashMap<_, _> = node2
182 .children()
183 .iter()
184 .map(|n| (*ht2.get(&n.id()).unwrap(), *n))
185 .collect();
186 let children_hashes1: HashSet<_> = iht1.keys().copied().collect();
187 let children_hashes2: HashSet<_> = iht2.keys().copied().collect();
188 let same_hashes: HashSet<_> = children_hashes1.intersection(&children_hashes2).collect();
189 iht1.retain(|k, _| !same_hashes.contains(&k));
190 iht2.retain(|k, _| !same_hashes.contains(&k));
191 let mut remaining_children1: HashSet<_> = iht1.into_values().collect();
192 let mut remaining_children2: HashSet<_> = iht2.into_values().collect();
193 let mut diff_pairs = Vec::new();
194 for n1 in &remaining_children1 {
195 for n2 in &remaining_children2 {
196 if n1.signature() == n2.signature() {
197 diff_pairs.push((*n1, *n2, diff_node(*n1, ht1, *n2, ht2)));
198 }
199 }
200 }
201 diff_pairs.sort_by_key(|item| item.2.len());
202 let mut diff = Vec::new();
203 for (n1, n2, mut d) in diff_pairs {
204 if remaining_children1.contains(&n1) && remaining_children2.contains(&n2) {
205 diff.append(&mut d);
206 remaining_children1.remove(&n1);
207 remaining_children2.remove(&n2);
208 }
209 }
210 for n1 in remaining_children1 {
211 diff.push(Edit::Delete(n1.id()));
212 }
213 for n2 in remaining_children2 {
214 diff.push(Edit::Insert {
215 child_node: n2.id(),
216 to: node1.id(),
217 });
218 }
219 diff
220 }
221 if tree1.root().signature() != tree2.root().signature() {
222 return vec![Edit::ReplaceRoot];
223 }
224 let ht1 = calculate_hash_table(tree1);
225 let ht2 = calculate_hash_table(tree2);
226 diff_node(tree1.root(), &ht1, tree2.root(), &ht2)
227}
228
229fn calculate_hash_table<'doc>(tree: &'doc XTree) -> HashMap<XNodeId<'doc>, Digest> {
230 fn hash_of_node<'doc>(
231 node: XNode<'_, 'doc>,
232 ht: &mut HashMap<XNodeId<'doc>, Digest>,
233 ) -> Digest {
234 let hash = if node.children().is_empty() {
235 node.hash()
236 } else {
237 let mut acc = node.hash();
238 for child in node.children() {
239 acc = acc.concat(hash_of_node(child, ht));
240 }
241 acc
242 };
243 ht.insert(node.id(), hash);
244 hash
245 }
246 let mut hash_table = HashMap::new();
247 hash_of_node(tree.root(), &mut hash_table);
248 hash_table
249}
250
251#[cfg(test)]
252mod test {
253 use std::fs;
254 #[cfg(feature = "print")]
255 use tree::XTreePrintOptions;
256
257 use super::*;
258
259 #[test]
260 fn test_calculate_hash_table_same_tree() {
261 let text1 = fs::read_to_string("test/file1.xml").unwrap();
262 let tree1 = XTree::parse(&text1).unwrap();
263 let ht1 = calculate_hash_table(&tree1);
264
265 let text2 = fs::read_to_string("test/file1.xml").unwrap();
266 let tree2 = XTree::parse(&text2).unwrap();
267 let ht2 = calculate_hash_table(&tree2);
268
269 assert_eq!(ht1.get(&tree1.root().id()), ht2.get(&tree2.root().id()));
270 }
271
272 #[test]
273 fn test_calculate_hash_table_different_tree() {
274 let text1 = fs::read_to_string("test/file1.xml").unwrap();
275 let tree1 = XTree::parse(&text1).unwrap();
276 let ht1 = calculate_hash_table(&tree1);
277
278 let text2 = fs::read_to_string("test/file2.xml").unwrap();
279 let tree2 = XTree::parse(&text2).unwrap();
280 let ht2 = calculate_hash_table(&tree2);
281
282 #[cfg(feature = "print")]
283 {
284 let hex_marker1 = ht1.iter().map(|(k, v)| (*k, format!("{:x}", v))).collect();
285 tree1.print(
286 XTreePrintOptions::default()
287 .with_node_marker(&hex_marker1)
288 .with_node_id(),
289 );
290
291 let hex_marker2 = ht2.iter().map(|(k, v)| (*k, format!("{:x}", v))).collect();
292 tree2.print(
293 XTreePrintOptions::default()
294 .with_node_marker(&hex_marker2)
295 .with_node_id(),
296 );
297 }
298
299 assert_ne!(ht1.get(&tree1.root().id()), ht2.get(&tree2.root().id()));
300 }
301
302 #[test]
303 fn test_diff() {
304 let text1 = fs::read_to_string("test/file1.xml").unwrap();
305 let tree1 = XTree::parse(&text1).unwrap();
306
307 let text2 = fs::read_to_string("test/file2.xml").unwrap();
308 let tree2 = XTree::parse(&text2).unwrap();
309
310 #[cfg(feature = "print")]
311 {
312 tree1.print(XTreePrintOptions::default().with_node_id());
313 tree2.print(XTreePrintOptions::default().with_node_id());
314 }
315
316 let diff = diff(&tree1, &tree2);
317 diff.iter().any(|d| {
318 regex::Regex::new(r#"update node \d+: \"George\" -> \"Fred\""#)
319 .unwrap()
320 .is_match(&d.to_string())
321 });
322 diff.iter().any(|d| {
323 regex::Regex::new(r#"insert node \d+[Attr] to node \d+"#)
324 .unwrap()
325 .is_match(&d.to_string())
326 });
327 diff.iter().any(|d| {
328 regex::Regex::new(r#"insert node \d+[Foo] to node \d+"#)
329 .unwrap()
330 .is_match(&d.to_string())
331 });
332 for e in diff {
333 println!("{}", e);
334 }
335 }
336}