1use std::collections::HashMap;
10
11use crate::node::{NodeInner, NodeRef};
12
13use super::{CandidateEntry, DfsTreeIterator, Matching};
14
15#[derive(Default)]
21pub struct DiffMatching {
22 base_root: Option<NodeRef>,
23 branch_root: Option<NodeRef>,
24 hash_index: HashMap<i32, Vec<NodeRef>>,
26}
27
28impl DiffMatching {
29 pub fn new() -> Self {
31 DiffMatching {
32 base_root: None,
33 branch_root: None,
34 hash_index: HashMap::new(),
35 }
36 }
37
38 fn build_hash_index(&mut self, base: &NodeRef) {
40 self.hash_index.clear();
41
42 for node in DfsTreeIterator::new(base.clone()) {
43 if let Some(content) = node.borrow().content() {
44 let hash = content.content_hash();
45 self.hash_index.entry(hash).or_default().push(node.clone());
46 }
47 }
48 }
49
50 fn match_subtrees(&mut self, base: &NodeRef, branch: &NodeRef) {
52 self.build_hash_index(base);
53
54 let mut queue: Vec<NodeRef> = vec![branch.clone()];
56
57 while let Some(current) = queue.pop() {
58 let candidates = self.find_candidates(¤t);
60
61 if candidates.is_empty() {
62 let children: Vec<NodeRef> = current.borrow().children().to_vec();
64 for child in children {
65 queue.push(child);
66 }
67 } else {
68 let best = self.get_best_candidate(¤t, candidates);
70
71 if let Some(best_entry) = best {
72 self.add_matching(¤t, &best_entry.candidate);
74
75 let stop_nodes = self.get_stop_nodes(&best_entry.candidate, ¤t);
77 for stop_node in stop_nodes {
78 queue.push(stop_node);
79 }
80 } else {
81 let children: Vec<NodeRef> = current.borrow().children().to_vec();
83 for child in children {
84 queue.push(child);
85 }
86 }
87 }
88 }
89 }
90
91 fn find_candidates(&self, branch: &NodeRef) -> Vec<CandidateEntry> {
95 let mut candidates = Vec::new();
96
97 let branch_borrowed = branch.borrow();
98 if let Some(content) = branch_borrowed.content() {
99 let hash = content.content_hash();
100
101 if let Some(nodes) = self.hash_index.get(&hash) {
102 for node in nodes {
103 let node_borrowed = node.borrow();
105 if let Some(node_content) = node_borrowed.content() {
106 if content.content_equals(node_content) {
107 drop(node_borrowed);
108 candidates.push(CandidateEntry::new(node.clone(), 0.0, -1.0));
109 }
110 }
111 }
112 }
113 }
114
115 candidates
116 }
117
118 fn get_best_candidate(
123 &self,
124 branch: &NodeRef,
125 candidates: Vec<CandidateEntry>,
126 ) -> Option<CandidateEntry> {
127 if candidates.is_empty() {
128 return None;
129 }
130
131 if candidates.len() > 1 {
133 if let Some(left_sibling) = NodeInner::left_sibling_of_ref(branch) {
134 let left_base_match = left_sibling
135 .borrow()
136 .get_base_match()
137 .and_then(|w| w.upgrade());
138
139 if let Some(left_base) = left_base_match {
140 for candidate in &candidates {
142 if let Some(cand_left) =
143 NodeInner::left_sibling_of_ref(&candidate.candidate)
144 {
145 if cand_left.borrow().id() == left_base.borrow().id() {
146 return Some(candidate.clone());
147 }
148 }
149 }
150 }
151 }
152 }
153
154 candidates.into_iter().next()
156 }
157
158 fn get_stop_nodes(&self, base: &NodeRef, branch: &NodeRef) -> Vec<NodeRef> {
160 let mut stop_nodes = Vec::new();
161
162 let base_borrowed = base.borrow();
163 let branch_borrowed = branch.borrow();
164
165 let base_child_count = base_borrowed.child_count();
166 let branch_child_count = branch_borrowed.child_count();
167
168 let base_children: Vec<NodeRef> = base_borrowed.children().to_vec();
171 let branch_children: Vec<NodeRef> = branch_borrowed.children().to_vec();
172 drop(base_borrowed);
173 drop(branch_borrowed);
174
175 if base_child_count == branch_child_count {
176 for i in 0..branch_child_count {
177 let base_child = &base_children[i];
178 let branch_child = &branch_children[i];
179
180 let base_content = base_child.borrow().content().cloned();
182 let branch_content = branch_child.borrow().content().cloned();
183
184 let matches = match (&base_content, &branch_content) {
185 (Some(bc), Some(rc)) => bc.content_equals(rc),
186 _ => false,
187 };
188
189 if matches {
190 self.add_matching(branch_child, base_child);
192
193 let child_stops = self.get_stop_nodes(base_child, branch_child);
195 stop_nodes.extend(child_stops);
196 } else {
197 stop_nodes.push(branch_child.clone());
198 }
199 }
200 } else {
201 for child in &branch_children {
203 stop_nodes.push(child.clone());
204 }
205 }
206
207 stop_nodes
208 }
209
210 fn add_matching(&self, branch: &NodeRef, base: &NodeRef) {
212 branch.borrow_mut().set_base_match(base);
213
214 let base_borrowed = base.borrow();
216 if let crate::node::NodeKind::Base { left, .. } = base_borrowed.kind() {
217 left.borrow_mut().add_match(branch.clone());
218 }
219 }
220
221 pub fn branch_root_ref(&self) -> Option<&NodeRef> {
223 self.branch_root.as_ref()
224 }
225}
226
227impl Matching for DiffMatching {
228 fn build_matching(&mut self, base: &NodeRef, branch: &NodeRef) {
229 self.base_root = Some(base.clone());
230 self.branch_root = Some(branch.clone());
231
232 self.match_subtrees(base, branch);
233
234 branch.borrow_mut().set_base_match(base);
236 }
237
238 fn get_area_stop_nodes(&self, stop_nodes: &mut Vec<NodeRef>, node: &NodeRef) {
239 let children: Vec<NodeRef> = node.borrow().children().to_vec();
241 for child in &children {
242 if child.borrow().get_base_match().is_none() {
243 stop_nodes.push(child.clone());
244 } else {
245 self.get_area_stop_nodes(stop_nodes, child);
246 }
247 }
248 }
249
250 fn base_root(&self) -> Option<&NodeRef> {
251 self.base_root.as_ref()
252 }
253
254 fn branch_root(&self) -> Option<&NodeRef> {
255 self.branch_root.as_ref()
256 }
257}
258
259#[cfg(test)]
260mod tests {
261 use super::*;
262 use crate::node::{new_base_node, new_branch_node, XmlContent, XmlElement, XmlText};
263
264 fn make_element(name: &str) -> XmlContent {
265 XmlContent::Element(XmlElement::new(
266 name.to_string(),
267 std::collections::HashMap::new(),
268 ))
269 }
270
271 fn make_text(text: &str) -> XmlContent {
272 XmlContent::Text(XmlText::new(text))
273 }
274
275 #[test]
276 fn test_diff_matching_exact_only() {
277 let base_root = new_base_node(Some(make_element("root")));
279 let base_a = new_base_node(Some(make_element("a")));
280 NodeInner::add_child_to_ref(&base_root, base_a.clone());
281
282 let branch_root = new_branch_node(Some(make_element("root")));
283 let branch_a = new_branch_node(Some(make_element("a")));
284 NodeInner::add_child_to_ref(&branch_root, branch_a.clone());
285
286 let mut matching = DiffMatching::new();
287 matching.build_matching(&base_root, &branch_root);
288
289 assert!(branch_a.borrow().get_base_match().is_some());
291 }
292
293 #[test]
294 fn test_diff_matching_no_fuzzy() {
295 let base_root = new_base_node(Some(make_element("root")));
297 let base_text = new_base_node(Some(make_text("hello world")));
298 NodeInner::add_child_to_ref(&base_root, base_text);
299
300 let branch_root = new_branch_node(Some(make_element("root")));
301 let branch_text = new_branch_node(Some(make_text("hello world!"))); NodeInner::add_child_to_ref(&branch_root, branch_text.clone());
303
304 let mut matching = DiffMatching::new();
305 matching.build_matching(&base_root, &branch_root);
306
307 assert!(branch_text.borrow().get_base_match().is_none());
309 }
310
311 #[test]
312 fn test_diff_matching_sequential_preference() {
313 let base_root = new_base_node(Some(make_element("root")));
319 let base_a1 = new_base_node(Some(make_element("a")));
320 let base_b = new_base_node(Some(make_element("b")));
321 let base_a2 = new_base_node(Some(make_element("a")));
322 NodeInner::add_child_to_ref(&base_root, base_a1.clone());
323 NodeInner::add_child_to_ref(&base_root, base_b.clone());
324 NodeInner::add_child_to_ref(&base_root, base_a2.clone());
325
326 let branch_root = new_branch_node(Some(make_element("root")));
327 let branch_a1 = new_branch_node(Some(make_element("a")));
328 let branch_b = new_branch_node(Some(make_element("b")));
329 let branch_a2 = new_branch_node(Some(make_element("a")));
330 NodeInner::add_child_to_ref(&branch_root, branch_a1.clone());
331 NodeInner::add_child_to_ref(&branch_root, branch_b.clone());
332 NodeInner::add_child_to_ref(&branch_root, branch_a2.clone());
333
334 let mut matching = DiffMatching::new();
335 matching.build_matching(&base_root, &branch_root);
336
337 let match1 = branch_a1
339 .borrow()
340 .get_base_match()
341 .and_then(|w| w.upgrade());
342 assert!(match1.is_some());
343 assert_eq!(match1.unwrap().borrow().id(), base_a1.borrow().id());
344
345 let match2 = branch_a2
347 .borrow()
348 .get_base_match()
349 .and_then(|w| w.upgrade());
350 assert!(match2.is_some());
351 assert_eq!(match2.unwrap().borrow().id(), base_a2.borrow().id());
352 }
353
354 #[test]
355 fn test_get_area_stop_nodes_all_siblings() {
356 let base_root = new_base_node(Some(make_element("root")));
366 let base_a = new_base_node(Some(make_element("a")));
367 let base_b = new_base_node(Some(make_element("b")));
368 let base_c = new_base_node(Some(make_element("c")));
369 NodeInner::add_child_to_ref(&base_root, base_a.clone());
370 NodeInner::add_child_to_ref(&base_root, base_b.clone());
371 NodeInner::add_child_to_ref(&base_root, base_c.clone());
372
373 let branch_root = new_branch_node(Some(make_element("root")));
374 let branch_a = new_branch_node(Some(make_element("a")));
375 let branch_x = new_branch_node(Some(make_element("x"))); let branch_c = new_branch_node(Some(make_element("c")));
377 NodeInner::add_child_to_ref(&branch_root, branch_a.clone());
378 NodeInner::add_child_to_ref(&branch_root, branch_x.clone());
379 NodeInner::add_child_to_ref(&branch_root, branch_c.clone());
380
381 let mut matching = DiffMatching::new();
382 matching.build_matching(&base_root, &branch_root);
383
384 assert!(
386 branch_a.borrow().get_base_match().is_some(),
387 "a should match"
388 );
389 assert!(
390 branch_c.borrow().get_base_match().is_some(),
391 "c should match"
392 );
393 assert!(
394 branch_x.borrow().get_base_match().is_none(),
395 "x should not match"
396 );
397
398 let mut stop_nodes = Vec::new();
400 matching.get_area_stop_nodes(&mut stop_nodes, &branch_root);
401
402 let stop_ids: Vec<_> = stop_nodes.iter().map(|n| n.borrow().id()).collect();
404 assert!(
405 stop_ids.contains(&branch_x.borrow().id()),
406 "unmatched sibling x must appear in stop nodes, got {:?}",
407 stop_ids
408 );
409 }
410}