1use std::collections::HashMap;
10
11use crate::node::{NodeInner, NodeRef};
12
13use super::{CandidateEntry, DfsTreeIterator, Matching};
14
15#[derive(Default)]
21pub struct DiffMatching {
22 base_root: Option<NodeRef>,
23 branch_root: Option<NodeRef>,
24 hash_index: HashMap<i32, Vec<NodeRef>>,
26}
27
28impl DiffMatching {
29 pub fn new() -> Self {
31 DiffMatching {
32 base_root: None,
33 branch_root: None,
34 hash_index: HashMap::new(),
35 }
36 }
37
38 fn build_hash_index(&mut self, base: &NodeRef) {
40 self.hash_index.clear();
41
42 for node in DfsTreeIterator::new(base.clone()) {
43 if let Some(content) = node.borrow().content() {
44 let hash = content.content_hash();
45 self.hash_index.entry(hash).or_default().push(node.clone());
46 }
47 }
48 }
49
50 fn match_subtrees(&mut self, base: &NodeRef, branch: &NodeRef) {
52 self.build_hash_index(base);
53
54 let mut queue: Vec<NodeRef> = vec![branch.clone()];
56
57 while let Some(current) = queue.pop() {
58 let candidates = self.find_candidates(¤t);
60
61 if candidates.is_empty() {
62 let children: Vec<NodeRef> = current.borrow().children().to_vec();
64 for child in children {
65 queue.push(child);
66 }
67 } else {
68 let best = self.get_best_candidate(¤t, candidates);
70
71 if let Some(best_entry) = best {
72 self.add_matching(¤t, &best_entry.candidate);
74
75 let stop_nodes = self.get_stop_nodes(&best_entry.candidate, ¤t);
77 for stop_node in stop_nodes {
78 queue.push(stop_node);
79 }
80 } else {
81 let children: Vec<NodeRef> = current.borrow().children().to_vec();
83 for child in children {
84 queue.push(child);
85 }
86 }
87 }
88 }
89 }
90
91 fn find_candidates(&self, branch: &NodeRef) -> Vec<CandidateEntry> {
95 let mut candidates = Vec::new();
96
97 let branch_borrowed = branch.borrow();
98 if let Some(content) = branch_borrowed.content() {
99 let hash = content.content_hash();
100
101 if let Some(nodes) = self.hash_index.get(&hash) {
102 for node in nodes {
103 let node_borrowed = node.borrow();
105 if let Some(node_content) = node_borrowed.content() {
106 if content.content_equals(node_content) {
107 drop(node_borrowed);
108 candidates.push(CandidateEntry::new(node.clone(), 0.0, -1.0));
109 }
110 }
111 }
112 }
113 }
114
115 candidates
116 }
117
118 fn get_best_candidate(
123 &self,
124 branch: &NodeRef,
125 candidates: Vec<CandidateEntry>,
126 ) -> Option<CandidateEntry> {
127 if candidates.is_empty() {
128 return None;
129 }
130
131 if candidates.len() > 1 {
133 if let Some(left_sibling) = NodeInner::left_sibling_of_ref(branch) {
134 let left_base_match = left_sibling
135 .borrow()
136 .get_base_match()
137 .and_then(|w| w.upgrade());
138
139 if let Some(left_base) = left_base_match {
140 for candidate in &candidates {
142 if let Some(cand_left) =
143 NodeInner::left_sibling_of_ref(&candidate.candidate)
144 {
145 if cand_left.borrow().id() == left_base.borrow().id() {
146 return Some(candidate.clone());
147 }
148 }
149 }
150 }
151 }
152 }
153
154 candidates.into_iter().next()
156 }
157
158 fn get_stop_nodes(&self, base: &NodeRef, branch: &NodeRef) -> Vec<NodeRef> {
160 let mut stop_nodes = Vec::new();
161
162 let base_borrowed = base.borrow();
163 let branch_borrowed = branch.borrow();
164
165 let base_child_count = base_borrowed.child_count();
166 let branch_child_count = branch_borrowed.child_count();
167
168 let base_children: Vec<NodeRef> = base_borrowed.children().to_vec();
171 let branch_children: Vec<NodeRef> = branch_borrowed.children().to_vec();
172 drop(base_borrowed);
173 drop(branch_borrowed);
174
175 if base_child_count == branch_child_count {
176 for i in 0..branch_child_count {
177 let base_child = &base_children[i];
178 let branch_child = &branch_children[i];
179
180 let base_content = base_child.borrow().content().cloned();
182 let branch_content = branch_child.borrow().content().cloned();
183
184 let matches = match (&base_content, &branch_content) {
185 (Some(bc), Some(rc)) => bc.content_equals(rc),
186 _ => false,
187 };
188
189 if matches {
190 self.add_matching(branch_child, base_child);
192
193 let child_stops = self.get_stop_nodes(base_child, branch_child);
195 stop_nodes.extend(child_stops);
196 } else {
197 stop_nodes.push(branch_child.clone());
198 }
199 }
200 } else {
201 for child in &branch_children {
203 stop_nodes.push(child.clone());
204 }
205 }
206
207 stop_nodes
208 }
209
210 fn add_matching(&self, branch: &NodeRef, base: &NodeRef) {
212 branch.borrow_mut().set_base_match(base);
213
214 let base_borrowed = base.borrow();
216 if let crate::node::NodeKind::Base { left, .. } = base_borrowed.kind() {
217 left.borrow_mut().add_match(branch.clone());
218 }
219 }
220
221 pub fn branch_root_ref(&self) -> Option<&NodeRef> {
223 self.branch_root.as_ref()
224 }
225}
226
227impl Matching for DiffMatching {
228 fn build_matching(&mut self, base: &NodeRef, branch: &NodeRef) {
229 self.base_root = Some(base.clone());
230 self.branch_root = Some(branch.clone());
231
232 self.match_subtrees(base, branch);
233
234 branch.borrow_mut().set_base_match(base);
236 }
237
238 fn get_area_stop_nodes(&self, stop_nodes: &mut Vec<NodeRef>, node: &NodeRef) {
239 let children: Vec<NodeRef> = node.borrow().children().to_vec();
241 for child in &children {
242 if child.borrow().get_base_match().is_none() {
243 stop_nodes.push(child.clone());
244 } else {
245 self.get_area_stop_nodes(stop_nodes, child);
246 return;
247 }
248 }
249 }
250
251 fn base_root(&self) -> Option<&NodeRef> {
252 self.base_root.as_ref()
253 }
254
255 fn branch_root(&self) -> Option<&NodeRef> {
256 self.branch_root.as_ref()
257 }
258}
259
260#[cfg(test)]
261mod tests {
262 use super::*;
263 use crate::node::{new_base_node, new_branch_node, XmlContent, XmlElement, XmlText};
264
265 fn make_element(name: &str) -> XmlContent {
266 XmlContent::Element(XmlElement::new(
267 name.to_string(),
268 std::collections::HashMap::new(),
269 ))
270 }
271
272 fn make_text(text: &str) -> XmlContent {
273 XmlContent::Text(XmlText::new(text))
274 }
275
276 #[test]
277 fn test_diff_matching_exact_only() {
278 let base_root = new_base_node(Some(make_element("root")));
280 let base_a = new_base_node(Some(make_element("a")));
281 NodeInner::add_child_to_ref(&base_root, base_a.clone());
282
283 let branch_root = new_branch_node(Some(make_element("root")));
284 let branch_a = new_branch_node(Some(make_element("a")));
285 NodeInner::add_child_to_ref(&branch_root, branch_a.clone());
286
287 let mut matching = DiffMatching::new();
288 matching.build_matching(&base_root, &branch_root);
289
290 assert!(branch_a.borrow().get_base_match().is_some());
292 }
293
294 #[test]
295 fn test_diff_matching_no_fuzzy() {
296 let base_root = new_base_node(Some(make_element("root")));
298 let base_text = new_base_node(Some(make_text("hello world")));
299 NodeInner::add_child_to_ref(&base_root, base_text);
300
301 let branch_root = new_branch_node(Some(make_element("root")));
302 let branch_text = new_branch_node(Some(make_text("hello world!"))); NodeInner::add_child_to_ref(&branch_root, branch_text.clone());
304
305 let mut matching = DiffMatching::new();
306 matching.build_matching(&base_root, &branch_root);
307
308 assert!(branch_text.borrow().get_base_match().is_none());
310 }
311
312 #[test]
313 fn test_diff_matching_sequential_preference() {
314 let base_root = new_base_node(Some(make_element("root")));
320 let base_a1 = new_base_node(Some(make_element("a")));
321 let base_b = new_base_node(Some(make_element("b")));
322 let base_a2 = new_base_node(Some(make_element("a")));
323 NodeInner::add_child_to_ref(&base_root, base_a1.clone());
324 NodeInner::add_child_to_ref(&base_root, base_b.clone());
325 NodeInner::add_child_to_ref(&base_root, base_a2.clone());
326
327 let branch_root = new_branch_node(Some(make_element("root")));
328 let branch_a1 = new_branch_node(Some(make_element("a")));
329 let branch_b = new_branch_node(Some(make_element("b")));
330 let branch_a2 = new_branch_node(Some(make_element("a")));
331 NodeInner::add_child_to_ref(&branch_root, branch_a1.clone());
332 NodeInner::add_child_to_ref(&branch_root, branch_b.clone());
333 NodeInner::add_child_to_ref(&branch_root, branch_a2.clone());
334
335 let mut matching = DiffMatching::new();
336 matching.build_matching(&base_root, &branch_root);
337
338 let match1 = branch_a1
340 .borrow()
341 .get_base_match()
342 .and_then(|w| w.upgrade());
343 assert!(match1.is_some());
344 assert_eq!(match1.unwrap().borrow().id(), base_a1.borrow().id());
345
346 let match2 = branch_a2
348 .borrow()
349 .get_base_match()
350 .and_then(|w| w.upgrade());
351 assert!(match2.is_some());
352 assert_eq!(match2.unwrap().borrow().id(), base_a2.borrow().id());
353 }
354}