Skip to main content

xml_3dm/matching/
mod.rs

1//! Tree matching algorithms.
2//!
3//! This module provides algorithms for establishing correspondences between
4//! nodes in base and branch trees. The matching is used by the merge algorithm
5//! to identify which nodes have been modified, moved, copied, or deleted.
6
7mod diff;
8mod heuristic;
9mod tri;
10
11pub use diff::DiffMatching;
12pub use heuristic::HeuristicMatching;
13pub use tri::TriMatching;
14
15use crate::node::NodeRef;
16
17/// Trait for tree matching algorithms.
18///
19/// A Matching builds correspondences between a base tree and a branch tree.
20/// The actual matchings are stored in the nodes themselves (BaseNode's left/right
21/// MatchedNodes and BranchNode's base_match field).
22pub trait Matching {
23    /// Builds a matching between a base tree and a branch tree.
24    ///
25    /// After this method returns, the nodes in both trees will have their
26    /// matching fields populated.
27    fn build_matching(&mut self, base: &NodeRef, branch: &NodeRef);
28
29    /// Gets the children of leaf nodes in a matching subtree.
30    ///
31    /// Uses the MatchArea tag of nodes to identify the subtree boundaries.
32    fn get_area_stop_nodes(&self, stop_nodes: &mut Vec<NodeRef>, node: &NodeRef);
33
34    /// Returns a reference to the base tree root.
35    fn base_root(&self) -> Option<&NodeRef>;
36
37    /// Returns a reference to the branch tree root.
38    fn branch_root(&self) -> Option<&NodeRef>;
39}
40
41/// Constants used by matching algorithms.
42pub mod constants {
43    /// Threshold for considering a subtree to be copied.
44    /// Subtrees smaller than this (in info bytes) are not considered copies.
45    pub const COPY_THRESHOLD: i32 = 128;
46
47    /// Info bytes per edge in a matched tree.
48    pub const EDGE_BYTES: i32 = 4;
49
50    /// Maximum content dissimilarity when fuzzy matching nodes in DFS search.
51    pub const DFS_MATCH_THRESHOLD: f64 = 0.2;
52
53    /// Maximum content dissimilarity when searching for potential matching
54    /// subtree roots.
55    pub const MAX_FUZZY_MATCH: f64 = 0.2;
56
57    /// Distance value used when both nodes are at the end of their
58    /// respective child lists (e.g., both at position 0 for left sibling check).
59    pub const END_MATCH: f64 = crate::measure::MAX_DIST;
60}
61
62/// A candidate match entry with distance metrics.
63#[derive(Debug, Clone)]
64pub struct CandidateEntry {
65    /// The candidate base node.
66    pub candidate: NodeRef,
67    /// Content distance between candidate and branch node.
68    pub distance: f64,
69    /// Minimum of left sibling distance, right sibling distance, and child list distance.
70    /// Set to -1.0 if not yet calculated.
71    pub left_right_down: f64,
72}
73
74impl CandidateEntry {
75    /// Creates a new candidate entry.
76    pub fn new(candidate: NodeRef, distance: f64, left_right_down: f64) -> Self {
77        CandidateEntry {
78            candidate,
79            distance,
80            left_right_down,
81        }
82    }
83}
84
85/// Iterator for traversing a tree in depth-first order.
86pub struct DfsTreeIterator {
87    /// Stack of (node, next_child_index) pairs for iterating.
88    stack: Vec<(NodeRef, usize)>,
89}
90
91impl DfsTreeIterator {
92    /// Creates a new DFS iterator starting at the given root.
93    pub fn new(root: NodeRef) -> Self {
94        DfsTreeIterator {
95            stack: vec![(root, 0)],
96        }
97    }
98}
99
100impl Iterator for DfsTreeIterator {
101    type Item = NodeRef;
102
103    fn next(&mut self) -> Option<Self::Item> {
104        while let Some((node, child_idx)) = self.stack.pop() {
105            let borrowed = node.borrow();
106            let child_count = borrowed.child_count();
107
108            if child_idx == 0 {
109                // First visit to this node - return it
110                drop(borrowed);
111                if child_count > 0 {
112                    // Push back with incremented index for later processing
113                    self.stack.push((node.clone(), 1));
114                    // Push first child
115                    let first_child = node.borrow().child(0).cloned();
116                    if let Some(child) = first_child {
117                        self.stack.push((child, 0));
118                    }
119                }
120                return Some(node);
121            } else if child_idx < child_count {
122                // Continue with next child
123                let next_child = borrowed.child(child_idx).cloned();
124                drop(borrowed);
125                self.stack.push((node, child_idx + 1));
126                if let Some(child) = next_child {
127                    self.stack.push((child, 0));
128                }
129            }
130            // If child_idx >= child_count, we're done with this node's children
131        }
132        None
133    }
134}
135
136#[cfg(test)]
137mod tests {
138    use super::*;
139    use crate::node::{new_base_node, NodeInner, XmlContent, XmlElement};
140    use std::collections::HashMap;
141
142    #[test]
143    fn test_dfs_iterator_single_node() {
144        let root = new_base_node(Some(XmlContent::Element(XmlElement::new(
145            "root".to_string(),
146            HashMap::new(),
147        ))));
148
149        let iter = DfsTreeIterator::new(root.clone());
150        let nodes: Vec<NodeRef> = iter.collect();
151
152        assert_eq!(nodes.len(), 1);
153        assert_eq!(nodes[0].borrow().id(), root.borrow().id());
154    }
155
156    #[test]
157    fn test_dfs_iterator_tree() {
158        // Build tree:
159        //       root
160        //      /    \
161        //     a      b
162        //    / \
163        //   c   d
164
165        let root = new_base_node(Some(XmlContent::Element(XmlElement::new(
166            "root".to_string(),
167            HashMap::new(),
168        ))));
169        let a = new_base_node(Some(XmlContent::Element(XmlElement::new(
170            "a".to_string(),
171            HashMap::new(),
172        ))));
173        let b = new_base_node(Some(XmlContent::Element(XmlElement::new(
174            "b".to_string(),
175            HashMap::new(),
176        ))));
177        let c = new_base_node(Some(XmlContent::Element(XmlElement::new(
178            "c".to_string(),
179            HashMap::new(),
180        ))));
181        let d = new_base_node(Some(XmlContent::Element(XmlElement::new(
182            "d".to_string(),
183            HashMap::new(),
184        ))));
185
186        NodeInner::add_child_to_ref(&root, a.clone());
187        NodeInner::add_child_to_ref(&root, b.clone());
188        NodeInner::add_child_to_ref(&a, c.clone());
189        NodeInner::add_child_to_ref(&a, d.clone());
190
191        let iter = DfsTreeIterator::new(root.clone());
192        let nodes: Vec<NodeRef> = iter.collect();
193
194        // DFS order: root, a, c, d, b
195        assert_eq!(nodes.len(), 5);
196
197        // Get element names for easier checking
198        let names: Vec<String> = nodes
199            .iter()
200            .map(|n| {
201                let borrowed = n.borrow();
202                if let Some(XmlContent::Element(e)) = borrowed.content() {
203                    e.qname().to_string()
204                } else {
205                    String::new()
206                }
207            })
208            .collect();
209
210        assert_eq!(names, vec!["root", "a", "c", "d", "b"]);
211    }
212
213    #[test]
214    fn test_candidate_entry() {
215        let node = new_base_node(None);
216        let entry = CandidateEntry::new(node.clone(), 0.5, -1.0);
217
218        assert_eq!(entry.distance, 0.5);
219        assert_eq!(entry.left_right_down, -1.0);
220    }
221}