Skip to main content

codemod_core/pattern/
matcher.rs

1//! Pattern matching engine.
2//!
3//! Given a [`Pattern`](super::Pattern) and a source file, this module finds
4//! all locations where the `before_template` matches. Each match records the
5//! byte range, position, matched text, and variable bindings so that the
6//! [`TransformApplier`](crate::transform::applier::TransformApplier) can
7//! later produce the replacement text.
8
9use std::collections::HashMap;
10
11use tree_sitter::{Node, Parser, Tree};
12
13use super::{Pattern, PatternVar};
14use crate::error::CodemodError;
15use crate::language::LanguageAdapter;
16
17// ---------------------------------------------------------------------------
18// Public types
19// ---------------------------------------------------------------------------
20
21/// A single match found in source code.
22#[derive(Debug, Clone)]
23pub struct Match {
24    /// Byte offset range in the source.
25    pub byte_range: std::ops::Range<usize>,
26    /// Line/column start position (0-indexed).
27    pub start_position: Position,
28    /// Line/column end position (0-indexed).
29    pub end_position: Position,
30    /// The matched source text.
31    pub matched_text: String,
32    /// Captured variable bindings: variable name -> matched text.
33    pub bindings: HashMap<String, String>,
34}
35
36/// A line/column position in source code.
37#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub struct Position {
39    /// 0-indexed line number.
40    pub line: usize,
41    /// 0-indexed column (byte offset within the line).
42    pub column: usize,
43}
44
45// ---------------------------------------------------------------------------
46// PatternMatcher
47// ---------------------------------------------------------------------------
48
49/// Finds occurrences of a [`Pattern`] in source code.
50///
51/// The matcher parses the source into an AST and then walks every subtree,
52/// attempting to unify it with the `before_template` AST. When a subtree
53/// matches — i.e. has the same structure and all non-variable leaves agree —
54/// it records a [`Match`] with the captured variable bindings.
55pub struct PatternMatcher {
56    language: Box<dyn LanguageAdapter>,
57}
58
59impl PatternMatcher {
60    /// Create a new matcher backed by the given language adapter.
61    pub fn new(language: Box<dyn LanguageAdapter>) -> Self {
62        Self { language }
63    }
64
65    /// Find all matches of `pattern` in the given `source` code.
66    ///
67    /// # Errors
68    ///
69    /// Returns [`CodemodError::Parse`] if the source or the pattern template
70    /// cannot be parsed.
71    pub fn find_matches(&self, source: &str, pattern: &Pattern) -> crate::Result<Vec<Match>> {
72        // 1. Parse the before_template to get its AST shape.
73        let template_tree = self.parse(&pattern.before_template)?;
74        // 2. Parse the target source.
75        let source_tree = self.parse(source)?;
76
77        let template_root = template_tree.root_node();
78        let source_root = source_tree.root_node();
79
80        let mut matches = Vec::new();
81
82        // 3. Walk every node in the source tree and try to match.
83        self.walk_and_match(
84            source_root,
85            template_root,
86            source,
87            &pattern.before_template,
88            &pattern.variables,
89            &mut matches,
90        );
91
92        Ok(matches)
93    }
94
95    // -----------------------------------------------------------------
96    // Internal helpers
97    // -----------------------------------------------------------------
98
99    /// Parse source text into a tree-sitter [`Tree`].
100    fn parse(&self, source: &str) -> crate::Result<Tree> {
101        let mut parser = Parser::new();
102        parser
103            .set_language(&self.language.language())
104            .map_err(|e| CodemodError::Parse(format!("Failed to set language: {e}")))?;
105        parser
106            .parse(source, None)
107            .ok_or_else(|| CodemodError::Parse("tree-sitter returned no tree".into()))
108    }
109
110    /// Recursively walk the source tree. At every named node, attempt to
111    /// unify with the template root. If the node does not match, recurse
112    /// into its children.
113    fn walk_and_match(
114        &self,
115        source_node: Node,
116        template_root: Node,
117        source: &str,
118        template_source: &str,
119        variables: &[PatternVar],
120        matches: &mut Vec<Match>,
121    ) {
122        // Try matching the current source node against the template root.
123        let mut bindings = HashMap::new();
124        if self.try_match(
125            source_node,
126            template_root,
127            source,
128            template_source,
129            variables,
130            &mut bindings,
131        ) {
132            let start = source_node.start_position();
133            let end = source_node.end_position();
134            matches.push(Match {
135                byte_range: source_node.byte_range(),
136                start_position: Position {
137                    line: start.row,
138                    column: start.column,
139                },
140                end_position: Position {
141                    line: end.row,
142                    column: end.column,
143                },
144                matched_text: source[source_node.byte_range()].to_string(),
145                bindings,
146            });
147            // Do not recurse into children of a matched node to avoid
148            // overlapping matches.
149            return;
150        }
151
152        // No match — recurse into named children.
153        let child_count = source_node.named_child_count();
154        for i in 0..child_count {
155            if let Some(child) = source_node.named_child(i) {
156                self.walk_and_match(
157                    child,
158                    template_root,
159                    source,
160                    template_source,
161                    variables,
162                    matches,
163                );
164            }
165        }
166    }
167
168    /// Attempt to unify `source_node` with `template_node`.
169    ///
170    /// Returns `true` if the two trees have the same shape, all literal
171    /// leaves agree, and variable positions are captured in `bindings`.
172    fn try_match(
173        &self,
174        source_node: Node,
175        template_node: Node,
176        source: &str,
177        template_source: &str,
178        variables: &[PatternVar],
179        bindings: &mut HashMap<String, String>,
180    ) -> bool {
181        let template_text = &template_source[template_node.byte_range()];
182
183        // Check if the template text at this position is a variable placeholder.
184        if let Some(var) = self.is_variable_placeholder(template_text, variables) {
185            let source_text = source[source_node.byte_range()].to_string();
186            // If this variable was already bound, the binding must be consistent.
187            if let Some(existing) = bindings.get(&var.name) {
188                return *existing == source_text;
189            }
190            bindings.insert(var.name.clone(), source_text);
191            return true;
192        }
193
194        // Kinds must agree.
195        if source_node.kind() != template_node.kind() {
196            return false;
197        }
198
199        // Leaf nodes: text must match exactly.
200        if template_node.named_child_count() == 0 && source_node.named_child_count() == 0 {
201            let s_text = &source[source_node.byte_range()];
202            return s_text == template_text;
203        }
204
205        // Structural nodes: children count must agree and each child must match.
206        let t_count = template_node.named_child_count();
207        let s_count = source_node.named_child_count();
208        if t_count != s_count {
209            return false;
210        }
211
212        for i in 0..t_count {
213            let t_child = match template_node.named_child(i) {
214                Some(c) => c,
215                None => return false,
216            };
217            let s_child = match source_node.named_child(i) {
218                Some(c) => c,
219                None => return false,
220            };
221            if !self.try_match(
222                s_child,
223                t_child,
224                source,
225                template_source,
226                variables,
227                bindings,
228            ) {
229                return false;
230            }
231        }
232
233        true
234    }
235
236    /// Check whether `text` matches a known pattern variable placeholder
237    /// (e.g. `"$var1"`).
238    fn is_variable_placeholder<'a>(
239        &self,
240        text: &str,
241        variables: &'a [PatternVar],
242    ) -> Option<&'a PatternVar> {
243        variables.iter().find(|v| v.name == text)
244    }
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250
251    #[test]
252    fn test_position_equality() {
253        let a = Position { line: 1, column: 4 };
254        let b = Position { line: 1, column: 4 };
255        assert_eq!(a, b);
256    }
257}