codemod_core/pattern/matcher.rs
1//! Pattern matching engine.
2//!
3//! Given a [`Pattern`](super::Pattern) and a source file, this module finds
4//! all locations where the `before_template` matches. Each match records the
5//! byte range, position, matched text, and variable bindings so that the
6//! [`TransformApplier`](crate::transform::applier::TransformApplier) can
7//! later produce the replacement text.
8
9use std::collections::HashMap;
10
11use tree_sitter::{Node, Parser, Tree};
12
13use super::{Pattern, PatternVar};
14use crate::error::CodemodError;
15use crate::language::LanguageAdapter;
16
17// ---------------------------------------------------------------------------
18// Public types
19// ---------------------------------------------------------------------------
20
21/// A single match found in source code.
22#[derive(Debug, Clone)]
23pub struct Match {
24 /// Byte offset range in the source.
25 pub byte_range: std::ops::Range<usize>,
26 /// Line/column start position (0-indexed).
27 pub start_position: Position,
28 /// Line/column end position (0-indexed).
29 pub end_position: Position,
30 /// The matched source text.
31 pub matched_text: String,
32 /// Captured variable bindings: variable name -> matched text.
33 pub bindings: HashMap<String, String>,
34}
35
36/// A line/column position in source code.
37#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub struct Position {
39 /// 0-indexed line number.
40 pub line: usize,
41 /// 0-indexed column (byte offset within the line).
42 pub column: usize,
43}
44
45// ---------------------------------------------------------------------------
46// PatternMatcher
47// ---------------------------------------------------------------------------
48
49/// Finds occurrences of a [`Pattern`] in source code.
50///
51/// The matcher parses the source into an AST and then walks every subtree,
52/// attempting to unify it with the `before_template` AST. When a subtree
53/// matches — i.e. has the same structure and all non-variable leaves agree —
54/// it records a [`Match`] with the captured variable bindings.
55pub struct PatternMatcher {
56 language: Box<dyn LanguageAdapter>,
57}
58
59impl PatternMatcher {
60 /// Create a new matcher backed by the given language adapter.
61 pub fn new(language: Box<dyn LanguageAdapter>) -> Self {
62 Self { language }
63 }
64
65 /// Find all matches of `pattern` in the given `source` code.
66 ///
67 /// # Errors
68 ///
69 /// Returns [`CodemodError::Parse`] if the source or the pattern template
70 /// cannot be parsed.
71 pub fn find_matches(&self, source: &str, pattern: &Pattern) -> crate::Result<Vec<Match>> {
72 // 1. Parse the before_template to get its AST shape.
73 let template_tree = self.parse(&pattern.before_template)?;
74 // 2. Parse the target source.
75 let source_tree = self.parse(source)?;
76
77 let template_root = template_tree.root_node();
78 let source_root = source_tree.root_node();
79
80 let mut matches = Vec::new();
81
82 // 3. Walk every node in the source tree and try to match.
83 self.walk_and_match(
84 source_root,
85 template_root,
86 source,
87 &pattern.before_template,
88 &pattern.variables,
89 &mut matches,
90 );
91
92 Ok(matches)
93 }
94
95 // -----------------------------------------------------------------
96 // Internal helpers
97 // -----------------------------------------------------------------
98
99 /// Parse source text into a tree-sitter [`Tree`].
100 fn parse(&self, source: &str) -> crate::Result<Tree> {
101 let mut parser = Parser::new();
102 parser
103 .set_language(&self.language.language())
104 .map_err(|e| CodemodError::Parse(format!("Failed to set language: {e}")))?;
105 parser
106 .parse(source, None)
107 .ok_or_else(|| CodemodError::Parse("tree-sitter returned no tree".into()))
108 }
109
110 /// Recursively walk the source tree. At every named node, attempt to
111 /// unify with the template root. If the node does not match, recurse
112 /// into its children.
113 fn walk_and_match(
114 &self,
115 source_node: Node,
116 template_root: Node,
117 source: &str,
118 template_source: &str,
119 variables: &[PatternVar],
120 matches: &mut Vec<Match>,
121 ) {
122 // Try matching the current source node against the template root.
123 let mut bindings = HashMap::new();
124 if self.try_match(
125 source_node,
126 template_root,
127 source,
128 template_source,
129 variables,
130 &mut bindings,
131 ) {
132 let start = source_node.start_position();
133 let end = source_node.end_position();
134 matches.push(Match {
135 byte_range: source_node.byte_range(),
136 start_position: Position {
137 line: start.row,
138 column: start.column,
139 },
140 end_position: Position {
141 line: end.row,
142 column: end.column,
143 },
144 matched_text: source[source_node.byte_range()].to_string(),
145 bindings,
146 });
147 // Do not recurse into children of a matched node to avoid
148 // overlapping matches.
149 return;
150 }
151
152 // No match — recurse into named children.
153 let child_count = source_node.named_child_count();
154 for i in 0..child_count {
155 if let Some(child) = source_node.named_child(i) {
156 self.walk_and_match(
157 child,
158 template_root,
159 source,
160 template_source,
161 variables,
162 matches,
163 );
164 }
165 }
166 }
167
168 /// Attempt to unify `source_node` with `template_node`.
169 ///
170 /// Returns `true` if the two trees have the same shape, all literal
171 /// leaves agree, and variable positions are captured in `bindings`.
172 fn try_match(
173 &self,
174 source_node: Node,
175 template_node: Node,
176 source: &str,
177 template_source: &str,
178 variables: &[PatternVar],
179 bindings: &mut HashMap<String, String>,
180 ) -> bool {
181 let template_text = &template_source[template_node.byte_range()];
182
183 // Check if the template text at this position is a variable placeholder.
184 if let Some(var) = self.is_variable_placeholder(template_text, variables) {
185 let source_text = source[source_node.byte_range()].to_string();
186 // If this variable was already bound, the binding must be consistent.
187 if let Some(existing) = bindings.get(&var.name) {
188 return *existing == source_text;
189 }
190 bindings.insert(var.name.clone(), source_text);
191 return true;
192 }
193
194 // Kinds must agree.
195 if source_node.kind() != template_node.kind() {
196 return false;
197 }
198
199 // Leaf nodes: text must match exactly.
200 if template_node.named_child_count() == 0 && source_node.named_child_count() == 0 {
201 let s_text = &source[source_node.byte_range()];
202 return s_text == template_text;
203 }
204
205 // Structural nodes: children count must agree and each child must match.
206 let t_count = template_node.named_child_count();
207 let s_count = source_node.named_child_count();
208 if t_count != s_count {
209 return false;
210 }
211
212 for i in 0..t_count {
213 let t_child = match template_node.named_child(i) {
214 Some(c) => c,
215 None => return false,
216 };
217 let s_child = match source_node.named_child(i) {
218 Some(c) => c,
219 None => return false,
220 };
221 if !self.try_match(
222 s_child,
223 t_child,
224 source,
225 template_source,
226 variables,
227 bindings,
228 ) {
229 return false;
230 }
231 }
232
233 true
234 }
235
236 /// Check whether `text` matches a known pattern variable placeholder
237 /// (e.g. `"$var1"`).
238 fn is_variable_placeholder<'a>(
239 &self,
240 text: &str,
241 variables: &'a [PatternVar],
242 ) -> Option<&'a PatternVar> {
243 variables.iter().find(|v| v.name == text)
244 }
245}
246
247#[cfg(test)]
248mod tests {
249 use super::*;
250
251 #[test]
252 fn test_position_equality() {
253 let a = Position { line: 1, column: 4 };
254 let b = Position { line: 1, column: 4 };
255 assert_eq!(a, b);
256 }
257}