Skip to main content

thread_ast_engine/
matcher.rs

1// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6
7//! # Pattern Matching Engine
8//!
9//! Core pattern matching functionality for finding and matching AST nodes.
10//!
11//! ## Key Traits and Types
12//!
13//! - [`Matcher`] - Core trait for matching AST nodes against patterns
14//! - [`MatcherExt`] - Extension trait providing utility methods like [`MatcherExt::find_node`]
15//! - [`Pattern`] - Matches nodes based on AST structure with meta-variables
16//! - [`NodeMatch`] - Result of a successful pattern match, containing the matched node and captured variables
17//!
18//! ## Pattern Types
19//!
20//! The engine supports several types of matchers:
21//!
22//! - **`Pattern`** - Structural matching based on AST shape (most common)
23//! - **`KindMatcher`** - Simple matching based on node type/kind
24//! - **`RegexMatcher`** - Text-based matching using regular expressions
25//! - **`MatchAll`** / **`MatchNone`** - Utility matchers for always/never matching
26//!
27//! ## Examples
28//!
29//! ### Basic Pattern Matching
30//!
31//! ```rust,no_run
32//! # use thread_ast_engine::Language;
33//! # use thread_ast_engine::tree_sitter::LanguageExt;
34//! # use thread_ast_engine::matcher::MatcherExt;
35//! let ast = Language::Tsx.ast_grep("let x = 42;");
36//! let root = ast.root();
37//!
38//! // Find variable declarations
39//! if let Some(decl) = root.find("let $VAR = $VALUE") {
40//!     let var_name = decl.get_env().get_match("VAR").unwrap();
41//!     let value = decl.get_env().get_match("VALUE").unwrap();
42//!     println!("Variable {} = {}", var_name.text(), value.text());
43//! }
44//! ```
45//!
46//! ### Finding Multiple Matches
47//!
48//! ```rust,no_run
49//! # use thread_ast_engine::Language;
50//! # use thread_ast_engine::tree_sitter::LanguageExt;
51//! # use thread_ast_engine::matcher::MatcherExt;
52//! let code = "let a = 1; let b = 2; let c = 3;";
53//! let ast = Language::Tsx.ast_grep(code);
54//! let root = ast.root();
55//!
56//! // Find all variable declarations
57//! for decl in root.find_all("let $VAR = $VALUE") {
58//!     let var_name = decl.get_env().get_match("VAR").unwrap();
59//!     println!("Found variable: {}", var_name.text());
60//! }
61//! ```
62//!
63//! ### `NodeMatch`
64//!
65//! #### Pattern Match Results with Meta-Variable Capture
66//!
67//! Contains the implementation for the [`NodeMatch`] type that represents
68//! the result of a successful pattern match, including both the matched AST node and
69//! any captured meta-variables.
70//!
71//! When a pattern like `"function $NAME($$$PARAMS) { $$$BODY }"` matches an AST node,
72//! it creates a [`NodeMatch`] that stores:
73//! - The matched node (the function declaration)
74//! - The captured variables (`$NAME`, `$PARAMS`, `$BODY`)
75//!
76//! #### Key Features
77//!
78//! - **Node access**: Use like a regular [`Node`] through [`Deref`]
79//! - **Meta-variable access**: Get captured variables via [`NodeMatch::get_env`]
80//! - **Code replacement**: Generate edits with [`NodeMatch::replace_by`]
81//! - **Type safety**: Lifetime-bound to ensure memory safety
82//!
83//! #### Example Usage
84//!
85//! ```rust,ignore
86//! // Find function declarations
87//! let matches = root.find_all("function $NAME($$$PARAMS) { $$$BODY }");
88//!
89//! for match_ in matches {
90//!     // Use as a regular node
91//!     println!("Function at line {}", match_.start_pos().line());
92//!
93//!     // Access captured meta-variables
94//!     let env = match_.get_env();
95//!     let name = env.get_match("NAME").unwrap();
96//!     println!("Function name: {}", name.text());
97//!
98//!     // Generate replacement code
99//!     let edit = match_.replace_by("async function $NAME($$$PARAMS) { $$$BODY }");
100//! }
101//! ```
102
103use crate::{Doc, Node, meta_var::MetaVarEnv, source::Edit as E};
104
105pub use crate::matchers::kind::*;
106pub use crate::matchers::matcher::{Matcher, MatcherExt, NodeMatch};
107pub use crate::matchers::pattern::*;
108pub use crate::matchers::text::*;
109use bit_set::BitSet;
110use std::any::TypeId;
111use std::borrow::{Borrow, Cow};
112use std::cell::RefCell;
113use std::collections::HashMap;
114use std::ops::Deref;
115
116use crate::replacer::Replacer;
117
118/// Thread-local cache for compiled patterns, keyed by (`pattern_source`, `language_type_id`).
119///
120/// Pattern compilation via `Pattern::try_new` involves tree-sitter parsing which is
121/// expensive (~100µs). This cache eliminates redundant compilations when the same
122/// pattern string is used repeatedly (common in rule-based scanning), providing
123/// up to 100x speedup on cache hits.
124///
125/// The cache is bounded to `PATTERN_CACHE_MAX_SIZE` entries per thread and uses
126/// LRU-style eviction (full clear when capacity is exceeded, which is rare in
127/// practice since pattern sets are typically small and stable).
128const PATTERN_CACHE_MAX_SIZE: usize = 256;
129
130thread_local! {
131    static PATTERN_CACHE: RefCell<HashMap<(String, TypeId), Pattern>> =
132        RefCell::new(HashMap::with_capacity(32));
133}
134
135/// Look up or compile a pattern, caching the result per-thread.
136///
137/// Returns `None` if the pattern fails to compile (same as `Pattern::try_new(...).ok()`).
138fn cached_pattern_try_new<D: Doc>(src: &str, lang: &D::Lang) -> Option<Pattern> {
139    let lang_id = TypeId::of::<D::Lang>();
140
141    PATTERN_CACHE.with(|cache| {
142        let mut cache = cache.borrow_mut();
143
144        // Check cache first
145        if let Some(pattern) = cache.get(&(src.to_string(), lang_id)) {
146            return Some(pattern.clone());
147        }
148
149        // Compile and cache on miss
150        let pattern = Pattern::try_new(src, lang).ok()?;
151
152        // Simple eviction: clear when full (rare - pattern sets are typically small)
153        if cache.len() >= PATTERN_CACHE_MAX_SIZE {
154            cache.clear();
155        }
156
157        cache.insert((src.to_string(), lang_id), pattern.clone());
158        Some(pattern)
159    })
160}
161
162type Edit<D> = E<<D as Doc>::Source>;
163
164impl<'tree, D: Doc> NodeMatch<'tree, D> {
165    pub const fn new(node: Node<'tree, D>, env: MetaVarEnv<'tree, D>) -> Self {
166        Self(node, env)
167    }
168
169    pub const fn get_node(&self) -> &Node<'tree, D> {
170        &self.0
171    }
172
173    /// Returns the populated `MetaVarEnv` for this match.
174    pub const fn get_env(&self) -> &MetaVarEnv<'tree, D> {
175        &self.1
176    }
177    pub const fn get_env_mut(&mut self) -> &mut MetaVarEnv<'tree, D> {
178        &mut self.1
179    }
180    /// # Safety
181    /// should only called for readopting nodes
182    pub(crate) const unsafe fn get_node_mut(&mut self) -> &mut Node<'tree, D> {
183        &mut self.0
184    }
185}
186
187impl<D: Doc> NodeMatch<'_, D> {
188    pub fn replace_by<R: Replacer<D>>(&self, replacer: R) -> Edit<D> {
189        let range = self.range();
190        let position = range.start;
191        let deleted_length = range.len();
192        let inserted_text = replacer.generate_replacement(self);
193        Edit::<D> {
194            position,
195            deleted_length,
196            inserted_text,
197        }
198    }
199
200    #[doc(hidden)]
201    pub fn make_edit<M, R>(&self, matcher: &M, replacer: &R) -> Edit<D>
202    where
203        M: Matcher,
204        R: Replacer<D>,
205    {
206        let range = replacer.get_replaced_range(self, matcher);
207        let inserted_text = replacer.generate_replacement(self);
208        Edit::<D> {
209            position: range.start,
210            deleted_length: range.len(),
211            inserted_text,
212        }
213    }
214}
215
216impl<'tree, D: Doc> From<Node<'tree, D>> for NodeMatch<'tree, D> {
217    fn from(node: Node<'tree, D>) -> Self {
218        Self(node, MetaVarEnv::new())
219    }
220}
221
222/// `NodeMatch` is an immutable view to Node
223impl<'tree, D: Doc> From<NodeMatch<'tree, D>> for Node<'tree, D> {
224    fn from(node_match: NodeMatch<'tree, D>) -> Self {
225        node_match.0
226    }
227}
228
229/// `NodeMatch` is an immutable view to Node
230impl<'tree, D: Doc> Deref for NodeMatch<'tree, D> {
231    type Target = Node<'tree, D>;
232    fn deref(&self) -> &Self::Target {
233        &self.0
234    }
235}
236
237/// `NodeMatch` is an immutable view to Node
238impl<'tree, D: Doc> Borrow<Node<'tree, D>> for NodeMatch<'tree, D> {
239    fn borrow(&self) -> &Node<'tree, D> {
240        &self.0
241    }
242}
243
244impl<T> MatcherExt for T
245where
246    T: Matcher,
247{
248    fn match_node<'tree, D: Doc>(&self, node: Node<'tree, D>) -> Option<NodeMatch<'tree, D>> {
249        // in future we might need to customize initial MetaVarEnv
250        let mut env = Cow::Owned(MetaVarEnv::new());
251        let node = self.match_node_with_env(node, &mut env)?;
252        Some(NodeMatch::new(node, env.into_owned()))
253    }
254
255    fn find_node<'tree, D: Doc>(&self, node: Node<'tree, D>) -> Option<NodeMatch<'tree, D>> {
256        for n in node.dfs() {
257            if let Some(ret) = self.match_node(n.clone()) {
258                return Some(ret);
259            }
260        }
261        None
262    }
263}
264
265impl Matcher for str {
266    fn match_node_with_env<'tree, D: Doc>(
267        &self,
268        node: Node<'tree, D>,
269        env: &mut Cow<MetaVarEnv<'tree, D>>,
270    ) -> Option<Node<'tree, D>> {
271        let pattern = cached_pattern_try_new::<D>(self, node.lang())?;
272        pattern.match_node_with_env(node, env)
273    }
274
275    fn get_match_len<D: Doc>(&self, node: Node<'_, D>) -> Option<usize> {
276        let pattern = cached_pattern_try_new::<D>(self, node.lang())?;
277        pattern.get_match_len(node)
278    }
279}
280
281impl<T> Matcher for &T
282where
283    T: Matcher + ?Sized,
284{
285    fn match_node_with_env<'tree, D: Doc>(
286        &self,
287        node: Node<'tree, D>,
288        env: &mut Cow<MetaVarEnv<'tree, D>>,
289    ) -> Option<Node<'tree, D>> {
290        (**self).match_node_with_env(node, env)
291    }
292
293    fn potential_kinds(&self) -> Option<BitSet> {
294        (**self).potential_kinds()
295    }
296
297    fn get_match_len<D: Doc>(&self, node: Node<'_, D>) -> Option<usize> {
298        (**self).get_match_len(node)
299    }
300}
301
302pub struct MatchAll;
303impl Matcher for MatchAll {
304    fn match_node_with_env<'tree, D: Doc>(
305        &self,
306        node: Node<'tree, D>,
307        _env: &mut Cow<MetaVarEnv<'tree, D>>,
308    ) -> Option<Node<'tree, D>> {
309        Some(node)
310    }
311
312    fn potential_kinds(&self) -> Option<BitSet> {
313        // return None to match anything
314        None
315    }
316}
317
318pub struct MatchNone;
319impl Matcher for MatchNone {
320    fn match_node_with_env<'tree, D: Doc>(
321        &self,
322        _node: Node<'tree, D>,
323        _env: &mut Cow<MetaVarEnv<'tree, D>>,
324    ) -> Option<Node<'tree, D>> {
325        None
326    }
327
328    fn potential_kinds(&self) -> Option<BitSet> {
329        // matches nothing
330        Some(BitSet::new())
331    }
332}
333
334#[cfg(test)]
335mod test {
336    use super::*;
337    use crate::language::Tsx;
338    use crate::tree_sitter::{LanguageExt, StrDoc};
339
340    fn use_node<L: LanguageExt>(n: &Node<StrDoc<L>>) -> String {
341        n.text().to_string()
342    }
343
344    fn borrow_node<'a, D, B>(b: B) -> String
345    where
346        D: Doc + 'static,
347        B: Borrow<Node<'a, D>>,
348    {
349        b.borrow().text().to_string()
350    }
351
352    #[test]
353    fn test_node_match_as_node() {
354        let root = Tsx.ast_grep("var a = 1");
355        let node = root.root();
356        let src = node.text().to_string();
357        let nm = NodeMatch::from(node);
358        let ret = use_node(&*nm);
359        assert_eq!(ret, src);
360        assert_eq!(use_node(&*nm), borrow_node(nm));
361    }
362
363    #[test]
364    fn test_node_env() {
365        let root = Tsx.ast_grep("var a = 1");
366        let find = root.root().find("var $A = 1").expect("should find");
367        let env = find.get_env();
368        let node = env.get_match("A").expect("should find");
369        assert_eq!(node.text(), "a");
370    }
371
372    #[test]
373    fn test_replace_by() {
374        let root = Tsx.ast_grep("var a = 1");
375        let find = root.root().find("var $A = 1").expect("should find");
376        let fixed = find.replace_by("var b = $A");
377        assert_eq!(fixed.position, 0);
378        assert_eq!(fixed.deleted_length, 9);
379        assert_eq!(fixed.inserted_text, "var b = a".as_bytes());
380    }
381}