thread_ast_engine/matchers/types.rs
1// SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6#![allow(
7 dead_code,
8 reason = "Some fields report they're dead if the `matching` feature is not enabled."
9)]
10//! # Core Pattern Matching Types
11//!
12//! Fundamental types and traits for AST pattern matching operations.
13//!
14//! ## Key Types
15//!
16//! - [`Matcher`] - Core trait for matching AST nodes
17//! - [`Pattern`] - Structural pattern for matching AST shapes
18//! - [`MatchStrictness`] - Controls how precisely patterns must match
19//! - [`PatternNode`] - Internal representation of pattern structure
20//!
21//! ## Usage
22//!
23//! These types are available even without the `matching` feature flag enabled,
24//! allowing API definitions that reference them without requiring full
25//! implementation dependencies.
26
27use crate::Doc;
28use crate::meta_var::{MetaVarEnv, MetaVariable};
29use crate::node::Node;
30use bit_set::BitSet;
31use std::borrow::Cow;
32use thiserror::Error;
33
34/// Core trait for matching AST nodes against patterns.
35///
36/// Implementors define how to match nodes, whether by structure, content,
37/// kind, or other criteria. The matcher can also capture meta-variables
38/// during the matching process.
39///
40/// # Type Parameters
41///
42/// The trait is generic over document types to support different source
43/// encodings and language implementations.
44///
45/// # Example Implementation
46///
47/// ```rust,ignore
48/// use thread_ast_engine::Matcher;
49///
50/// struct SimpleKindMatcher {
51/// target_kind: String,
52/// }
53///
54/// impl Matcher for SimpleKindMatcher {
55/// fn match_node_with_env<'tree, D: Doc>(
56/// &self,
57/// node: Node<'tree, D>,
58/// _env: &mut Cow<MetaVarEnv<'tree, D>>,
59/// ) -> Option<Node<'tree, D>> {
60/// if node.kind() == self.target_kind {
61/// Some(node)
62/// } else {
63/// None
64/// }
65/// }
66/// }
67/// ```
68pub trait Matcher {
69 /// Attempt to match a node, updating the meta-variable environment.
70 ///
71 /// Returns the matched node if successful, or `None` if the node doesn't match.
72 /// The returned node is usually the input node, but can be different for
73 /// matchers like `Has` that match based on descendants.
74 ///
75 /// # Parameters
76 ///
77 /// - `node` - The AST node to test for matching
78 /// - `env` - Meta-variable environment to capture variables during matching
79 ///
80 /// # Returns
81 ///
82 /// The matched node if successful, otherwise `None`
83 fn match_node_with_env<'tree, D: Doc>(
84 &self,
85 _node: Node<'tree, D>,
86 _env: &mut Cow<MetaVarEnv<'tree, D>>,
87 ) -> Option<Node<'tree, D>>;
88
89 /// Provide a hint about which node types this matcher can match.
90 ///
91 /// Returns a bitset of node kind IDs that this matcher might match,
92 /// or `None` if it needs to test all node types. Used for optimization
93 /// to avoid testing matchers against incompatible nodes.
94 ///
95 /// # Returns
96 ///
97 /// - `Some(BitSet)` - Specific node kinds this matcher can match
98 /// - `None` - This matcher needs to test all node types
99 fn potential_kinds(&self) -> Option<BitSet> {
100 None
101 }
102
103 /// Determine how much of a matched node should be replaced.
104 ///
105 /// Used during replacement to determine the exact span of text to replace.
106 /// Typically skips trailing punctuation or anonymous nodes.
107 ///
108 /// # Parameters
109 ///
110 /// - `node` - The matched node
111 ///
112 /// # Returns
113 ///
114 /// Number of bytes from the node's start position to replace,
115 /// or `None` to replace the entire node.
116 fn get_match_len<D: Doc>(&self, _node: Node<'_, D>) -> Option<usize> {
117 None
118 }
119}
120
121/// Extension trait providing convenient utility methods for [`Matcher`] implementations.
122///
123/// Automatically implemented for all types that implement [`Matcher`]. Provides
124/// higher-level operations like finding nodes and working with meta-variable environments.
125///
126/// # Important
127///
128/// You should not implement this trait manually - it's automatically implemented
129/// for all [`Matcher`] types.
130///
131/// # Example
132///
133/// ```rust,no_run
134/// # use thread_ast_engine::Language;
135/// # use thread_ast_engine::tree_sitter::LanguageExt;
136/// # use thread_ast_engine::MatcherExt;
137/// let ast = Language::Tsx.ast_grep("const x = 42;");
138/// let root = ast.root();
139///
140/// // Use MatcherExt methods
141/// if let Some(node_match) = root.find("const $VAR = $VALUE") {
142/// println!("Found constant declaration");
143/// }
144/// ```
145pub trait MatcherExt: Matcher {
146 fn match_node<'tree, D: Doc>(&self, node: Node<'tree, D>) -> Option<NodeMatch<'tree, D>>;
147
148 fn find_node<'tree, D: Doc>(&self, node: Node<'tree, D>) -> Option<NodeMatch<'tree, D>>;
149}
150
151/// Result of a successful pattern match containing the matched node and captured variables.
152///
153/// `NodeMatch` combines an AST node with the meta-variables captured during
154/// pattern matching. It acts like a regular [`Node`] (through [`Deref`]) while
155/// also providing access to captured variables through [`get_env`].
156///
157/// # Lifetime
158///
159/// The lifetime `'t` ties the match to its source document, ensuring memory safety.
160///
161/// # Usage Patterns
162///
163/// ```rust,ignore
164/// // Use as a regular node
165/// let text = node_match.text();
166/// let position = node_match.start_pos();
167///
168/// // Access captured meta-variables
169/// let env = node_match.get_env();
170/// let captured_name = env.get_match("VAR_NAME").unwrap();
171///
172/// // Generate replacement code
173/// let edit = node_match.replace_by("new code with $VAR_NAME");
174/// ```
175///
176/// # Type Parameters
177///
178/// - `'t` - Lifetime tied to the source document
179/// - `D: Doc` - Document type containing the source and language info
180#[derive(Clone, Debug)]
181#[cfg_attr(not(feature = "matching"), allow(dead_code))]
182pub struct NodeMatch<'t, D: Doc>(pub(crate) Node<'t, D>, pub(crate) MetaVarEnv<'t, D>);
183
/// Selects how closely a pattern has to agree with the code it is matched against.
///
/// The levels range from exact concrete-syntax-tree matching to loose,
/// structure-only matching.
///
/// # Variants
///
/// - **`Cst`** - All nodes must match exactly (concrete syntax tree)
/// - **`Smart`** - Matches meaningful nodes, ignoring trivial syntax
/// - **`Ast`** - Only structural nodes matter (abstract syntax tree)
/// - **`Relaxed`** - Ignores comments and focuses on code structure
/// - **`Signature`** - Matches structure only, ignoring all text content
///
/// # Example
///
/// ```rust,ignore
/// // With Cst strictness, these would be different:
/// // "let x=42;" vs "let x = 42;"
/// //
/// // With Ast strictness, they match the same pattern:
/// // "let $VAR = $VALUE"
/// ```
// NOTE(review): the example above implies that whitespace alone makes two
// inputs differ under `Cst`. Whether whitespace is represented as nodes at
// all depends on the parser; confirm before relying on this example.
#[derive(Clone, Copy, Debug)]
pub enum MatchStrictness {
    /// Concrete syntax tree: every node has to match exactly.
    Cst,
    /// Every node has to match, apart from trivial syntax elements.
    Smart,
    /// Abstract syntax tree: only structural nodes are compared.
    Ast,
    /// Like AST matching, with comments ignored as well.
    Relaxed,
    /// Structure only; the text content of nodes is ignored entirely.
    Signature,
}
219
220/// Structural pattern for matching AST nodes based on their shape and content.
221///
222/// Patterns represent code structures with support for meta-variables (like `$VAR`)
223/// that can capture parts of the matched code. They're built from source code strings
224/// and compiled into efficient matching structures.
225///
226/// # Example
227///
228/// ```rust,ignore
229/// // Pattern for variable declarations
230/// let pattern = Pattern::new("let $NAME = $VALUE", language);
231///
232/// // Can match: "let x = 42", "let result = calculate()", etc.
233/// ```
234#[derive(Clone)]
235pub struct Pattern {
236 /// The root pattern node containing the matching logic
237 pub node: PatternNode,
238 /// Optional hint about the root node kind for optimization
239 pub(crate) root_kind: Option<u16>,
240 /// How strictly the pattern should match
241 pub strictness: MatchStrictness,
242}
243
/// Intermediate value from which a [`Pattern`] is built.
///
/// It holds the pattern's source text and, optionally, a selector that
/// supplies context for the match. Both pieces may borrow from the caller
/// for the lifetime `'a`.
#[derive(Clone, Debug)]
pub struct PatternBuilder<'a> {
    /// Selector for contextual matching, or `None` when there is no context.
    pub(crate) selector: Option<&'a str>,
    /// Source text of the pattern, either borrowed or owned.
    pub(crate) src: Cow<'a, str>,
}
255
256/// Internal representation of a pattern's structure.
257///
258/// Patterns are compiled into a tree of `PatternNode` elements that
259/// efficiently represent the matching logic for different AST structures.
260#[derive(Clone)]
261pub enum PatternNode {
262 /// Meta-variable that captures matched content
263 MetaVar {
264 /// The meta-variable specification (e.g., `$VAR`, `$$$ITEMS`)
265 meta_var: MetaVariable,
266 },
267 /// Leaf node with specific text content
268 Terminal {
269 /// Expected text content
270 text: String,
271 /// Whether this represents a named AST node
272 is_named: bool,
273 /// Node type identifier
274 kind_id: u16,
275 },
276 /// Internal node with child patterns
277 Internal {
278 /// Node type identifier
279 kind_id: u16,
280 /// Child pattern nodes
281 children: Vec<Self>,
282 },
283}
284
285#[derive(Debug, Error)]
286pub enum PatternError {
287 #[error("Fails to parse the pattern query: `{0}`")]
288 Parse(String),
289 #[error("No AST root is detected. Please check the pattern source `{0}`.")]
290 NoContent(String),
291 #[error("Multiple AST nodes are detected. Please check the pattern source `{0}`.")]
292 MultipleNode(String),
293 #[error(transparent)]
294 #[cfg(feature = "matching")]
295 InvalidKind(#[from] super::kind::KindMatcherError),
296 #[error(
297 "Fails to create Contextual pattern: selector `{selector}` matches no node in the context `{context}`."
298 )]
299 NoSelectorInContext { context: String, selector: String },
300}