thread_ast_engine/matchers/kind.rs
1// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6
7//! # AST Node Kind Matching
8//!
9//! Provides matchers that filter AST nodes based on their syntactic type (kind).
10//! Every AST node has a "kind" that describes what syntax element it represents
//! (e.g., `function_declaration`, `identifier`, `string_literal`).
12//!
13//! ## Core Types
14//!
15//! - [`KindMatcher`] - Matches nodes of a specific syntactic type
16//! - [`KindMatcherError`] - Errors when creating matchers with invalid kinds
17//! - [`kind_utils`] - Utilities for working with node kinds
18//!
19//! ## Example Usage
20//!
21//! ```rust,ignore
22//! use thread_ast_engine::matchers::KindMatcher;
23//! use thread_ast_engine::matcher::MatcherExt;
24//!
25//! // Match all function declarations
26//! let matcher = KindMatcher::new("function_declaration", &language);
27//! let functions: Vec<_> = root.find_all(&matcher).collect();
28//!
29//! // Match parsing errors in source code
30//! let error_matcher = KindMatcher::error_matcher();
31//! let errors: Vec<_> = root.find_all(&error_matcher).collect();
32//! ```
33//!
34//! ## Node Kind Concepts
35//!
36//! - **Named nodes** - Represent actual language constructs (functions, variables, etc.)
37//! - **Anonymous nodes** - Represent punctuation and keywords (`{`, `}`, `let`, etc.)
38//! - **Error nodes** - Represent unparsable syntax (syntax errors)
39//!
40//! Kind matching is useful for:
41//! - Finding all nodes of a specific type (all functions, all classes, etc.)
42//! - Detecting syntax errors in source code
43//! - Building language-specific analysis tools
44
45use super::matcher::Matcher;
46
47use crate::language::Language;
48use crate::meta_var::MetaVarEnv;
49use crate::node::KindId;
50use crate::{Doc, Node};
51
52use std::borrow::Cow;
53
54use bit_set::BitSet;
55use thiserror::Error;
56
57// 0 is symbol_end for not found, 65535 is builtin symbol ERROR
58// see https://tree-sitter.docsforge.com/master/api/#TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
59// and https://tree-sitter.docsforge.com/master/api/ts_language_symbol_for_name/
60const TS_BUILTIN_SYM_END: KindId = 0;
61const TS_BUILTIN_SYM_ERROR: KindId = 65535;
62
63/// Errors that can occur when creating a [`KindMatcher`].
64#[derive(Debug, Error)]
65pub enum KindMatcherError {
66 /// The specified node kind name doesn't exist in the language grammar.
67 ///
68 /// This happens when you try to match a node type that isn't defined
69 /// in the tree-sitter grammar for the language.
70 #[error("Kind `{0}` is invalid.")]
71 InvalidKindName(String),
72}
73
74/// Matcher that finds AST nodes based on their syntactic type (kind).
75///
76/// `KindMatcher` is the simplest type of matcher - it matches nodes whose
77/// type matches a specific string. Every AST node has a "kind" that describes
78/// what syntax element it represents.
79///
80/// # Examples
81///
82/// ```rust,ignore
83/// // Match all function declarations
84/// let matcher = KindMatcher::new("function_declaration", &language);
85/// let functions: Vec<_> = root.find_all(&matcher).collect();
86///
87/// // Match all identifiers
88/// let id_matcher = KindMatcher::new("identifier", &language);
89/// let identifiers: Vec<_> = root.find_all(&id_matcher).collect();
90///
91/// // Find syntax errors in code
92/// let error_matcher = KindMatcher::error_matcher();
93/// let errors: Vec<_> = root.find_all(&error_matcher).collect();
94/// ```
95///
96/// # Common Node Kinds
97///
98/// The exact node kinds depend on the language, but common examples include:
99/// - `"function_declaration"` - Function definitions
100/// - `"identifier"` - Variable/function names
101/// - `"string_literal"` - String values
102/// - `"number"` - Numeric literals
103/// - `"ERROR"` - Syntax errors
104#[derive(Debug, Clone)]
105pub struct KindMatcher {
106 /// The numeric ID of the node kind to match
107 kind: KindId,
108}
109
110impl KindMatcher {
111 pub fn new<L: Language>(node_kind: &str, lang: &L) -> Self {
112 Self {
113 kind: lang.kind_to_id(node_kind),
114 }
115 }
116
117 pub fn try_new<L: Language>(node_kind: &str, lang: &L) -> Result<Self, KindMatcherError> {
118 let s = Self::new(node_kind, lang);
119 if s.is_invalid() {
120 Err(KindMatcherError::InvalidKindName(node_kind.into()))
121 } else {
122 Ok(s)
123 }
124 }
125
126 #[must_use]
127 pub const fn from_id(kind: KindId) -> Self {
128 Self { kind }
129 }
130
131 /// Whether the kind matcher contains undefined tree-sitter kind.
132 #[must_use]
133 pub const fn is_invalid(&self) -> bool {
134 self.kind == TS_BUILTIN_SYM_END
135 }
136
137 /// Construct a matcher that only matches ERROR
138 #[must_use]
139 pub const fn error_matcher() -> Self {
140 Self::from_id(TS_BUILTIN_SYM_ERROR)
141 }
142}
143
144pub mod kind_utils {
145 use super::{KindId, TS_BUILTIN_SYM_ERROR};
146
147 /// Whether the kind will match parsing error occurred in the source code.
148 ///
149 /// This is used to match parsing error in the source code.
150 /// for example, we can use `kind: ERROR` in YAML to find invalid syntax in source.
151 /// the name `is_error` implies the matcher itself is error.
152 /// But here the matcher itself is valid and it is what it matches is error.
153 #[must_use]
154 pub const fn is_error_kind(kind: KindId) -> bool {
155 kind == TS_BUILTIN_SYM_ERROR
156 }
157
158 #[must_use]
159 pub const fn are_kinds_matching(goal: KindId, candidate: KindId) -> bool {
160 goal == candidate || is_error_kind(goal)
161 }
162}
163
164impl Matcher for KindMatcher {
165 fn match_node_with_env<'tree, D: Doc>(
166 &self,
167 node: Node<'tree, D>,
168 _env: &mut Cow<MetaVarEnv<'tree, D>>,
169 ) -> Option<Node<'tree, D>> {
170 if node.kind_id() == self.kind {
171 Some(node)
172 } else {
173 None
174 }
175 }
176
177 fn potential_kinds(&self) -> Option<BitSet> {
178 let mut set = BitSet::new();
179 set.insert(self.kind.into());
180 Some(set)
181 }
182}
183
#[cfg(test)]
mod test {
    use super::*;
    use crate::language::Tsx;
    use crate::matcher::MatcherExt;
    use crate::{Root, tree_sitter::StrDoc};

    /// Parses `s` as TSX and returns the resulting document root.
    fn pattern_node(s: &str) -> Root<StrDoc<Tsx>> {
        Root::str(s, Tsx)
    }

    /// A class field is found when searching for `public_field_definition`.
    #[test]
    fn test_kind_match() {
        let kind = "public_field_definition";
        let doc = pattern_node("class A { a = 123 }");
        let root = doc.root();
        let matcher = KindMatcher::new(kind, &Tsx);
        let found = matcher.find_node(root.clone());
        assert!(
            found.is_some(),
            "goal: {}, candidate: {}",
            kind,
            root.get_inner_node().to_sexp(),
        );
    }

    /// A plain `const` declaration contains no `field_definition` node.
    #[test]
    fn test_kind_non_match() {
        let kind = "field_definition";
        let doc = pattern_node("const a = 123");
        let root = doc.root();
        let matcher = KindMatcher::new(kind, &Tsx);
        let found = matcher.find_node(root.clone());
        assert!(
            found.is_none(),
            "goal: {}, candidate: {}",
            kind,
            root.get_inner_node().to_sexp(),
        );
    }

    /// A kind matcher advertises a single potential kind.
    #[test]
    fn test_kind_potential_kinds() {
        let kind = "field_definition";
        let matcher = KindMatcher::new(kind, &Tsx);
        let kinds = matcher
            .potential_kinds()
            .expect("should have potential kinds");
        // There should be exactly one potential kind.
        assert_eq!(kinds.len(), 1);
    }
}
232}