Skip to main content

thread_ast_engine/matchers/
kind.rs

1// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6
7//! # AST Node Kind Matching
8//!
9//! Provides matchers that filter AST nodes based on their syntactic type (kind).
10//! Every AST node has a "kind" that describes what syntax element it represents
//! (e.g., `function_declaration`, `identifier`, `string_literal`).
12//!
13//! ## Core Types
14//!
15//! - [`KindMatcher`] - Matches nodes of a specific syntactic type
16//! - [`KindMatcherError`] - Errors when creating matchers with invalid kinds
17//! - [`kind_utils`] - Utilities for working with node kinds
18//!
19//! ## Example Usage
20//!
21//! ```rust,ignore
22//! use thread_ast_engine::matchers::KindMatcher;
23//! use thread_ast_engine::matcher::MatcherExt;
24//!
25//! // Match all function declarations
26//! let matcher = KindMatcher::new("function_declaration", &language);
27//! let functions: Vec<_> = root.find_all(&matcher).collect();
28//!
29//! // Match parsing errors in source code
30//! let error_matcher = KindMatcher::error_matcher();
31//! let errors: Vec<_> = root.find_all(&error_matcher).collect();
32//! ```
33//!
34//! ## Node Kind Concepts
35//!
36//! - **Named nodes** - Represent actual language constructs (functions, variables, etc.)
37//! - **Anonymous nodes** - Represent punctuation and keywords (`{`, `}`, `let`, etc.)
38//! - **Error nodes** - Represent unparsable syntax (syntax errors)
39//!
40//! Kind matching is useful for:
41//! - Finding all nodes of a specific type (all functions, all classes, etc.)
42//! - Detecting syntax errors in source code
43//! - Building language-specific analysis tools
44
45use super::matcher::Matcher;
46
47use crate::language::Language;
48use crate::meta_var::MetaVarEnv;
49use crate::node::KindId;
50use crate::{Doc, Node};
51
52use std::borrow::Cow;
53
54use bit_set::BitSet;
55use thiserror::Error;
56
57// 0 is symbol_end for not found, 65535 is builtin symbol ERROR
58// see https://tree-sitter.docsforge.com/master/api/#TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
59// and https://tree-sitter.docsforge.com/master/api/ts_language_symbol_for_name/
60const TS_BUILTIN_SYM_END: KindId = 0;
61const TS_BUILTIN_SYM_ERROR: KindId = 65535;
62
63/// Errors that can occur when creating a [`KindMatcher`].
64#[derive(Debug, Error)]
65pub enum KindMatcherError {
66    /// The specified node kind name doesn't exist in the language grammar.
67    ///
68    /// This happens when you try to match a node type that isn't defined
69    /// in the tree-sitter grammar for the language.
70    #[error("Kind `{0}` is invalid.")]
71    InvalidKindName(String),
72}
73
74/// Matcher that finds AST nodes based on their syntactic type (kind).
75///
76/// `KindMatcher` is the simplest type of matcher - it matches nodes whose
77/// type matches a specific string. Every AST node has a "kind" that describes
78/// what syntax element it represents.
79///
80/// # Examples
81///
82/// ```rust,ignore
83/// // Match all function declarations
84/// let matcher = KindMatcher::new("function_declaration", &language);
85/// let functions: Vec<_> = root.find_all(&matcher).collect();
86///
87/// // Match all identifiers
88/// let id_matcher = KindMatcher::new("identifier", &language);
89/// let identifiers: Vec<_> = root.find_all(&id_matcher).collect();
90///
91/// // Find syntax errors in code
92/// let error_matcher = KindMatcher::error_matcher();
93/// let errors: Vec<_> = root.find_all(&error_matcher).collect();
94/// ```
95///
96/// # Common Node Kinds
97///
98/// The exact node kinds depend on the language, but common examples include:
99/// - `"function_declaration"` - Function definitions
100/// - `"identifier"` - Variable/function names
101/// - `"string_literal"` - String values
102/// - `"number"` - Numeric literals
103/// - `"ERROR"` - Syntax errors
104#[derive(Debug, Clone)]
105pub struct KindMatcher {
106    /// The numeric ID of the node kind to match
107    kind: KindId,
108}
109
110impl KindMatcher {
111    pub fn new<L: Language>(node_kind: &str, lang: &L) -> Self {
112        Self {
113            kind: lang.kind_to_id(node_kind),
114        }
115    }
116
117    pub fn try_new<L: Language>(node_kind: &str, lang: &L) -> Result<Self, KindMatcherError> {
118        let s = Self::new(node_kind, lang);
119        if s.is_invalid() {
120            Err(KindMatcherError::InvalidKindName(node_kind.into()))
121        } else {
122            Ok(s)
123        }
124    }
125
126    #[must_use]
127    pub const fn from_id(kind: KindId) -> Self {
128        Self { kind }
129    }
130
131    /// Whether the kind matcher contains undefined tree-sitter kind.
132    #[must_use]
133    pub const fn is_invalid(&self) -> bool {
134        self.kind == TS_BUILTIN_SYM_END
135    }
136
137    /// Construct a matcher that only matches ERROR
138    #[must_use]
139    pub const fn error_matcher() -> Self {
140        Self::from_id(TS_BUILTIN_SYM_ERROR)
141    }
142}
143
144pub mod kind_utils {
145    use super::{KindId, TS_BUILTIN_SYM_ERROR};
146
147    /// Whether the kind will match parsing error occurred in the source code.
148    ///
149    /// This is used to match parsing error in the source code.
150    /// for example, we can use `kind: ERROR` in YAML to find invalid syntax in source.
151    /// the name `is_error` implies the matcher itself is error.
152    /// But here the matcher itself is valid and it is what it matches is error.
153    #[must_use]
154    pub const fn is_error_kind(kind: KindId) -> bool {
155        kind == TS_BUILTIN_SYM_ERROR
156    }
157
158    #[must_use]
159    pub const fn are_kinds_matching(goal: KindId, candidate: KindId) -> bool {
160        goal == candidate || is_error_kind(goal)
161    }
162}
163
164impl Matcher for KindMatcher {
165    fn match_node_with_env<'tree, D: Doc>(
166        &self,
167        node: Node<'tree, D>,
168        _env: &mut Cow<MetaVarEnv<'tree, D>>,
169    ) -> Option<Node<'tree, D>> {
170        if node.kind_id() == self.kind {
171            Some(node)
172        } else {
173            None
174        }
175    }
176
177    fn potential_kinds(&self) -> Option<BitSet> {
178        let mut set = BitSet::new();
179        set.insert(self.kind.into());
180        Some(set)
181    }
182}
183
#[cfg(test)]
mod test {
    use super::*;
    use crate::language::Tsx;
    use crate::matcher::MatcherExt;
    use crate::{Root, tree_sitter::StrDoc};

    /// Builds a `Root` over `src` using the `Tsx` language.
    fn pattern_node(src: &str) -> Root<StrDoc<Tsx>> {
        Root::str(src, Tsx)
    }

    /// A class field is found when searching for its kind.
    #[test]
    fn test_kind_match() {
        let kind = "public_field_definition";
        let tree = pattern_node("class A { a = 123 }");
        let root = tree.root();
        let matcher = KindMatcher::new(kind, &Tsx);
        let found = matcher.find_node(root.clone());
        assert!(
            found.is_some(),
            "goal: {}, candidate: {}",
            kind,
            root.get_inner_node().to_sexp(),
        );
    }

    /// Searching for a kind that does not occur in the source yields nothing.
    #[test]
    fn test_kind_non_match() {
        let kind = "field_definition";
        let tree = pattern_node("const a = 123");
        let root = tree.root();
        let matcher = KindMatcher::new(kind, &Tsx);
        let found = matcher.find_node(root.clone());
        assert!(
            found.is_none(),
            "goal: {}, candidate: {}",
            kind,
            root.get_inner_node().to_sexp(),
        );
    }

    /// A kind matcher advertises a single potential kind.
    #[test]
    fn test_kind_potential_kinds() {
        let kind = "field_definition";
        let kinds = KindMatcher::new(kind, &Tsx)
            .potential_kinds()
            .expect("should have potential kinds");
        // should have exactly one potential kind
        assert_eq!(kinds.len(), 1);
    }
}