Skip to main content

thread_ast_engine/matchers/
text.rs

1// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6
//! # Text-Based Pattern Matching
//!
//! Provides regex-based matchers for finding AST nodes by their text content.
//! Useful when you need to match nodes based on their actual text rather
//! than their structural properties.
//!
//! ## Core Types
//!
//! - [`RegexMatcher`] - Matches nodes whose text content matches a regex pattern
//! - [`RegexMatcherError`] - Errors from invalid regex patterns
//!
//! ## Example Usage
//!
//! ```rust,ignore
//! // Find all nodes containing specific text patterns
//! let number_matcher = RegexMatcher::try_new(r"\d+")?; // Numbers
//! let email_matcher = RegexMatcher::try_new(r"[\w\.-]+@[\w\.-]+\.\w+")?; // Emails
//!
//! // Find every node whose text contains a number (the pattern is unanchored,
//! // so enclosing nodes match too; use `^\d+$` to match numeric literals only)
//! let numbers: Vec<_> = root.find_all(&number_matcher).collect();
//!
//! // Find nodes whose text contains a `temp`-prefixed name
//! let temp_vars = RegexMatcher::try_new(r"temp\w*")?;
//! let temp_variables: Vec<_> = root.find_all(&temp_vars).collect();
//! ```
//!
//! ## Use Cases
//!
//! Text matching complements structural patterns when you need to:
//! - Find nodes with specific naming patterns
//! - Locate hardcoded values or literals
//! - Search for code smells in text content
//! - Filter nodes by complex text criteria
40
41use super::matcher::Matcher;
42use crate::Doc;
43use crate::Node;
44use crate::meta_var::MetaVarEnv;
45
46use bit_set::BitSet;
47use regex::{Error as RegexError, Regex};
48use thiserror::Error;
49
50use std::borrow::Cow;
51
52/// Errors that can occur when creating a [`RegexMatcher`].
53#[derive(Debug, Error)]
54pub enum RegexMatcherError {
55    /// The provided regex pattern is invalid.
56    ///
57    /// Common causes include unbalanced parentheses, invalid escape sequences,
58    /// or unsupported regex features.
59    #[error("Parsing text matcher fails.")]
60    Regex(#[from] RegexError),
61}
62
63/// Matcher that finds AST nodes based on regex patterns applied to their text content.
64///
65/// `RegexMatcher` enables flexible text-based searching within AST nodes.
66/// It matches any node whose text content satisfies the provided regular expression.
67///
68/// # Examples
69///
70/// ```rust,ignore
71/// // Match numeric literals
72/// let numbers = RegexMatcher::try_new(r"^\d+$")?;
73/// let numeric_nodes: Vec<_> = root.find_all(&numbers).collect();
74///
75/// // Find TODO comments
76/// let todos = RegexMatcher::try_new(r"(?i)todo|fixme")?;
77/// let todo_comments: Vec<_> = root.find_all(&todos).collect();
78///
79/// // Match specific naming patterns
80/// let private_vars = RegexMatcher::try_new(r"^_\w+")?;
81/// let private_variables: Vec<_> = root.find_all(&private_vars).collect();
82/// ```
83///
84/// # Performance Note
85///
86/// Text matching requires extracting text from every tested node, which can be
87/// slower than structural matching. Consider combining with other matchers
88/// or using more specific patterns when possible.
89#[derive(Clone, Debug)]
90pub struct RegexMatcher {
91    /// Compiled regex pattern for matching node text
92    regex: Regex,
93}
94
95impl RegexMatcher {
96    pub fn try_new(text: &str) -> Result<Self, RegexMatcherError> {
97        Ok(Self {
98            regex: Regex::new(text)?,
99        })
100    }
101}
102
103impl Matcher for RegexMatcher {
104    fn match_node_with_env<'tree, D: Doc>(
105        &self,
106        node: Node<'tree, D>,
107        _env: &mut Cow<MetaVarEnv<'tree, D>>,
108    ) -> Option<Node<'tree, D>> {
109        self.regex.is_match(&node.text()).then_some(node)
110    }
111
112    fn potential_kinds(&self) -> Option<BitSet> {
113        None
114    }
115}