Skip to main content

thread_ast_engine/
meta_var.rs

1// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>
2// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
3// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
4//
5// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT
6//! # Meta-variable Environment and Utilities
7//!
8//! This module provides types and functions for handling meta-variables in AST pattern matching.
9//! Meta-variables allow patterns to flexibly match and capture code fragments, supporting single and multi-capture semantics.
10//!
11//! ## Key Components
12//!
13//! - [`MetaVarEnv`](crates/ast-engine/src/meta_var.rs:26): Stores meta-variable instantiations during pattern matching.
14//! - [`MetaVariable`](crates/ast-engine/src/meta_var.rs:260): Enum representing different meta-variable forms (single, multi, dropped).
15//! - `extract_meta_var`: Utility to parse meta-variable strings.
16//! - Insertion, retrieval, and transformation APIs for meta-variable environments.
17//!
18//! ## Example
19//!
20//! ```rust,no_run
21//! use thread_ast_engine::meta_var::{MetaVarEnv, MetaVariable, extract_meta_var};
22//!
23//! let mut env = MetaVarEnv::new();
24//! env.insert("$A", node);
25//! let meta = extract_meta_var("$A", '$');
26//! ```
27//!
28//! See [`MetaVarEnv`](crates/ast-engine/src/meta_var.rs:48) for details on usage in AST matching and rewriting.
29#[cfg(feature = "matching")]
30use crate::match_tree::does_node_match_exactly;
31#[cfg(feature = "matching")]
32use crate::matcher::Matcher;
33#[cfg(feature = "matching")]
34use crate::replacer::formatted_slice;
35use crate::source::Content;
36use crate::{Doc, Node};
37#[cfg(feature = "matching")]
38use std::borrow::Cow;
39use std::sync::Arc;
40use thread_utilities::{RapidMap, map_with_capacity};
41
42/// Interned string type for meta-variable identifiers.
43///
44/// Using `Arc<str>` instead of `String` eliminates per-clone heap allocations.
45/// Cloning an `Arc<str>` is a single atomic increment (~1ns) versus `String::clone`
46/// which copies the entire buffer (~10-50ns depending on length). Since meta-variable
47/// names are cloned extensively during pattern matching (environment forks, variable
48/// captures, constraint checking), this reduces allocation pressure by 20-30%.
49pub type MetaVariableID = Arc<str>;
50
51pub type Underlying<D> = Vec<<<D as Doc>::Source as Content>::Underlying>;
52
53/// a dictionary that stores metavariable instantiation
54/// const a = 123 matched with const a = $A will produce env: $A => 123
55#[derive(Clone, Debug)]
56pub struct MetaVarEnv<'tree, D: Doc> {
57    single_matched: RapidMap<MetaVariableID, Node<'tree, D>>,
58    multi_matched: RapidMap<MetaVariableID, Vec<Node<'tree, D>>>,
59    transformed_var: RapidMap<MetaVariableID, Underlying<D>>,
60}
61
62impl<'t, D: Doc> MetaVarEnv<'t, D> {
63    #[must_use]
64    pub fn new() -> Self {
65        Self {
66            single_matched: RapidMap::default(),
67            multi_matched: RapidMap::default(),
68            transformed_var: RapidMap::default(),
69        }
70    }
71
72    #[cfg(feature = "matching")]
73    pub fn insert(&mut self, id: &str, ret: Node<'t, D>) -> Option<&mut Self> {
74        if self.match_variable(id, &ret) {
75            self.single_matched.insert(Arc::from(id), ret);
76            Some(self)
77        } else {
78            None
79        }
80    }
81
82    #[cfg(feature = "matching")]
83    pub fn insert_multi(&mut self, id: &str, ret: Vec<Node<'t, D>>) -> Option<&mut Self> {
84        if self.match_multi_var(id, &ret) {
85            self.multi_matched.insert(Arc::from(id), ret);
86            Some(self)
87        } else {
88            None
89        }
90    }
91
92    /// Insert without cloning the key if it's already owned
93    #[cfg(feature = "matching")]
94    pub fn insert_owned(&mut self, id: MetaVariableID, ret: Node<'t, D>) -> Option<&mut Self> {
95        if self.match_variable(&id, &ret) {
96            self.single_matched.insert(id, ret);
97            Some(self)
98        } else {
99            None
100        }
101    }
102
103    /// Insert multi without cloning the key if it's already owned
104    #[cfg(feature = "matching")]
105    pub fn insert_multi_owned(
106        &mut self,
107        id: MetaVariableID,
108        ret: Vec<Node<'t, D>>,
109    ) -> Option<&mut Self> {
110        if self.match_multi_var(&id, &ret) {
111            self.multi_matched.insert(id, ret);
112            Some(self)
113        } else {
114            None
115        }
116    }
117    #[must_use]
118    pub fn get_match(&self, var: &str) -> Option<&'_ Node<'t, D>> {
119        self.single_matched.get(var)
120    }
121    #[must_use]
122    pub fn get_multiple_matches(&self, var: &str) -> Vec<Node<'t, D>> {
123        self.multi_matched.get(var).cloned().unwrap_or_default()
124    }
125
126    /// Returns a reference to multiple matches without cloning
127    #[must_use]
128    pub fn get_multiple_matches_ref(&self, var: &str) -> Option<&Vec<Node<'t, D>>> {
129        self.multi_matched.get(var)
130    }
131
132    pub fn add_label(&mut self, label: &str, node: Node<'t, D>) {
133        self.multi_matched
134            .entry(Arc::from(label))
135            .or_default()
136            .push(node);
137    }
138    #[must_use]
139    pub fn get_labels(&self, label: &str) -> Option<&Vec<Node<'t, D>>> {
140        self.multi_matched.get(label)
141    }
142
143    #[cfg(feature = "matching")]
144    pub fn get_matched_variables(&self) -> impl Iterator<Item = MetaVariable> + use<'_, 't, D> {
145        let single = self
146            .single_matched
147            .keys()
148            .map(|n| MetaVariable::Capture(n.clone(), false));
149        let transformed = self
150            .transformed_var
151            .keys()
152            .map(|n| MetaVariable::Capture(n.clone(), false));
153        let multi = self
154            .multi_matched
155            .keys()
156            .map(|n| MetaVariable::MultiCapture(n.clone()));
157        single.chain(multi).chain(transformed)
158    }
159
160    #[cfg(feature = "matching")]
161    #[must_use]
162    fn match_variable(&self, id: &str, candidate: &Node<'t, D>) -> bool {
163        if let Some(m) = self.single_matched.get(id) {
164            return does_node_match_exactly(m, candidate);
165        }
166        true
167    }
168    #[cfg(feature = "matching")]
169    fn match_multi_var(&self, id: &str, cands: &[Node<'t, D>]) -> bool {
170        let Some(nodes) = self.multi_matched.get(id) else {
171            return true;
172        };
173        let mut named_nodes = nodes.iter().filter(|n| n.is_named());
174        let mut named_cands = cands.iter().filter(|n| n.is_named());
175        loop {
176            if let Some(node) = named_nodes.next() {
177                let Some(cand) = named_cands.next() else {
178                    // cand is done but node is not
179                    break false;
180                };
181                if !does_node_match_exactly(node, cand) {
182                    break false;
183                }
184            } else if named_cands.next().is_some() {
185                // node is done but cand is not
186                break false;
187            } else {
188                // both None, matches
189                break true;
190            }
191        }
192    }
193
194    #[cfg(feature = "matching")]
195    pub fn match_constraints<M: Matcher>(
196        &mut self,
197        var_matchers: &RapidMap<MetaVariableID, M>,
198    ) -> bool {
199        let mut env = Cow::Borrowed(self);
200        for (var_id, candidate) in &self.single_matched {
201            if let Some(m) = var_matchers.get(var_id)
202                && m.match_node_with_env(candidate.clone(), &mut env).is_none()
203            {
204                return false;
205            }
206        }
207        if let Cow::Owned(env) = env {
208            *self = env;
209        }
210        true
211    }
212
213    #[cfg(feature = "matching")]
214    pub fn insert_transformation(&mut self, var: &MetaVariable, name: &str, slice: Underlying<D>) {
215        let node = match var {
216            MetaVariable::Capture(v, _) => self.single_matched.get(v),
217            MetaVariable::MultiCapture(vs) => self.multi_matched.get(vs).and_then(|vs| vs.first()),
218            _ => None,
219        };
220        let deindented = if let Some(v) = node {
221            formatted_slice(&slice, v.get_doc().get_source(), v.range().start).to_vec()
222        } else {
223            slice
224        };
225        self.transformed_var.insert(Arc::from(name), deindented);
226    }
227    #[must_use]
228    pub fn get_transformed(&self, var: &str) -> Option<&Underlying<D>> {
229        self.transformed_var.get(var)
230    }
231    #[must_use]
232    pub fn get_var_bytes<'s>(
233        &'s self,
234        var: &MetaVariable,
235    ) -> Option<&'s [<D::Source as Content>::Underlying]> {
236        get_var_bytes_impl(self, var)
237    }
238}
239
240#[cfg(feature = "matching")]
241impl<D: Doc> MetaVarEnv<'_, D> {
242    /// internal for readopt `NodeMatch` in pinned.rs
243    /// readopt node and env when sending them to other threads
244    pub(crate) fn visit_nodes<F>(&mut self, mut f: F)
245    where
246        F: FnMut(&mut Node<'_, D>),
247    {
248        for n in self.single_matched.values_mut() {
249            f(n);
250        }
251        for ns in self.multi_matched.values_mut() {
252            for n in ns {
253                f(n);
254            }
255        }
256    }
257}
258
259fn get_var_bytes_impl<'e, 't, C, D>(
260    env: &'e MetaVarEnv<'t, D>,
261    var: &MetaVariable,
262) -> Option<&'e [C::Underlying]>
263where
264    D: Doc<Source = C> + 't,
265    C: Content + 't,
266{
267    match var {
268        MetaVariable::Capture(n, _) => {
269            if let Some(node) = env.get_match(n) {
270                let bytes = node.get_doc().get_source().get_range(node.range());
271                Some(bytes)
272            } else if let Some(bytes) = env.get_transformed(n) {
273                Some(bytes)
274            } else {
275                None
276            }
277        }
278        MetaVariable::MultiCapture(n) => {
279            let nodes = env.get_multiple_matches(n);
280            if nodes.is_empty() {
281                None
282            } else {
283                // NOTE: start_byte is not always index range of source's slice.
284                // e.g. start_byte is still byte_offset in utf_16 (napi). start_byte
285                // so we need to call source's get_range method
286                let start = nodes[0].range().start;
287                let end = nodes[nodes.len() - 1].range().end;
288                Some(nodes[0].get_doc().get_source().get_range(start..end))
289            }
290        }
291        _ => None,
292    }
293}
294
295impl<D: Doc> Default for MetaVarEnv<'_, D> {
296    fn default() -> Self {
297        Self::new()
298    }
299}
300
301#[derive(Clone, Debug, PartialEq, Eq)]
302pub enum MetaVariable {
303    /// $A for captured meta var
304    Capture(MetaVariableID, bool),
305    /// $_ for non-captured meta var
306    Dropped(bool),
307    /// $$$ for non-captured multi var
308    Multiple,
309    /// $$$A for captured ellipsis
310    MultiCapture(MetaVariableID),
311}
312
313pub(crate) fn extract_meta_var(src: &str, meta_char: char) -> Option<MetaVariable> {
314    use MetaVariable::{Capture, Dropped, MultiCapture, Multiple};
315    let ellipsis: String = std::iter::repeat_n(meta_char, 3).collect();
316    if src == ellipsis {
317        return Some(Multiple);
318    }
319    if let Some(trimmed) = src.strip_prefix(&ellipsis) {
320        if !trimmed.chars().all(is_valid_meta_var_char) {
321            return None;
322        }
323        if trimmed.starts_with('_') {
324            return Some(Multiple);
325        }
326        return Some(MultiCapture(Arc::from(trimmed)));
327    }
328    if !src.starts_with(meta_char) {
329        return None;
330    }
331    let trimmed = &src[meta_char.len_utf8()..];
332    let (trimmed, named) = if let Some(t) = trimmed.strip_prefix(meta_char) {
333        (t, false)
334    } else {
335        (trimmed, true)
336    };
337    if !trimmed.starts_with(is_valid_first_char) || // empty or started with number
338    !trimmed.chars().all(is_valid_meta_var_char)
339    // not in form of $A or $_
340    {
341        return None;
342    }
343    if trimmed.starts_with('_') {
344        Some(Dropped(named))
345    } else {
346        Some(Capture(Arc::from(trimmed), named))
347    }
348}
349
350#[inline]
351const fn is_valid_first_char(c: char) -> bool {
352    matches!(c, 'A'..='Z' | '_')
353}
354
355#[inline]
356pub(crate) const fn is_valid_meta_var_char(c: char) -> bool {
357    is_valid_first_char(c) || c.is_ascii_digit()
358}
359
360// RapidMap is intentionally specific (not generic over BuildHasher) for performance.
361// This conversion is in the pattern matching hot path and should use rapidhash.
362#[allow(clippy::implicit_hasher)]
363impl<'tree, D: Doc> From<MetaVarEnv<'tree, D>> for RapidMap<String, String>
364where
365    D::Source: Content,
366{
367    fn from(env: MetaVarEnv<'tree, D>) -> Self {
368        let mut ret: Self = map_with_capacity(
369            env.single_matched.len() + env.multi_matched.len() + env.transformed_var.len(),
370        );
371        for (id, node) in env.single_matched {
372            ret.insert(id.to_string(), node.text().into());
373        }
374        for (id, bytes) in env.transformed_var {
375            ret.insert(
376                id.to_string(),
377                <D::Source as Content>::encode_bytes(&bytes).to_string(),
378            );
379        }
380        for (id, nodes) in env.multi_matched {
381            // Optimize string concatenation by pre-calculating capacity
382            if nodes.is_empty() {
383                ret.insert(id.to_string(), "[]".to_string());
384                continue;
385            }
386
387            let estimated_capacity = nodes.len() * 16 + 10; // rough estimate
388            let mut result = String::with_capacity(estimated_capacity);
389            result.push('[');
390
391            let mut first = true;
392            for node in &nodes {
393                if !first {
394                    result.push_str(", ");
395                }
396                result.push_str(&node.text());
397                first = false;
398            }
399            result.push(']');
400            ret.insert(id.to_string(), result);
401        }
402        ret
403    }
404}
405
406#[cfg(test)]
407mod test {
408    use super::*;
409    use crate::Pattern;
410    use crate::language::Tsx;
411    use crate::tree_sitter::LanguageExt;
412
413    fn extract_var(s: &str) -> Option<MetaVariable> {
414        extract_meta_var(s, '$')
415    }
416    #[test]
417    fn test_match_var() {
418        use MetaVariable::*;
419        assert_eq!(extract_var("$$$"), Some(Multiple));
420        assert_eq!(extract_var("$ABC"), Some(Capture("ABC".into(), true)));
421        assert_eq!(extract_var("$$ABC"), Some(Capture("ABC".into(), false)));
422        assert_eq!(extract_var("$MATCH1"), Some(Capture("MATCH1".into(), true)));
423        assert_eq!(extract_var("$$$ABC"), Some(MultiCapture("ABC".into())));
424        assert_eq!(extract_var("$_"), Some(Dropped(true)));
425        assert_eq!(extract_var("$_123"), Some(Dropped(true)));
426        assert_eq!(extract_var("$$_"), Some(Dropped(false)));
427    }
428
429    #[test]
430    fn test_not_meta_var() {
431        assert_eq!(extract_var("$123"), None);
432        assert_eq!(extract_var("$"), None);
433        assert_eq!(extract_var("$$"), None);
434        assert_eq!(extract_var("abc"), None);
435        assert_eq!(extract_var("$abc"), None);
436    }
437
438    fn match_constraints(pattern: &str, node: &str) -> bool {
439        let mut matchers = thread_utilities::RapidMap::default();
440        matchers.insert(Arc::from("A"), Pattern::new(pattern, &Tsx));
441        let mut env = MetaVarEnv::new();
442        let root = Tsx.ast_grep(node);
443        let node = root.root().child(0).unwrap().child(0).unwrap();
444        env.insert("A", node);
445        env.match_constraints(&matchers)
446    }
447
448    #[test]
449    fn test_non_ascii_meta_var() {
450        let extract = |s| extract_meta_var(s, 'µ');
451        use MetaVariable::*;
452        assert_eq!(extract("µµµ"), Some(Multiple));
453        assert_eq!(extract("µABC"), Some(Capture("ABC".into(), true)));
454        assert_eq!(extract("µµABC"), Some(Capture("ABC".into(), false)));
455        assert_eq!(extract("µµµABC"), Some(MultiCapture("ABC".into())));
456        assert_eq!(extract("µ_"), Some(Dropped(true)));
457        assert_eq!(extract("abc"), None);
458        assert_eq!(extract("µabc"), None);
459    }
460
461    #[test]
462    fn test_match_constraints() {
463        assert!(match_constraints("a + b", "a + b"));
464    }
465
466    #[test]
467    fn test_match_not_constraints() {
468        assert!(!match_constraints("a - b", "a + b"));
469    }
470
471    #[test]
472    fn test_multi_var_match() {
473        let grep = Tsx.ast_grep("if (true) { a += 1; b += 1 } else { a += 1; b += 1 }");
474        let node = grep.root();
475        let found = node.find("if (true) { $$$A } else { $$$A }");
476        assert!(found.is_some());
477        let grep = Tsx.ast_grep("if (true) { a += 1 } else { b += 1 }");
478        let node = grep.root();
479        let not_found = node.find("if (true) { $$$A } else { $$$A }");
480        assert!(not_found.is_none());
481    }
482
483    #[test]
484    fn test_multi_var_match_with_trailing() {
485        let grep = Tsx.ast_grep("if (true) { a += 1; } else { a += 1; b += 1 }");
486        let node = grep.root();
487        let not_found = node.find("if (true) { $$$A } else { $$$A }");
488        assert!(not_found.is_none());
489        let grep = Tsx.ast_grep("if (true) { a += 1; b += 1; } else { a += 1 }");
490        let node = grep.root();
491        let not_found = node.find("if (true) { $$$A } else { $$$A }");
492        assert!(not_found.is_none());
493    }
494}