ast_grep_core/matcher/
pattern.rs

1use crate::language::Language;
2use crate::match_tree::{match_end_non_recursive, match_node_non_recursive, MatchStrictness};
3use crate::matcher::{kind_utils, KindMatcher, KindMatcherError, Matcher};
4use crate::meta_var::{MetaVarEnv, MetaVariable};
5use crate::source::SgNode;
6use crate::{Doc, Node, Root};
7
8use bit_set::BitSet;
9use thiserror::Error;
10
11use std::borrow::Cow;
12use std::collections::HashSet;
13
14#[derive(Clone)]
15pub struct Pattern {
16  pub node: PatternNode,
17  root_kind: Option<u16>,
18  pub strictness: MatchStrictness,
19}
20
/// Carries the pattern source and an optional selector until a parser is
/// supplied to [`PatternBuilder::build`].
pub struct PatternBuilder<'a> {
  /// Node kind to pick out of the parsed source; `None` means the whole
  /// source is the pattern.
  selector: Option<&'a str>,
  /// Pattern source text, either borrowed or owned.
  src: Cow<'a, str>,
}
25
26impl PatternBuilder<'_> {
27  pub fn build<D, F>(&self, parse: F) -> Result<Pattern, PatternError>
28  where
29    F: FnOnce(&str) -> Result<D, String>,
30    D: Doc,
31  {
32    let doc = parse(&self.src).map_err(PatternError::Parse)?;
33    let root = Root::doc(doc);
34    if let Some(selector) = self.selector {
35      self.contextual(&root, selector)
36    } else {
37      self.single(&root)
38    }
39  }
40  fn single<D: Doc>(&self, root: &Root<D>) -> Result<Pattern, PatternError> {
41    let goal = root.root();
42    if goal.children().len() == 0 {
43      return Err(PatternError::NoContent(self.src.to_string()));
44    }
45    if !is_single_node(&goal.inner) {
46      return Err(PatternError::MultipleNode(self.src.to_string()));
47    }
48    let node = Pattern::single_matcher(root);
49    Ok(Pattern::from(node))
50  }
51
52  fn contextual<D: Doc>(&self, root: &Root<D>, selector: &str) -> Result<Pattern, PatternError> {
53    let goal = root.root();
54    let kind_matcher = KindMatcher::try_new(selector, root.lang().clone())?;
55    let Some(node) = goal.find(&kind_matcher) else {
56      return Err(PatternError::NoSelectorInContext {
57        context: self.src.to_string(),
58        selector: selector.into(),
59      });
60    };
61    Ok(Pattern {
62      root_kind: Some(node.kind_id()),
63      node: convert_node_to_pattern(node.get_node().clone()),
64      strictness: MatchStrictness::Smart,
65    })
66  }
67}
68
69#[derive(Clone)]
70pub enum PatternNode {
71  MetaVar {
72    meta_var: MetaVariable,
73  },
74  /// Node without children.
75  Terminal {
76    text: String,
77    is_named: bool,
78    kind_id: u16,
79  },
80  /// Non-Terminal Syntax Nodes are called Internal
81  Internal {
82    kind_id: u16,
83    children: Vec<PatternNode>,
84  },
85}
86
87impl PatternNode {
88  // for skipping trivial nodes in goal after ellipsis
89  pub fn is_trivial(&self) -> bool {
90    match self {
91      PatternNode::Terminal { is_named, .. } => !*is_named,
92      _ => false,
93    }
94  }
95
96  pub fn fixed_string(&self) -> Cow<str> {
97    match &self {
98      PatternNode::Terminal { text, .. } => Cow::Borrowed(text),
99      PatternNode::MetaVar { .. } => Cow::Borrowed(""),
100      PatternNode::Internal { children, .. } => {
101        children
102          .iter()
103          .map(|n| n.fixed_string())
104          .fold(Cow::Borrowed(""), |longest, curr| {
105            if longest.len() >= curr.len() {
106              longest
107            } else {
108              curr
109            }
110          })
111      }
112    }
113  }
114}
115impl<'r, D: Doc> From<Node<'r, D>> for PatternNode {
116  fn from(node: Node<'r, D>) -> Self {
117    convert_node_to_pattern(node)
118  }
119}
120
121impl<'r, D: Doc> From<Node<'r, D>> for Pattern {
122  fn from(node: Node<'r, D>) -> Self {
123    Self {
124      node: convert_node_to_pattern(node),
125      root_kind: None,
126      strictness: MatchStrictness::Smart,
127    }
128  }
129}
130
131fn convert_node_to_pattern<D: Doc>(node: Node<'_, D>) -> PatternNode {
132  if let Some(meta_var) = extract_var_from_node(&node) {
133    PatternNode::MetaVar { meta_var }
134  } else if node.is_leaf() {
135    PatternNode::Terminal {
136      text: node.text().to_string(),
137      is_named: node.is_named(),
138      kind_id: node.kind_id(),
139    }
140  } else {
141    let children = node.children().filter_map(|n| {
142      if n.is_missing() {
143        None
144      } else {
145        Some(PatternNode::from(n))
146      }
147    });
148    PatternNode::Internal {
149      kind_id: node.kind_id(),
150      children: children.collect(),
151    }
152  }
153}
154
155fn extract_var_from_node<D: Doc>(goal: &Node<'_, D>) -> Option<MetaVariable> {
156  let key = goal.text();
157  goal.lang().extract_meta_var(&key)
158}
159
160#[derive(Debug, Error)]
161pub enum PatternError {
162  #[error("Fails to parse the pattern query: `{0}`")]
163  Parse(String),
164  #[error("No AST root is detected. Please check the pattern source `{0}`.")]
165  NoContent(String),
166  #[error("Multiple AST nodes are detected. Please check the pattern source `{0}`.")]
167  MultipleNode(String),
168  #[error(transparent)]
169  InvalidKind(#[from] KindMatcherError),
170  #[error("Fails to create Contextual pattern: selector `{selector}` matches no node in the context `{context}`.")]
171  NoSelectorInContext { context: String, selector: String },
172}
173
174#[inline]
175fn is_single_node<'r, N: SgNode<'r>>(n: &N) -> bool {
176  match n.children().len() {
177    1 => true,
178    2 => {
179      let c = n.child(1).expect("second child must exist");
180      // some language will have weird empty syntax node at the end
181      // see golang's `$A = 0` pattern test case
182      c.is_missing() || c.kind().is_empty()
183    }
184    _ => false,
185  }
186}
187impl Pattern {
188  pub fn has_error(&self) -> bool {
189    let kind = match &self.node {
190      PatternNode::Terminal { kind_id, .. } => *kind_id,
191      PatternNode::Internal { kind_id, .. } => *kind_id,
192      PatternNode::MetaVar { .. } => match self.root_kind {
193        Some(k) => k,
194        None => return false,
195      },
196    };
197    kind_utils::is_error_kind(kind)
198  }
199
200  pub fn fixed_string(&self) -> Cow<str> {
201    self.node.fixed_string()
202  }
203
204  /// Get all defined variables in the pattern.
205  /// Used for validating rules and report undefined variables.
206  pub fn defined_vars(&self) -> HashSet<&str> {
207    let mut vars = HashSet::new();
208    collect_vars(&self.node, &mut vars);
209    vars
210  }
211}
212
213fn meta_var_name(meta_var: &MetaVariable) -> Option<&str> {
214  use MetaVariable as MV;
215  match meta_var {
216    MV::Capture(name, _) => Some(name),
217    MV::MultiCapture(name) => Some(name),
218    MV::Dropped(_) => None,
219    MV::Multiple => None,
220  }
221}
222
223fn collect_vars<'p>(p: &'p PatternNode, vars: &mut HashSet<&'p str>) {
224  match p {
225    PatternNode::MetaVar { meta_var, .. } => {
226      if let Some(name) = meta_var_name(meta_var) {
227        vars.insert(name);
228      }
229    }
230    PatternNode::Terminal { .. } => {
231      // collect nothing for terminal nodes!
232    }
233    PatternNode::Internal { children, .. } => {
234      for c in children {
235        collect_vars(c, vars);
236      }
237    }
238  }
239}
240
241impl Pattern {
242  pub fn try_new<L: Language>(src: &str, lang: L) -> Result<Self, PatternError> {
243    let processed = lang.pre_process_pattern(src);
244    let builder = PatternBuilder {
245      selector: None,
246      src: processed,
247    };
248    lang.build_pattern(&builder)
249  }
250
251  pub fn new<L: Language>(src: &str, lang: L) -> Self {
252    Self::try_new(src, lang).unwrap()
253  }
254
255  pub fn with_strictness(mut self, strictness: MatchStrictness) -> Self {
256    self.strictness = strictness;
257    self
258  }
259
260  pub fn contextual<L: Language>(
261    context: &str,
262    selector: &str,
263    lang: L,
264  ) -> Result<Self, PatternError> {
265    let processed = lang.pre_process_pattern(context);
266    let builder = PatternBuilder {
267      selector: Some(selector),
268      src: processed,
269    };
270    lang.build_pattern(&builder)
271  }
272  fn single_matcher<D: Doc>(root: &Root<D>) -> Node<D> {
273    // debug_assert!(matches!(self.style, PatternStyle::Single));
274    let node = root.root();
275    let mut inner = node.inner;
276    while is_single_node(&inner) {
277      inner = inner.child(0).unwrap();
278    }
279    Node { inner, root }
280  }
281}
282
283impl Matcher for Pattern {
284  fn match_node_with_env<'tree, D: Doc>(
285    &self,
286    node: Node<'tree, D>,
287    env: &mut Cow<MetaVarEnv<'tree, D>>,
288  ) -> Option<Node<'tree, D>> {
289    if let Some(k) = self.root_kind {
290      if node.kind_id() != k {
291        return None;
292      }
293    }
294    // do not pollute the env if pattern does not match
295    let mut may_write = Cow::Borrowed(env.as_ref());
296    let node = match_node_non_recursive(self, node, &mut may_write)?;
297    if let Cow::Owned(map) = may_write {
298      // only change env when pattern matches
299      *env = Cow::Owned(map);
300    }
301    Some(node)
302  }
303
304  fn potential_kinds(&self) -> Option<bit_set::BitSet> {
305    let kind = match self.node {
306      PatternNode::Terminal { kind_id, .. } => kind_id,
307      PatternNode::MetaVar { .. } => self.root_kind?,
308      PatternNode::Internal { kind_id, .. } => {
309        if kind_utils::is_error_kind(kind_id) {
310          // error can match any kind
311          return None;
312        }
313        kind_id
314      }
315    };
316
317    let mut kinds = BitSet::new();
318    kinds.insert(kind.into());
319    Some(kinds)
320  }
321
322  fn get_match_len<D: Doc>(&self, node: Node<'_, D>) -> Option<usize> {
323    let start = node.range().start;
324    let end = match_end_non_recursive(self, node)?;
325    Some(end - start)
326  }
327}
328impl std::fmt::Debug for PatternNode {
329  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330    match self {
331      Self::MetaVar { meta_var, .. } => write!(f, "{:?}", meta_var),
332      Self::Terminal { text, .. } => write!(f, "{}", text),
333      Self::Internal { children, .. } => write!(f, "{:?}", children),
334    }
335  }
336}
337
338impl std::fmt::Debug for Pattern {
339  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
340    write!(f, "{:?}", self.node)
341  }
342}
343
// Unit tests for pattern construction, matching, kind inference and errors.
// All tests use the Tsx language.
#[cfg(test)]
mod test {
  use super::*;
  use crate::language::Tsx;
  use crate::matcher::MatcherExt;
  use crate::meta_var::MetaVarEnv;
  use crate::tree_sitter::StrDoc;
  use std::collections::HashMap;

  // Parses `s` as Tsx and returns the resulting root.
  fn pattern_node(s: &str) -> Root<StrDoc<Tsx>> {
    Root::str(s, Tsx)
  }

  // Asserts that pattern `s1` is found somewhere in the code `s2`.
  fn test_match(s1: &str, s2: &str) {
    let pattern = Pattern::new(s1, Tsx);
    let cand = pattern_node(s2);
    let cand = cand.root();
    assert!(
      pattern.find_node(cand.clone()).is_some(),
      "goal: {:?}, candidate: {}",
      pattern,
      cand.get_inner_node().to_sexp(),
    );
  }
  // Asserts that pattern `s1` is not found anywhere in the code `s2`.
  fn test_non_match(s1: &str, s2: &str) {
    let pattern = Pattern::new(s1, Tsx);
    let cand = pattern_node(s2);
    let cand = cand.root();
    assert!(
      pattern.find_node(cand.clone()).is_none(),
      "goal: {:?}, candidate: {}",
      pattern,
      cand.get_inner_node().to_sexp(),
    );
  }

  // Meta variables stand in for concrete nodes.
  #[test]
  fn test_meta_variable() {
    test_match("const a = $VALUE", "const a = 123");
    test_match("const $VARIABLE = $VALUE", "const a = 123");
    // NOTE(review): this assertion duplicates the previous one — confirm
    // whether a different case was intended.
    test_match("const $VARIABLE = $VALUE", "const a = 123");
  }

  // Whitespace inside an empty block does not affect matching.
  #[test]
  fn test_whitespace() {
    test_match("function t() { }", "function t() {}");
    test_match("function t() {}", "function t() {  }");
  }

  // Matches `goal_str` against `cand` and returns the captured environment
  // as a string map. Panics when the pattern is not found.
  fn match_env(goal_str: &str, cand: &str) -> HashMap<String, String> {
    let pattern = Pattern::new(goal_str, Tsx);
    let cand = pattern_node(cand);
    let cand = cand.root();
    let nm = pattern.find_node(cand).unwrap();
    HashMap::from(nm.get_env().clone())
  }

  // A captured meta variable is available in the match environment.
  #[test]
  fn test_meta_variable_env() {
    let env = match_env("const a = $VALUE", "const a = 123");
    assert_eq!(env["VALUE"], "123");
  }

  // A failed match must leave both the Cow env and the underlying map empty.
  #[test]
  fn test_pattern_should_not_pollute_env() {
    // gh issue #1164
    let pattern = Pattern::new("const $A = 114", Tsx);
    let cand = pattern_node("const a = 514");
    let cand = cand.root().child(0).unwrap();
    let map = MetaVarEnv::new();
    let mut env = Cow::Borrowed(&map);
    let nm = pattern.match_node_with_env(cand, &mut env);
    assert!(nm.is_none());
    assert!(env.get_match("A").is_none());
    assert!(map.get_match("A").is_none());
  }

  // A meta variable can capture a whole expression, not just a single token.
  #[test]
  fn test_match_non_atomic() {
    let env = match_env("const a = $VALUE", "const a = 5 + 3");
    assert_eq!(env["VALUE"], "5 + 3");
  }

  // Class field patterns: an extra member in the pattern prevents a match.
  #[test]
  fn test_class_assignment() {
    test_match("class $C { $MEMBER = $VAL}", "class A {a = 123}");
    test_non_match("class $C { $MEMBER = $VAL; b = 123; }", "class A {a = 123}");
    // test_match("a = 123", "class A {a = 123}");
    test_non_match("a = 123", "class B {b = 123}");
  }

  // A call pattern is found inside a return statement.
  #[test]
  fn test_return() {
    test_match("$A($B)", "return test(123)");
  }

  // A contextual pattern selects a field definition from a class context and
  // matches fields in other classes, but not a `let` declaration.
  #[test]
  fn test_contextual_pattern() {
    let pattern =
      Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx).expect("test");
    let cand = pattern_node("class B { b = 123 }");
    assert!(pattern.find_node(cand.root()).is_some());
    let cand = pattern_node("let b = 123");
    assert!(pattern.find_node(cand.root()).is_none());
  }

  // Meta variables of a contextual pattern are captured on a match.
  #[test]
  fn test_contextual_match_with_env() {
    let pattern =
      Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx).expect("test");
    let cand = pattern_node("class B { b = 123 }");
    let nm = pattern.find_node(cand.root()).expect("test");
    let env = nm.get_env();
    let env = HashMap::from(env.clone());
    assert_eq!(env["F"], "b");
    assert_eq!(env["I"], "123");
  }

  // A contextual pattern yields no match on unrelated code.
  #[test]
  fn test_contextual_unmatch_with_env() {
    let pattern =
      Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx).expect("test");
    let cand = pattern_node("let b = 123");
    let nm = pattern.find_node(cand.root());
    assert!(nm.is_none());
  }

  // Converts a Tsx kind name to its numeric id as `usize`.
  fn get_kind(kind_str: &str) -> usize {
    Tsx.kind_to_id(kind_str).into()
  }

  // A plain pattern reports exactly the kind of its root node.
  #[test]
  fn test_pattern_potential_kinds() {
    let pattern = Pattern::new("const a = 1", Tsx);
    let kind = get_kind("lexical_declaration");
    let kinds = pattern.potential_kinds().expect("should have kinds");
    assert_eq!(kinds.len(), 1);
    assert!(kinds.contains(kind));
  }

  // Meta variables below the root do not change the reported kind.
  #[test]
  fn test_pattern_with_non_root_meta_var() {
    let pattern = Pattern::new("const $A = $B", Tsx);
    let kind = get_kind("lexical_declaration");
    let kinds = pattern.potential_kinds().expect("should have kinds");
    assert_eq!(kinds.len(), 1);
    assert!(kinds.contains(kind));
  }

  #[test]
  fn test_bare_wildcard() {
    let pattern = Pattern::new("$A", Tsx);
    // wildcard should match anything, so kinds should be None
    assert!(pattern.potential_kinds().is_none());
  }

  // A contextual pattern reports the kind named by its selector.
  #[test]
  fn test_contextual_potential_kinds() {
    let pattern =
      Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx).expect("test");
    let kind = get_kind("public_field_definition");
    let kinds = pattern.potential_kinds().expect("should have kinds");
    assert_eq!(kinds.len(), 1);
    assert!(kinds.contains(kind));
  }

  // A meta variable root still has a kind when the selector provides one.
  #[test]
  fn test_contextual_wildcard() {
    let pattern = Pattern::contextual("class A { $F }", "property_identifier", Tsx).expect("test");
    let kind = get_kind("property_identifier");
    let kinds = pattern.potential_kinds().expect("should have kinds");
    assert_eq!(kinds.len(), 1);
    assert!(kinds.contains(kind));
  }

  // Ignored: patterns made of several statements.
  #[test]
  #[ignore]
  fn test_multi_node_pattern() {
    let pattern = Pattern::new("a;b;c;", Tsx);
    let kinds = pattern.potential_kinds().expect("should have kinds");
    assert_eq!(kinds.len(), 1);
    test_match("a;b;c", "a;b;c;");
  }

  // Ignored: meta variable capture inside a multi-statement pattern.
  #[test]
  #[ignore]
  fn test_multi_node_meta_var() {
    let env = match_env("a;$B;c", "a;b;c");
    assert_eq!(env["B"], "b");
    let env = match_env("a;$B;c", "a;1+2+3;c");
    assert_eq!(env["B"], "1+2+3");
  }

  // Ignored: pins the in-memory size of `Pattern` to 40 bytes.
  #[test]
  #[ignore]
  fn test_pattern_size() {
    assert_eq!(std::mem::size_of::<Pattern>(), 40);
  }

  // A selector that matches nothing is an error; an incomplete expression
  // still builds a pattern, but that pattern reports an error.
  #[test]
  fn test_error_kind() {
    let ret = Pattern::contextual("a", "property_identifier", Tsx);
    assert!(ret.is_err());
    let ret = Pattern::new("123+", Tsx);
    assert!(ret.has_error());
  }

  #[test]
  fn test_bare_wildcard_in_context() {
    let pattern = Pattern::contextual("class A { $F }", "property_identifier", Tsx).expect("test");
    let cand = pattern_node("let b = 123");
    // it should not match
    assert!(pattern.find_node(cand.root()).is_none());
  }

  // The fixed string is the longest terminal text; a meta variable root has
  // none.
  #[test]
  fn test_pattern_fixed_string() {
    let pattern = Pattern::new("class A { $F }", Tsx);
    assert_eq!(pattern.fixed_string(), "class");
    let pattern = Pattern::contextual("class A { $F }", "property_identifier", Tsx).expect("test");
    assert!(pattern.fixed_string().is_empty());
  }

  // Empty sources and multi-node sources are rejected with dedicated errors.
  #[test]
  fn test_pattern_error() {
    let pattern = Pattern::try_new("", Tsx);
    assert!(matches!(pattern, Err(PatternError::NoContent(_))));
    let pattern = Pattern::try_new("12  3344", Tsx);
    assert!(matches!(pattern, Err(PatternError::MultipleNode(_))));
  }

  // Pins the compact `Debug` rendering of a pattern tree.
  #[test]
  fn test_debug_pattern() {
    let pattern = Pattern::new("var $A = 1", Tsx);
    assert_eq!(
      format!("{pattern:?}"),
      "[var, [Capture(\"A\", true), =, 1]]"
    );
  }

  // Returns the sorted names of the meta variables defined by pattern `s`.
  fn defined_vars(s: &str) -> Vec<String> {
    let pattern = Pattern::new(s, Tsx);
    let mut vars: Vec<_> = pattern
      .defined_vars()
      .into_iter()
      .map(String::from)
      .collect();
    vars.sort();
    vars
  }

  // A single capture is reported by name.
  #[test]
  fn test_extract_meta_var_from_pattern() {
    let vars = defined_vars("var $A = 1");
    assert_eq!(vars, ["A"]);
  }

  // Single and multi captures are both reported.
  #[test]
  fn test_extract_complex_meta_var() {
    let vars = defined_vars("function $FUNC($$$ARGS): $RET { $$$BODY }");
    assert_eq!(vars, ["ARGS", "BODY", "FUNC", "RET"]);
  }

  // A variable used twice is reported once.
  #[test]
  fn test_extract_duplicate_meta_var() {
    let vars = defined_vars("var $A = $A");
    assert_eq!(vars, ["A"]);
  }

  // Variables are collected from the selected node of a contextual pattern.
  #[test]
  fn test_contextual_pattern_vars() {
    let pattern = Pattern::contextual("<div ref={$A}/>", "jsx_attribute", Tsx).expect("correct");
    assert_eq!(pattern.defined_vars(), ["A"].into_iter().collect());
  }

  // Regression test for gh issue 1087: a meta variable reused within one
  // pattern.
  #[test]
  fn test_gh_1087() {
    test_match("($P) => $F($P)", "(x) => bar(x)");
  }
}