ast_grep_core/language.rs
1use crate::meta_var::{extract_meta_var, MetaVariable};
2use crate::{AstGrep, Doc, Node, StrDoc};
3use std::borrow::Cow;
4use std::collections::HashMap;
5use std::path::Path;
6pub use tree_sitter::Language as TSLanguage;
7pub use tree_sitter::{Point as TSPoint, Range as TSRange};
8
9/// Trait to abstract ts-language usage in ast-grep, which includes:
10/// * which character is used for meta variable.
11/// * if we need to use other char in meta var for parser at runtime
12/// * pre process the Pattern code.
13pub trait Language: Clone {
14 /// Return the file language from path. Return None if the file type is not supported.
15 fn from_path<P: AsRef<Path>>(_path: P) -> Option<Self> {
16 // TODO: throw panic here if not implemented properly?
17 None
18 }
19
20 /// Create an [`AstGrep`] instance for the language
21 fn ast_grep<S: AsRef<str>>(&self, source: S) -> AstGrep<StrDoc<Self>> {
22 AstGrep::new(source, self.clone())
23 }
24
25 /// tree sitter language to parse the source
26 fn get_ts_language(&self) -> TSLanguage;
27 /// ignore trivial tokens in language matching
28 fn skippable_kind_ids(&self) -> &'static [u16] {
29 &[]
30 }
31
32 /// normalize pattern code before matching
33 /// e.g. remove expression_statement, or prefer parsing {} to object over block
34 fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> {
35 Cow::Borrowed(query)
36 }
37
38 /// Configure meta variable special character
39 /// By default $ is the metavar char, but in PHP it can be #
40 #[inline]
41 fn meta_var_char(&self) -> char {
42 '$'
43 }
44
45 /// Some language does not accept $ as the leading char for identifiers.
46 /// We need to change $ to other char at run-time to make parser happy, thus the name expando.
47 /// By default this is the same as meta_var char so replacement is done at runtime.
48 #[inline]
49 fn expando_char(&self) -> char {
50 self.meta_var_char()
51 }
52
53 /// extract MetaVariable from a given source string
54 /// At runtime we need to use expand_char
55 fn extract_meta_var(&self, source: &str) -> Option<MetaVariable> {
56 extract_meta_var(source, self.expando_char())
57 }
58
59 fn injectable_languages(&self) -> Option<&'static [&'static str]> {
60 None
61 }
62
63 /// get injected language regions in the root document. e.g. get JavaScripts in HTML
64 /// it will return a list of tuples of (language, regions).
65 /// The first item is the embedded region language, e.g. javascript
66 /// The second item is a list of regions in tree_sitter.
67 /// also see https://tree-sitter.github.io/tree-sitter/using-parsers#multi-language-documents
68 fn extract_injections<D: Doc>(&self, _root: Node<D>) -> HashMap<String, Vec<TSRange>> {
69 HashMap::new()
70 }
71}
72
73impl Language for TSLanguage {
74 fn get_ts_language(&self) -> TSLanguage {
75 self.clone()
76 }
77}
78
79#[cfg(test)]
80pub use test::*;
81
#[cfg(test)]
mod test {
    use super::{Language, TSLanguage};

    /// Test-only language backed by the TSX grammar of `tree_sitter_typescript`.
    #[derive(Clone)]
    pub struct Tsx;

    impl Language for Tsx {
        /// Converts the TSX grammar handle into a [`TSLanguage`].
        fn get_ts_language(&self) -> TSLanguage {
            let tsx_grammar = tree_sitter_typescript::language_tsx();
            tsx_grammar.into()
        }
    }
}