Skip to main content

rma_parser/
languages.rs

1//! Language support module - provides tree-sitter grammars for 30+ languages
2//!
3//! This module provides maximum language coverage with tree-sitter grammars,
4//! optimized for fast parsing and security analysis.
5
6use anyhow::Result;
7use rma_common::{Language, RmaError};
8use tree_sitter::Language as TsLanguage;
9
10/// Get the tree-sitter language for a given language enum
11///
12/// Performance: Uses static references to avoid repeated allocations
13#[inline]
14pub fn get_language(lang: Language) -> Result<TsLanguage> {
15    match lang {
16        // Systems languages
17        Language::Rust => Ok(tree_sitter_rust::LANGUAGE.into()),
18        Language::C => Ok(tree_sitter_c::LANGUAGE.into()),
19        Language::Cpp => Ok(tree_sitter_cpp::LANGUAGE.into()),
20        Language::Zig => {
21            Err(RmaError::UnsupportedLanguage("zig - grammar not yet available".into()).into())
22        }
23
24        // JVM languages
25        Language::Java => Ok(tree_sitter_java::LANGUAGE.into()),
26        Language::Kotlin => Ok(tree_sitter_kotlin::LANGUAGE.into()),
27        Language::Scala => Ok(tree_sitter_scala::LANGUAGE.into()),
28
29        // Web languages
30        Language::JavaScript => Ok(tree_sitter_javascript::LANGUAGE.into()),
31        Language::TypeScript => Ok(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
32        Language::Html => Ok(tree_sitter_html::LANGUAGE.into()),
33        Language::Css => Ok(tree_sitter_css::LANGUAGE.into()),
34        Language::Scss => Ok(tree_sitter_css::LANGUAGE.into()), // Reuse CSS grammar for SCSS
35        Language::Vue => {
36            Err(RmaError::UnsupportedLanguage("vue - grammar not yet available".into()).into())
37        }
38        Language::Svelte => {
39            Err(RmaError::UnsupportedLanguage("svelte - grammar not yet available".into()).into())
40        }
41
42        // Scripting languages
43        Language::Python => Ok(tree_sitter_python::LANGUAGE.into()),
44        Language::Ruby => Ok(tree_sitter_ruby::LANGUAGE.into()),
45        Language::Php => Ok(tree_sitter_php::LANGUAGE_PHP.into()),
46        Language::Lua => Ok(tree_sitter_lua::LANGUAGE.into()),
47        Language::Perl => {
48            Err(RmaError::UnsupportedLanguage("perl - grammar not yet available".into()).into())
49        }
50
51        // Functional languages
52        Language::Haskell => Ok(tree_sitter_haskell::LANGUAGE.into()),
53        Language::OCaml => Ok(tree_sitter_ocaml::LANGUAGE_OCAML.into()),
54        Language::Elixir => Ok(tree_sitter_elixir::LANGUAGE.into()),
55        Language::Erlang => {
56            Err(RmaError::UnsupportedLanguage("erlang - grammar not yet available".into()).into())
57        }
58
59        // Other compiled languages
60        Language::Go => Ok(tree_sitter_go::LANGUAGE.into()),
61        Language::Swift => Ok(tree_sitter_swift::LANGUAGE.into()),
62        Language::CSharp => Ok(tree_sitter_c_sharp::LANGUAGE.into()),
63        Language::Dart => {
64            Err(RmaError::UnsupportedLanguage("dart - grammar not yet available".into()).into())
65        }
66
67        // Data/Config languages
68        Language::Json => Ok(tree_sitter_json::LANGUAGE.into()),
69        Language::Yaml => Ok(tree_sitter_yaml::LANGUAGE.into()),
70        Language::Toml => Ok(tree_sitter_toml::LANGUAGE.into()),
71        Language::Sql => Err(RmaError::UnsupportedLanguage(
72            "sql - grammar incompatible with tree-sitter 0.24".into(),
73        )
74        .into()),
75        Language::GraphQL => {
76            Err(RmaError::UnsupportedLanguage("graphql - grammar not yet available".into()).into())
77        }
78
79        // Infrastructure
80        Language::Bash => Ok(tree_sitter_bash::LANGUAGE.into()),
81        Language::Dockerfile => Err(RmaError::UnsupportedLanguage(
82            "dockerfile - grammar incompatible with tree-sitter 0.24".into(),
83        )
84        .into()),
85        Language::Hcl => Ok(tree_sitter_hcl::LANGUAGE.into()),
86        Language::Nix => {
87            Err(RmaError::UnsupportedLanguage("nix - grammar not yet available".into()).into())
88        }
89
90        // Markup
91        Language::Markdown => Ok(tree_sitter_markdown::LANGUAGE.into()),
92        Language::Latex => {
93            Err(RmaError::UnsupportedLanguage("latex - grammar not yet available".into()).into())
94        }
95
96        // Other
97        Language::Solidity => Ok(tree_sitter_solidity::LANGUAGE.into()),
98        Language::Wasm => {
99            Err(RmaError::UnsupportedLanguage("wasm - grammar not yet available".into()).into())
100        }
101        Language::Protobuf => Err(RmaError::UnsupportedLanguage(
102            "protobuf - grammar incompatible with tree-sitter 0.24".into(),
103        )
104        .into()),
105
106        Language::Unknown => Err(RmaError::UnsupportedLanguage("unknown".into()).into()),
107    }
108}
109
110/// Check if a language has tree-sitter support
111#[inline]
112pub fn has_grammar(lang: Language) -> bool {
113    get_language(lang).is_ok()
114}
115
116/// Get all languages with tree-sitter support
117pub fn supported_languages() -> Vec<Language> {
118    vec![
119        Language::Rust,
120        Language::C,
121        Language::Cpp,
122        Language::Java,
123        Language::Kotlin,
124        Language::Scala,
125        Language::JavaScript,
126        Language::TypeScript,
127        Language::Html,
128        Language::Css,
129        Language::Python,
130        Language::Ruby,
131        Language::Php,
132        Language::Lua,
133        Language::Haskell,
134        Language::OCaml,
135        Language::Elixir,
136        Language::Go,
137        Language::Swift,
138        Language::CSharp,
139        Language::Json,
140        Language::Yaml,
141        Language::Toml,
142        // Language::Sql disabled - no compatible crate for tree-sitter 0.24
143        Language::Bash,
144        // Language::Dockerfile disabled - no compatible crate for tree-sitter 0.24
145        Language::Hcl,
146        Language::Markdown,
147        Language::Solidity,
148        // Language::Protobuf disabled - no compatible crate for tree-sitter 0.24
149    ]
150}
151
152/// Get query patterns for common constructs in each language
153pub mod queries {
154    use rma_common::Language;
155
156    /// Function definition query for each language
157    pub fn function_query(lang: Language) -> Option<&'static str> {
158        match lang {
159            Language::Rust => Some(
160                r#"
161                (function_item name: (identifier) @name) @function
162                (impl_item (function_item name: (identifier) @name)) @method
163                "#,
164            ),
165            Language::C | Language::Cpp => Some(
166                r#"
167                (function_definition declarator: (function_declarator declarator: (identifier) @name)) @function
168                "#,
169            ),
170            Language::JavaScript | Language::TypeScript => Some(
171                r#"
172                (function_declaration name: (identifier) @name) @function
173                (method_definition name: (property_identifier) @name) @method
174                (arrow_function) @arrow
175                "#,
176            ),
177            Language::Python => Some(
178                r#"
179                (function_definition name: (identifier) @name) @function
180                (class_definition body: (block (function_definition name: (identifier) @name))) @method
181                "#,
182            ),
183            Language::Go => Some(
184                r#"
185                (function_declaration name: (identifier) @name) @function
186                (method_declaration name: (field_identifier) @name) @method
187                "#,
188            ),
189            Language::Java | Language::Kotlin | Language::Scala => Some(
190                r#"
191                (method_declaration name: (identifier) @name) @method
192                (constructor_declaration name: (identifier) @name) @constructor
193                "#,
194            ),
195            Language::Ruby => Some(
196                r#"
197                (method name: (identifier) @name) @method
198                "#,
199            ),
200            Language::Php => Some(
201                r#"
202                (function_definition name: (name) @name) @function
203                (method_declaration name: (name) @name) @method
204                "#,
205            ),
206            Language::Swift => Some(
207                r#"
208                (function_declaration name: (simple_identifier) @name) @function
209                "#,
210            ),
211            Language::CSharp => Some(
212                r#"
213                (method_declaration name: (identifier) @name) @method
214                "#,
215            ),
216            Language::Haskell => Some(
217                r#"
218                (function name: (variable) @name) @function
219                "#,
220            ),
221            Language::Elixir => Some(
222                r#"
223                (call target: (identifier) @keyword arguments: (arguments (identifier) @name)) @function
224                "#,
225            ),
226            Language::Lua => Some(
227                r#"
228                (function_declaration name: (identifier) @name) @function
229                "#,
230            ),
231            Language::Bash => Some(
232                r#"
233                (function_definition name: (word) @name) @function
234                "#,
235            ),
236            Language::Solidity => Some(
237                r#"
238                (function_definition name: (identifier) @name) @function
239                "#,
240            ),
241            _ => None,
242        }
243    }
244
245    /// Class/struct definition query for each language
246    pub fn class_query(lang: Language) -> Option<&'static str> {
247        match lang {
248            Language::Rust => Some(
249                r#"
250                (struct_item name: (type_identifier) @name) @struct
251                (enum_item name: (type_identifier) @name) @enum
252                (impl_item type: (type_identifier) @name) @impl
253                "#,
254            ),
255            Language::C | Language::Cpp => Some(
256                r#"
257                (struct_specifier name: (type_identifier) @name) @struct
258                (class_specifier name: (type_identifier) @name) @class
259                "#,
260            ),
261            Language::JavaScript | Language::TypeScript => Some(
262                r#"
263                (class_declaration name: (identifier) @name) @class
264                "#,
265            ),
266            Language::Python => Some(
267                r#"
268                (class_definition name: (identifier) @name) @class
269                "#,
270            ),
271            Language::Go => Some(
272                r#"
273                (type_declaration (type_spec name: (type_identifier) @name)) @type
274                "#,
275            ),
276            Language::Java | Language::Kotlin | Language::Scala => Some(
277                r#"
278                (class_declaration name: (identifier) @name) @class
279                (interface_declaration name: (identifier) @name) @interface
280                "#,
281            ),
282            Language::Ruby => Some(
283                r#"
284                (class name: (constant) @name) @class
285                (module name: (constant) @name) @module
286                "#,
287            ),
288            Language::Php => Some(
289                r#"
290                (class_declaration name: (name) @name) @class
291                (interface_declaration name: (name) @name) @interface
292                "#,
293            ),
294            Language::Swift => Some(
295                r#"
296                (class_declaration name: (type_identifier) @name) @class
297                (struct_declaration name: (type_identifier) @name) @struct
298                "#,
299            ),
300            Language::CSharp => Some(
301                r#"
302                (class_declaration name: (identifier) @name) @class
303                (interface_declaration name: (identifier) @name) @interface
304                "#,
305            ),
306            Language::Solidity => Some(
307                r#"
308                (contract_declaration name: (identifier) @name) @contract
309                "#,
310            ),
311            _ => None,
312        }
313    }
314
315    /// Import/use statement query for each language
316    pub fn import_query(lang: Language) -> Option<&'static str> {
317        match lang {
318            Language::Rust => Some(
319                r#"
320                (use_declaration) @import
321                (extern_crate_declaration) @import
322                "#,
323            ),
324            Language::C | Language::Cpp => Some(
325                r#"
326                (preproc_include) @import
327                "#,
328            ),
329            Language::JavaScript | Language::TypeScript => Some(
330                r#"
331                (import_statement) @import
332                (import_clause) @import
333                "#,
334            ),
335            Language::Python => Some(
336                r#"
337                (import_statement) @import
338                (import_from_statement) @import
339                "#,
340            ),
341            Language::Go => Some(
342                r#"
343                (import_declaration) @import
344                "#,
345            ),
346            Language::Java | Language::Kotlin | Language::Scala => Some(
347                r#"
348                (import_declaration) @import
349                "#,
350            ),
351            Language::Ruby => Some(
352                r#"
353                (call method: (identifier) @method (#match? @method "require|require_relative|include|extend")) @import
354                "#,
355            ),
356            Language::Php => Some(
357                r#"
358                (namespace_use_declaration) @import
359                "#,
360            ),
361            Language::Swift => Some(
362                r#"
363                (import_declaration) @import
364                "#,
365            ),
366            Language::CSharp => Some(
367                r#"
368                (using_directive) @import
369                "#,
370            ),
371            Language::Elixir => Some(
372                r#"
373                (call target: (identifier) @keyword (#match? @keyword "import|require|use|alias")) @import
374                "#,
375            ),
376            Language::Solidity => Some(
377                r#"
378                (import_directive) @import
379                "#,
380            ),
381            _ => None,
382        }
383    }
384
385    /// Call expression query for taint tracking
386    pub fn call_query(lang: Language) -> Option<&'static str> {
387        match lang {
388            Language::Rust => Some(
389                r#"
390                (call_expression function: (identifier) @callee) @call
391                (call_expression function: (field_expression field: (field_identifier) @callee)) @call
392                "#,
393            ),
394            Language::C | Language::Cpp => Some(
395                r#"
396                (call_expression function: (identifier) @callee) @call
397                "#,
398            ),
399            Language::JavaScript | Language::TypeScript => Some(
400                r#"
401                (call_expression function: (identifier) @callee) @call
402                (call_expression function: (member_expression property: (property_identifier) @callee)) @call
403                "#,
404            ),
405            Language::Python => Some(
406                r#"
407                (call function: (identifier) @callee) @call
408                (call function: (attribute attribute: (identifier) @callee)) @call
409                "#,
410            ),
411            Language::Go => Some(
412                r#"
413                (call_expression function: (identifier) @callee) @call
414                (call_expression function: (selector_expression field: (field_identifier) @callee)) @call
415                "#,
416            ),
417            Language::Java | Language::Kotlin => Some(
418                r#"
419                (method_invocation name: (identifier) @callee) @call
420                "#,
421            ),
422            Language::Ruby => Some(
423                r#"
424                (call method: (identifier) @callee) @call
425                "#,
426            ),
427            Language::Php => Some(
428                r#"
429                (function_call_expression function: (name) @callee) @call
430                (method_call_expression name: (name) @callee) @call
431                "#,
432            ),
433            Language::Swift => Some(
434                r#"
435                (call_expression (simple_identifier) @callee) @call
436                "#,
437            ),
438            _ => None,
439        }
440    }
441
442    /// Assignment expression query for taint tracking
443    pub fn assignment_query(lang: Language) -> Option<&'static str> {
444        match lang {
445            Language::Rust => Some(
446                r#"
447                (assignment_expression left: (identifier) @lhs) @assignment
448                (let_declaration pattern: (identifier) @lhs) @declaration
449                "#,
450            ),
451            Language::C | Language::Cpp => Some(
452                r#"
453                (assignment_expression left: (identifier) @lhs) @assignment
454                (declaration declarator: (init_declarator declarator: (identifier) @lhs)) @declaration
455                "#,
456            ),
457            Language::JavaScript | Language::TypeScript => Some(
458                r#"
459                (assignment_expression left: (identifier) @lhs) @assignment
460                (variable_declarator name: (identifier) @lhs) @declaration
461                "#,
462            ),
463            Language::Python => Some(
464                r#"
465                (assignment left: (identifier) @lhs) @assignment
466                "#,
467            ),
468            Language::Go => Some(
469                r#"
470                (assignment_statement left: (identifier) @lhs) @assignment
471                (short_var_declaration left: (expression_list (identifier) @lhs)) @declaration
472                "#,
473            ),
474            Language::Java | Language::Kotlin => Some(
475                r#"
476                (assignment_expression left: (identifier) @lhs) @assignment
477                (variable_declarator name: (identifier) @lhs) @declaration
478                "#,
479            ),
480            Language::Ruby => Some(
481                r#"
482                (assignment left: (identifier) @lhs) @assignment
483                "#,
484            ),
485            Language::Php => Some(
486                r#"
487                (assignment_expression left: (variable_name) @lhs) @assignment
488                "#,
489            ),
490            _ => None,
491        }
492    }
493}
494
#[cfg(test)]
mod tests {
    use super::*;

    /// Mainstream languages resolve to a grammar; `Unknown` is rejected.
    #[test]
    fn test_get_language() {
        let resolves = |lang: Language| get_language(lang).is_ok();

        assert!(resolves(Language::Rust));
        assert!(resolves(Language::JavaScript));
        assert!(resolves(Language::Python));
        assert!(resolves(Language::Go));
        assert!(resolves(Language::Java));
        assert!(resolves(Language::C));
        assert!(resolves(Language::Cpp));
        assert!(resolves(Language::Ruby));
        assert!(resolves(Language::Php));
        assert!(!resolves(Language::Unknown));
    }

    /// The advertised language list must not shrink below 25 entries.
    #[test]
    fn test_supported_languages_count() {
        let count = supported_languages().len();
        assert!(count >= 25, "Expected at least 25 supported languages");
    }

    /// Function queries exist for core languages and are absent for `Unknown`.
    #[test]
    fn test_function_queries_exist() {
        let has_query = |lang: Language| queries::function_query(lang).is_some();

        assert!(has_query(Language::Rust));
        assert!(has_query(Language::JavaScript));
        assert!(has_query(Language::Python));
        assert!(has_query(Language::C));
        assert!(!has_query(Language::Unknown));
    }

    /// `has_grammar` reports `true` for supported languages and `false` for `Unknown`.
    #[test]
    fn test_has_grammar() {
        let with_grammar = [Language::Rust, Language::Python, Language::Go];
        for lang in with_grammar {
            assert!(has_grammar(lang));
        }
        assert!(!has_grammar(Language::Unknown));
    }
}