1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
//! Language Adapter Layer
//!
//! Maps tree-sitter node kinds from each language grammar to generic
//! semantic concepts (declaration, assignment, branch, loop, call, etc.).
//!
//! This module provides a unified abstraction over different programming languages,
//! allowing analysis rules to work with generic concepts rather than language-specific
//! node kinds. Each language module defines a static `LanguageSemantics` instance
//! that maps tree-sitter node kinds to these generic concepts.
use rma_common::Language;
pub mod bash;
pub mod csharp;
pub mod elixir;
pub mod go;
pub mod java;
pub mod javascript;
pub mod kotlin;
pub mod ocaml;
pub mod php;
pub mod python;
pub mod rust_lang;
pub mod scala;
pub mod solidity;
pub mod swift;
/// Generic semantic concepts mapped from tree-sitter node kinds.
///
/// This struct provides a language-agnostic view of code structure,
/// mapping each language's specific syntax to common programming concepts.
///
/// Every field is either a `&'static str` or a `&'static` slice of them, so a
/// value is a table of borrowed data and the derived `Clone` copies references
/// only. Use [`LanguageSemantics::for_language`] to obtain the table for a
/// language, and the `is_*` helper methods to test a node kind against one of
/// the `*_kinds` lists.
#[derive(Debug, Clone)]
pub struct LanguageSemantics {
/// Language name (lowercase), e.g. "javascript" or "rust".
/// This is the string [`LanguageSemantics::language_enum`] matches on.
pub language: &'static str,
// =========================================================================
// Node kinds that represent the same concept across languages.
// Each list is searched by exact string comparison in the matching `is_*`
// helper; an empty list simply never matches.
// =========================================================================
/// Function/method definition node kinds
/// (checked by [`LanguageSemantics::is_function_def`]).
/// Examples: "function_declaration", "fn_item", "function_definition"
pub function_def_kinds: &'static [&'static str],
/// Conditional branch node kinds (checked by [`LanguageSemantics::is_if`]).
/// Examples: "if_statement", "if_expression", "if_let_expression"
pub if_kinds: &'static [&'static str],
/// Loop construct node kinds (checked by [`LanguageSemantics::is_loop`]).
/// Examples: "for_statement", "while_statement", "loop_expression"
pub loop_kinds: &'static [&'static str],
/// Variable declaration node kinds
/// (checked by [`LanguageSemantics::is_variable_declaration`]).
/// Examples: "variable_declaration", "let_declaration", "var_spec"
pub variable_declaration_kinds: &'static [&'static str],
/// Simple assignment node kinds
/// (checked by [`LanguageSemantics::is_assignment`]).
/// Examples: "assignment_expression", "assignment_statement"
pub assignment_kinds: &'static [&'static str],
/// Augmented assignment node kinds (+=, -=, etc.)
/// (checked by [`LanguageSemantics::is_augmented_assignment`]).
/// Examples: "augmented_assignment_expression", "compound_assignment_expr"
pub augmented_assignment_kinds: &'static [&'static str],
/// Return statement node kinds (checked by [`LanguageSemantics::is_return`]).
/// Examples: "return_statement", "return_expression"
pub return_kinds: &'static [&'static str],
/// Function/method call node kinds (checked by [`LanguageSemantics::is_call`]).
/// Examples: "call_expression", "method_invocation"
pub call_kinds: &'static [&'static str],
/// Try/catch/finally construct node kinds
/// (checked by [`LanguageSemantics::is_try_catch`]).
/// Examples: "try_statement", "try_expression"
pub try_catch_kinds: &'static [&'static str],
/// Throw/raise node kinds (checked by [`LanguageSemantics::is_throw`]).
/// Examples: "throw_statement", "raise_statement"
pub throw_kinds: &'static [&'static str],
/// String literal node kinds
/// (checked by [`LanguageSemantics::is_string_literal`]).
/// Examples: "string", "string_literal", "template_string"
pub string_literal_kinds: &'static [&'static str],
/// Numeric literal node kinds
/// (checked by [`LanguageSemantics::is_numeric_literal`]).
/// Examples: "number", "integer_literal", "float_literal"
pub numeric_literal_kinds: &'static [&'static str],
/// Boolean literal node kinds
/// (checked by [`LanguageSemantics::is_boolean_literal`]).
/// Examples: "true", "false", "boolean"
pub boolean_literal_kinds: &'static [&'static str],
/// Null/nil/None literal node kinds
/// (checked by [`LanguageSemantics::is_null_literal`]).
/// Examples: "null", "nil", "None"
pub null_literal_kinds: &'static [&'static str],
/// Parameter definition node kinds
/// (checked by [`LanguageSemantics::is_parameter`]).
/// Examples: "formal_parameters", "parameter", "required_parameter"
pub parameter_kinds: &'static [&'static str],
/// Class/struct definition node kinds
/// (checked by [`LanguageSemantics::is_class`]).
/// Examples: "class_declaration", "struct_item", "type_spec"
pub class_kinds: &'static [&'static str],
/// Import/use statement node kinds
/// (checked by [`LanguageSemantics::is_import`]).
/// Examples: "import_declaration", "use_declaration", "import_statement"
pub import_kinds: &'static [&'static str],
/// Block scope node kinds (introduces a new scope)
/// (checked by [`LanguageSemantics::is_block_scope`]).
/// Examples: "block", "statement_block", "compound_statement"
pub block_scope_kinds: &'static [&'static str],
/// Break statement node kinds (checked by [`LanguageSemantics::is_break`]).
/// Examples: "break_statement", "break_expression"
pub break_kinds: &'static [&'static str],
/// Continue statement node kinds
/// (checked by [`LanguageSemantics::is_continue`]).
/// Examples: "continue_statement", "continue_expression"
pub continue_kinds: &'static [&'static str],
/// Switch/match statement node kinds
/// (checked by [`LanguageSemantics::is_switch`]).
/// Examples: "switch_statement", "match_expression"
pub switch_kinds: &'static [&'static str],
/// Case/arm node kinds in switch/match
/// (checked by [`LanguageSemantics::is_case`]).
/// Examples: "case_clause", "switch_case", "match_arm"
pub case_kinds: &'static [&'static str],
/// Member/property access node kinds
/// (checked by [`LanguageSemantics::is_member_access`]).
/// Examples: "member_expression", "field_expression", "selector_expression"
pub member_access_kinds: &'static [&'static str],
/// Binary expression node kinds
/// (checked by [`LanguageSemantics::is_binary_expression`]).
/// Examples: "binary_expression", "binary_operator"
pub binary_expression_kinds: &'static [&'static str],
/// Identifier node kinds (checked by [`LanguageSemantics::is_identifier`]).
/// Examples: "identifier", "name"
pub identifier_kinds: &'static [&'static str],
/// Unsafe block node kinds (language-specific safety boundaries)
/// (checked by [`LanguageSemantics::is_unsafe_block`]).
/// Examples: "unsafe_block" (Rust)
pub unsafe_block_kinds: &'static [&'static str],
/// Deferred execution node kinds (checked by [`LanguageSemantics::is_defer`]).
/// Examples: "defer_statement" (Go)
pub defer_kinds: &'static [&'static str],
/// Spawn/async task creation node kinds
/// (checked by [`LanguageSemantics::is_spawn`]).
/// Examples: "spawn_expression", "go_statement"
pub spawn_kinds: &'static [&'static str],
// =========================================================================
// Tree-sitter field names for accessing child nodes.
// NOTE(review): none of the helper methods defined next to this struct read
// these fields; presumably analysis code passes them to tree-sitter's
// field-name child lookup. Confirm against callers.
// =========================================================================
/// Field name for condition in if/while/for statements
pub condition_field: &'static str,
/// Field name for the "then" branch
pub consequence_field: &'static str,
/// Field name for the "else" branch
pub alternative_field: &'static str,
/// Field name for function/loop body
pub body_field: &'static str,
/// Field name for variable initializer
pub initializer_field: &'static str,
/// Field name for left side of binary/assignment expressions
pub left_field: &'static str,
/// Field name for right side of binary/assignment expressions
pub right_field: &'static str,
/// Field name for names (function name, variable name, etc.)
pub name_field: &'static str,
/// Field name for function call arguments
pub arguments_field: &'static str,
/// Field name for values (return value, etc.)
pub value_field: &'static str,
/// Field name for operators
pub operator_field: &'static str,
/// Field name for object in member access
pub object_field: &'static str,
/// Field name for property in member access
pub property_field: &'static str,
/// Field name for function in call expression
pub function_field: &'static str,
/// Field name for function parameters
pub parameters_field: &'static str,
/// Field name for return type annotation
pub return_type_field: &'static str,
/// Field name for type annotation
pub type_field: &'static str,
/// Field name for exception handler in try/catch
pub handler_field: &'static str,
/// Field name for finalizer (finally block)
pub finalizer_field: &'static str,
}
impl LanguageSemantics {
/// Get the semantic mapping for a specific language
pub fn for_language(language: Language) -> &'static LanguageSemantics {
match language {
Language::JavaScript | Language::TypeScript => &javascript::JAVASCRIPT_SEMANTICS,
Language::Rust => &rust_lang::RUST_SEMANTICS,
Language::Go => &go::GO_SEMANTICS,
Language::Python => &python::PYTHON_SEMANTICS,
Language::Java => &java::JAVA_SEMANTICS,
Language::Php => &php::PHP_SEMANTICS,
Language::CSharp => &csharp::CSHARP_SEMANTICS,
Language::Kotlin => &kotlin::KOTLIN_SEMANTICS,
Language::Scala => &scala::SCALA_SEMANTICS,
Language::Swift => &swift::SWIFT_SEMANTICS,
Language::Bash => &bash::BASH_SEMANTICS,
Language::Elixir => &elixir::ELIXIR_SEMANTICS,
Language::Solidity => &solidity::SOLIDITY_SEMANTICS,
Language::OCaml => &ocaml::OCAML_SEMANTICS,
// Fallback to JavaScript semantics for other languages
_ => &javascript::JAVASCRIPT_SEMANTICS,
}
}
/// Convert the language string to a Language enum
pub fn language_enum(&self) -> Language {
match self.language {
"javascript" => Language::JavaScript,
"typescript" => Language::TypeScript,
"rust" => Language::Rust,
"go" => Language::Go,
"python" => Language::Python,
"java" => Language::Java,
"php" => Language::Php,
"csharp" => Language::CSharp,
"kotlin" => Language::Kotlin,
"scala" => Language::Scala,
"swift" => Language::Swift,
"bash" => Language::Bash,
"elixir" => Language::Elixir,
"solidity" => Language::Solidity,
"ocaml" => Language::OCaml,
_ => Language::Unknown,
}
}
// =========================================================================
// Helper methods to check if a node kind represents a specific concept
// =========================================================================
/// Check if a node kind represents a function definition
pub fn is_function_def(&self, kind: &str) -> bool {
self.function_def_kinds.contains(&kind)
}
/// Check if a node kind represents a conditional branch (if)
pub fn is_if(&self, kind: &str) -> bool {
self.if_kinds.contains(&kind)
}
/// Check if a node kind represents a loop construct
pub fn is_loop(&self, kind: &str) -> bool {
self.loop_kinds.contains(&kind)
}
/// Check if a node kind represents a variable declaration
pub fn is_variable_declaration(&self, kind: &str) -> bool {
self.variable_declaration_kinds.contains(&kind)
}
/// Check if a node kind represents an assignment
pub fn is_assignment(&self, kind: &str) -> bool {
self.assignment_kinds.contains(&kind)
}
/// Check if a node kind represents an augmented assignment (+=, etc.)
pub fn is_augmented_assignment(&self, kind: &str) -> bool {
self.augmented_assignment_kinds.contains(&kind)
}
/// Check if a node kind represents a return statement
pub fn is_return(&self, kind: &str) -> bool {
self.return_kinds.contains(&kind)
}
/// Check if a node kind represents a function/method call
pub fn is_call(&self, kind: &str) -> bool {
self.call_kinds.contains(&kind)
}
/// Check if a node kind represents a try/catch construct
pub fn is_try_catch(&self, kind: &str) -> bool {
self.try_catch_kinds.contains(&kind)
}
/// Check if a node kind represents a throw/raise statement
pub fn is_throw(&self, kind: &str) -> bool {
self.throw_kinds.contains(&kind)
}
/// Check if a node kind represents a string literal
pub fn is_string_literal(&self, kind: &str) -> bool {
self.string_literal_kinds.contains(&kind)
}
/// Check if a node kind represents a numeric literal
pub fn is_numeric_literal(&self, kind: &str) -> bool {
self.numeric_literal_kinds.contains(&kind)
}
/// Check if a node kind represents a boolean literal
pub fn is_boolean_literal(&self, kind: &str) -> bool {
self.boolean_literal_kinds.contains(&kind)
}
/// Check if a node kind represents a null/nil literal
pub fn is_null_literal(&self, kind: &str) -> bool {
self.null_literal_kinds.contains(&kind)
}
/// Check if a node kind represents any literal value
pub fn is_literal(&self, kind: &str) -> bool {
self.is_string_literal(kind)
|| self.is_numeric_literal(kind)
|| self.is_boolean_literal(kind)
|| self.is_null_literal(kind)
}
/// Check if a node kind represents a parameter definition
pub fn is_parameter(&self, kind: &str) -> bool {
self.parameter_kinds.contains(&kind)
}
/// Check if a node kind represents a class/struct definition
pub fn is_class(&self, kind: &str) -> bool {
self.class_kinds.contains(&kind)
}
/// Check if a node kind represents an import statement
pub fn is_import(&self, kind: &str) -> bool {
self.import_kinds.contains(&kind)
}
/// Check if a node kind represents a block scope
pub fn is_block_scope(&self, kind: &str) -> bool {
self.block_scope_kinds.contains(&kind)
}
/// Check if a node kind represents a break statement
pub fn is_break(&self, kind: &str) -> bool {
self.break_kinds.contains(&kind)
}
/// Check if a node kind represents a continue statement
pub fn is_continue(&self, kind: &str) -> bool {
self.continue_kinds.contains(&kind)
}
/// Check if a node kind represents a switch/match statement
pub fn is_switch(&self, kind: &str) -> bool {
self.switch_kinds.contains(&kind)
}
/// Check if a node kind represents a case/arm in a switch/match
pub fn is_case(&self, kind: &str) -> bool {
self.case_kinds.contains(&kind)
}
/// Check if a node kind represents a member/property access
pub fn is_member_access(&self, kind: &str) -> bool {
self.member_access_kinds.contains(&kind)
}
/// Check if a node kind represents a binary expression
pub fn is_binary_expression(&self, kind: &str) -> bool {
self.binary_expression_kinds.contains(&kind)
}
/// Check if a node kind represents an identifier
pub fn is_identifier(&self, kind: &str) -> bool {
self.identifier_kinds.contains(&kind)
}
/// Check if a node kind represents an unsafe block
pub fn is_unsafe_block(&self, kind: &str) -> bool {
self.unsafe_block_kinds.contains(&kind)
}
/// Check if a node kind represents a defer statement
pub fn is_defer(&self, kind: &str) -> bool {
self.defer_kinds.contains(&kind)
}
/// Check if a node kind represents a spawn/go statement
pub fn is_spawn(&self, kind: &str) -> bool {
self.spawn_kinds.contains(&kind)
}
/// Check if a node kind represents any control flow statement
pub fn is_control_flow(&self, kind: &str) -> bool {
self.is_if(kind)
|| self.is_loop(kind)
|| self.is_switch(kind)
|| self.is_try_catch(kind)
|| self.is_return(kind)
|| self.is_break(kind)
|| self.is_continue(kind)
|| self.is_throw(kind)
}
/// Check if a node kind represents any branching construct
pub fn is_branch(&self, kind: &str) -> bool {
self.is_if(kind) || self.is_switch(kind) || self.is_try_catch(kind)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Core languages must resolve to the table that carries their own name.
    #[test]
    fn test_language_semantics_lookup() {
        let table_name = |language: Language| LanguageSemantics::for_language(language).language;

        assert_eq!(table_name(Language::JavaScript), "javascript");
        assert_eq!(table_name(Language::Rust), "rust");
        assert_eq!(table_name(Language::Go), "go");
        assert_eq!(table_name(Language::Python), "python");
        assert_eq!(table_name(Language::Java), "java");
    }

    /// The JavaScript table recognises function forms and rejects call nodes.
    #[test]
    fn test_javascript_function_detection() {
        let js = LanguageSemantics::for_language(Language::JavaScript);

        for &function_kind in ["function_declaration", "arrow_function"].iter() {
            assert!(js.is_function_def(function_kind));
        }
        assert!(!js.is_function_def("call_expression"));
    }

    /// Only `unsafe_block` counts as unsafe in Rust; a plain block does not.
    #[test]
    fn test_rust_unsafe_detection() {
        let rust = LanguageSemantics::for_language(Language::Rust);

        assert!(rust.is_unsafe_block("unsafe_block"));
        assert!(!rust.is_unsafe_block("block"));
    }

    /// Branch, loop and return nodes are control flow; a call expression is not.
    #[test]
    fn test_control_flow_detection() {
        let js = LanguageSemantics::for_language(Language::JavaScript);

        for &flow_kind in ["if_statement", "for_statement", "return_statement"].iter() {
            assert!(js.is_control_flow(flow_kind));
        }
        assert!(!js.is_control_flow("call_expression"));
    }
}