Skip to main content

gobby_code/index/
languages.rs

//! Language registry with tree-sitter query definitions.
//! Ports the language specs from src/gobby/code_index/languages.py; 17 languages are registered here.
3
4use tree_sitter::Language;
5
/// Specification for a single language's tree-sitter queries.
///
/// Every field is a `'static` reference, so a spec can be declared as a
/// `const` item and copied or shared freely. An empty query string means the
/// language defines no query of that kind.
#[derive(Debug, Clone, Copy)]
pub struct LanguageSpec {
    /// File extensions handled by this spec, written in lowercase with the
    /// leading dot (for example `".py"`).
    pub extensions: &'static [&'static str],
    /// Query for definitions: the identifier is captured as `@name` and the
    /// whole node as `@definition.<kind>`.
    pub symbol_query: &'static str,
    /// Query for import-like statements, captured as `@import`.
    pub import_query: &'static str,
    /// Query for call sites: the callee identifier is captured as `@name` and
    /// the whole call node as `@call`.
    pub call_query: &'static str,
}
13
14// ── Query Definitions ──────────────────────────────────────────────────
15
/// Python spec (`.py` sources and `.pyi` stubs).
const PYTHON: LanguageSpec = LanguageSpec {
    extensions: &[".py", ".pyi"],
    // Definitions: `def` and `class` statements, named by their identifier.
    symbol_query: r#"
        (function_definition name: (identifier) @name) @definition.function
        (class_definition name: (identifier) @name) @definition.class
    "#,
    // Imports: both `import x` and `from x import y` forms, captured whole.
    import_query: r#"
        (import_statement) @import
        (import_from_statement) @import
    "#,
    // Calls: a bare identifier callee, or the attribute name of an attribute
    // callee (the `method` in `obj.method()`).
    call_query: r#"
        (call function: (identifier) @name) @call
        (call function: (attribute attribute: (identifier) @name)) @call
    "#,
};
31
/// JavaScript spec (`.js`, `.jsx`, `.cjs`, `.mjs`).
///
/// NOTE(review): an exported function or class presumably matches both the
/// bare declaration pattern and the `export_statement` pattern; confirm the
/// consumer of these captures de-duplicates.
const JAVASCRIPT: LanguageSpec = LanguageSpec {
    extensions: &[".js", ".jsx", ".cjs", ".mjs"],
    // Definitions: function and class declarations, class methods, their
    // exported forms, and lexical declarations whose value is an arrow function.
    symbol_query: r#"
        (function_declaration name: (identifier) @name) @definition.function
        (class_declaration name: (identifier) @name) @definition.class
        (method_definition name: (property_identifier) @name) @definition.method
        (export_statement declaration: (function_declaration name: (identifier) @name)) @definition.function
        (export_statement declaration: (class_declaration name: (identifier) @name)) @definition.class
        (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function))) @definition.function
    "#,
    // Imports: whole `import` statements.
    import_query: r#"
        (import_statement) @import
    "#,
    // Calls: a bare identifier callee, or the property name of a member callee.
    call_query: r#"
        (call_expression function: (identifier) @name) @call
        (call_expression function: (member_expression property: (property_identifier) @name)) @call
    "#,
};
50
/// TypeScript spec (`.ts`, `.tsx`).
///
/// Mirrors the JavaScript spec and adds interfaces, type aliases and enums,
/// all tagged `definition.type`. Class, interface and type-alias names are
/// `type_identifier` nodes here rather than plain `identifier`.
///
/// NOTE(review): exported declarations presumably match both the bare pattern
/// and the `export_statement` pattern; confirm the consumer de-duplicates.
const TYPESCRIPT: LanguageSpec = LanguageSpec {
    extensions: &[".ts", ".tsx"],
    // Definitions: the bare declaration patterns come first, followed by the
    // same set wrapped in `export_statement`.
    symbol_query: r#"
        (function_declaration name: (identifier) @name) @definition.function
        (class_declaration name: (type_identifier) @name) @definition.class
        (method_definition name: (property_identifier) @name) @definition.method
        (interface_declaration name: (type_identifier) @name) @definition.type
        (type_alias_declaration name: (type_identifier) @name) @definition.type
        (enum_declaration name: (identifier) @name) @definition.type
        (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function))) @definition.function
        (export_statement declaration: (function_declaration name: (identifier) @name)) @definition.function
        (export_statement declaration: (class_declaration name: (type_identifier) @name)) @definition.class
        (export_statement declaration: (interface_declaration name: (type_identifier) @name)) @definition.type
        (export_statement declaration: (type_alias_declaration name: (type_identifier) @name)) @definition.type
        (export_statement declaration: (enum_declaration name: (identifier) @name)) @definition.type
        (export_statement declaration: (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function)))) @definition.function
    "#,
    // Imports: whole `import` statements.
    import_query: r#"
        (import_statement) @import
    "#,
    // Calls: a bare identifier callee, or the property name of a member callee.
    call_query: r#"
        (call_expression function: (identifier) @name) @call
        (call_expression function: (member_expression property: (property_identifier) @name)) @call
    "#,
};
76
/// Go spec (`.go`).
const GO: LanguageSpec = LanguageSpec {
    extensions: &[".go"],
    // Definitions: functions, methods (named by a `field_identifier`), and
    // each `type_spec` inside a `type` declaration.
    symbol_query: r#"
        (function_declaration name: (identifier) @name) @definition.function
        (method_declaration name: (field_identifier) @name) @definition.method
        (type_declaration (type_spec name: (type_identifier) @name)) @definition.type
    "#,
    // Imports: whole `import` declarations.
    import_query: r#"
        (import_declaration) @import
    "#,
    // Calls: a bare identifier callee, or the field name of a selector callee
    // (the `Println` in `fmt.Println()`).
    call_query: r#"
        (call_expression function: (identifier) @name) @call
        (call_expression function: (selector_expression field: (field_identifier) @name)) @call
    "#,
};
92
/// Rust spec (`.rs`).
///
/// Structs and `impl` blocks are tagged `definition.class`; enums, traits and
/// type aliases are tagged `definition.type`.
const RUST: LanguageSpec = LanguageSpec {
    extensions: &[".rs"],
    // Definitions. The `impl_item` pattern names the block after its
    // implementing type and only matches when that type is a plain
    // `type_identifier`.
    symbol_query: r#"
        (function_item name: (identifier) @name) @definition.function
        (struct_item name: (type_identifier) @name) @definition.class
        (enum_item name: (type_identifier) @name) @definition.type
        (trait_item name: (type_identifier) @name) @definition.type
        (impl_item type: (type_identifier) @name) @definition.class
        (type_item name: (type_identifier) @name) @definition.type
    "#,
    // Imports: whole `use` declarations.
    import_query: r#"
        (use_declaration) @import
    "#,
    // Calls: a bare identifier, the final segment of a scoped path
    // (`a::b::f()` captures `f`), or the field name of a method-style call.
    call_query: r#"
        (call_expression function: (identifier) @name) @call
        (call_expression function: (scoped_identifier name: (identifier) @name)) @call
        (call_expression function: (field_expression field: (field_identifier) @name)) @call
    "#,
};
112
/// Java spec (`.java`).
const JAVA: LanguageSpec = LanguageSpec {
    extensions: &[".java"],
    // Definitions: methods and constructors are both tagged
    // `definition.method`; interfaces and enums are tagged `definition.type`.
    symbol_query: r#"
        (method_declaration name: (identifier) @name) @definition.method
        (class_declaration name: (identifier) @name) @definition.class
        (interface_declaration name: (identifier) @name) @definition.type
        (enum_declaration name: (identifier) @name) @definition.type
        (constructor_declaration name: (identifier) @name) @definition.method
    "#,
    // Imports: whole `import` declarations.
    import_query: r#"
        (import_declaration) @import
    "#,
    // Calls: method invocations, named by the invoked method.
    call_query: r#"
        (method_invocation name: (identifier) @name) @call
    "#,
};
129
/// PHP spec (`.php`).
const PHP: LanguageSpec = LanguageSpec {
    extensions: &[".php"],
    // Definitions: functions, classes and methods; interfaces and traits are
    // tagged `definition.type`.
    symbol_query: r#"
        (function_definition name: (name) @name) @definition.function
        (class_declaration name: (name) @name) @definition.class
        (method_declaration name: (name) @name) @definition.method
        (interface_declaration name: (name) @name) @definition.type
        (trait_declaration name: (name) @name) @definition.type
    "#,
    // Imports: namespace `use` declarations.
    import_query: r#"
        (namespace_use_declaration) @import
    "#,
    // Calls: plain function calls (by simple or qualified name), scoped calls
    // whose scope is a name or qualified name, and member calls.
    call_query: r#"
        (function_call_expression function: (name) @name) @call
        (function_call_expression function: (qualified_name) @name) @call
        (scoped_call_expression scope: [(name) (qualified_name)] name: (name) @name) @call
        (member_call_expression name: (name) @name) @call
    "#,
};
149
/// Dart spec (`.dart`).
///
/// This is the only spec with symbol and import queries but no call query;
/// see the comment on `call_query`.
const DART: LanguageSpec = LanguageSpec {
    extensions: &[".dart"],
    // Definitions: function signatures, classes, method signatures (which
    // wrap a `function_signature`), and enums.
    symbol_query: r#"
        (function_signature name: (identifier) @name) @definition.function
        (class_declaration name: (identifier) @name) @definition.class
        (method_signature (function_signature (identifier) @name)) @definition.method
        (enum_declaration name: (identifier) @name) @definition.type
    "#,
    // Imports: a single node kind covering both import and export directives.
    import_query: r#"
        (import_or_export) @import
    "#,
    // Dart calls are extracted by parser.rs because this grammar models calls as
    // selector chains rather than a stable call-expression node.
    call_query: "",
};
165
/// C# spec (`.cs`).
const CSHARP: LanguageSpec = LanguageSpec {
    extensions: &[".cs"],
    // Definitions: methods and constructors are both tagged
    // `definition.method`; interfaces, structs and enums are tagged
    // `definition.type`.
    symbol_query: r#"
        (method_declaration name: (identifier) @name) @definition.method
        (class_declaration name: (identifier) @name) @definition.class
        (interface_declaration name: (identifier) @name) @definition.type
        (struct_declaration name: (identifier) @name) @definition.type
        (enum_declaration name: (identifier) @name) @definition.type
        (constructor_declaration name: (identifier) @name) @definition.method
    "#,
    // Imports: `using` directives.
    import_query: r#"
        (using_directive) @import
    "#,
    // Calls: a bare identifier callee, or the member name of a member-access
    // callee.
    call_query: r#"
        (invocation_expression function: (identifier) @name) @call
        (invocation_expression function: (member_access_expression name: (identifier) @name)) @call
    "#,
};
184
/// C spec (`.c`, `.h`).
///
/// `.h` is claimed here and not by the C++ spec, so every `.h` header is
/// detected as C.
const C_LANG: LanguageSpec = LanguageSpec {
    extensions: &[".c", ".h"],
    // Definitions: functions whose declarator is directly an identifier, named
    // structs and enums, and typedef names.
    symbol_query: r#"
        (function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function
        (struct_specifier name: (type_identifier) @name) @definition.type
        (enum_specifier name: (type_identifier) @name) @definition.type
        (type_definition declarator: (type_identifier) @name) @definition.type
    "#,
    // Imports: `#include` directives.
    import_query: r#"
        (preproc_include) @import
    "#,
    // Calls: only calls whose callee is a bare identifier.
    call_query: r#"
        (call_expression function: (identifier) @name) @call
    "#,
};
200
/// C++ spec (`.cpp`, `.cc`, `.cxx`, `.hpp`, `.hxx`, `.hh`).
const CPP: LanguageSpec = LanguageSpec {
    extensions: &[".cpp", ".cc", ".cxx", ".hpp", ".hxx", ".hh"],
    // Definitions: functions declared with a bare identifier or a qualified
    // identifier (the final name is captured), classes, and structs (tagged
    // `definition.type`).
    symbol_query: r#"
        (function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function
        (function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.function
        (class_specifier name: (type_identifier) @name) @definition.class
        (struct_specifier name: (type_identifier) @name) @definition.type
    "#,
    // Imports: `#include` directives.
    import_query: r#"
        (preproc_include) @import
    "#,
    // Calls: a bare identifier callee, or the field name of a member call.
    call_query: r#"
        (call_expression function: (identifier) @name) @call
        (call_expression function: (field_expression field: (field_identifier) @name)) @call
    "#,
};
217
/// Elixir spec (`.ex`, `.exs`).
///
/// Every query matches `call` nodes and tells them apart by the target
/// identifier's text through `#any-of?` predicates. Captures starting with an
/// underscore (`@_keyword`) exist only to feed those predicates.
///
/// NOTE(review): the function pattern requires a bare `identifier` directly
/// under `arguments`; confirm whether definitions with a parenthesised
/// parameter list (which may parse as a nested `call`) are picked up elsewhere.
const ELIXIR: LanguageSpec = LanguageSpec {
    extensions: &[".ex", ".exs"],
    // Definitions: `def`/`defp`/`defmacro` calls are functions; `defmodule`
    // calls with an alias argument are tagged `definition.class`.
    symbol_query: r#"
        (call target: (identifier) @_keyword (#any-of? @_keyword "def" "defp" "defmacro") (arguments (identifier) @name)) @definition.function
        (call target: (identifier) @_keyword (#any-of? @_keyword "defmodule") (arguments (alias) @name)) @definition.class
    "#,
    // Imports: calls to `import`, `alias`, `use` or `require`.
    import_query: r#"
        (call target: (identifier) @_keyword (#any-of? @_keyword "import" "alias" "use" "require")) @import
    "#,
    // Calls: any call with an identifier target, or a dotted target whose
    // right-hand side is an identifier. There is no predicate here, so the
    // keyword calls matched above are captured too.
    call_query: r#"
        (call target: (identifier) @name) @call
        (call target: (dot right: (identifier) @name)) @call
    "#,
};
232
/// Ruby spec (`.rb`, `.rake`, `.gemspec`).
const RUBY: LanguageSpec = LanguageSpec {
    extensions: &[".rb", ".rake", ".gemspec"],
    // Definitions: methods and singleton methods are both tagged
    // `definition.function`; classes and modules are both tagged
    // `definition.class`.
    symbol_query: r#"
        (method name: (identifier) @name) @definition.function
        (singleton_method name: (identifier) @name) @definition.function
        (class name: (constant) @name) @definition.class
        (module name: (constant) @name) @definition.class
    "#,
    // Imports: calls whose method name is one of the listed loading or mixin
    // methods. `@_m` exists only to feed the `#any-of?` predicate.
    import_query: r#"
        (call method: (identifier) @_m (#any-of? @_m "require" "require_relative" "load" "include" "extend" "prepend")) @import
    "#,
    // Calls: any call with an identifier method name. There is no predicate
    // here, so the import-like calls above are captured too.
    call_query: r#"
        (call method: (identifier) @name) @call
    "#,
};
248
/// Kotlin spec (`.kt`, `.kts`).
const KOTLIN: LanguageSpec = LanguageSpec {
    extensions: &[".kt", ".kts"],
    // Definitions: functions, classes, and `object` declarations (tagged
    // `definition.class`).
    symbol_query: r#"
        (function_declaration name: (identifier) @name) @definition.function
        (class_declaration name: (identifier) @name) @definition.class
        (object_declaration name: (identifier) @name) @definition.class
    "#,
    // Imports: `import` nodes.
    import_query: r#"
        (import) @import
    "#,
    // Calls: these patterns match by child position, not by field name: a
    // direct identifier child, or the second identifier of a navigation
    // expression.
    call_query: r#"
        (call_expression (identifier) @name) @call
        (call_expression (navigation_expression (identifier) (identifier) @name)) @call
    "#,
};
264
/// YAML spec (`.yaml`, `.yml`).
///
/// A data format: only symbols are extracted, and the import and call queries
/// are empty.
const YAML: LanguageSpec = LanguageSpec {
    extensions: &[".yaml", ".yml"],
    // Every block-mapping pair is a `definition.property`, named by its key
    // node whatever that node's kind.
    symbol_query: r#"
        (block_mapping_pair key: (_) @name) @definition.property
    "#,
    import_query: "",
    call_query: "",
};
273
/// JSON spec (`.json`, `.jsonc`).
///
/// A data format: only symbols are extracted, and the import and call queries
/// are empty.
const JSON_LANG: LanguageSpec = LanguageSpec {
    extensions: &[".json", ".jsonc"],
    // Every object pair is a `definition.property`, named by the content of
    // its string key (the text without the surrounding quotes).
    symbol_query: r#"
        (pair key: (string (string_content) @name)) @definition.property
    "#,
    import_query: "",
    call_query: "",
};
282
/// Swift spec (`.swift`).
///
/// The symbol query uses one `class_declaration` node kind for classes,
/// actors, structs and enums, told apart by the `declaration_kind` field.
/// Classes and actors are tagged `definition.class`; structs, enums and
/// protocols are tagged `definition.type`.
const SWIFT: LanguageSpec = LanguageSpec {
    extensions: &[".swift"],
    symbol_query: r#"
        (function_declaration name: (simple_identifier) @name) @definition.function
        (class_declaration declaration_kind: "class" name: (type_identifier) @name) @definition.class
        (class_declaration declaration_kind: "actor" name: (type_identifier) @name) @definition.class
        (protocol_declaration name: (type_identifier) @name) @definition.type
        (class_declaration declaration_kind: "struct" name: (type_identifier) @name) @definition.type
        (class_declaration declaration_kind: "enum" name: (type_identifier) @name) @definition.type
    "#,
    // Imports: whole `import` declarations.
    import_query: r#"
        (import_declaration) @import
    "#,
    // Calls: a direct simple-identifier child, or the identifier in the
    // suffix of a navigation expression.
    call_query: r#"
        (call_expression (simple_identifier) @name) @call
        (call_expression (navigation_expression suffix: (navigation_suffix suffix: (simple_identifier) @name))) @call
    "#,
};
301
302// ── Registry ───────────────────────────────────────────────────────────
303
/// All supported languages and their specs, as `(language name, spec)` pairs.
///
/// `detect_language` scans this table in order and returns the name of the
/// first entry whose extension list contains the file's extension; `get_spec`
/// looks an entry up by name. The names are the same string keys that
/// `get_ts_language` matches on.
const SPECS: &[(&str, &LanguageSpec)] = &[
    ("python", &PYTHON),
    ("javascript", &JAVASCRIPT),
    ("typescript", &TYPESCRIPT),
    ("go", &GO),
    ("rust", &RUST),
    ("java", &JAVA),
    ("php", &PHP),
    ("dart", &DART),
    ("csharp", &CSHARP),
    ("c", &C_LANG),
    ("cpp", &CPP),
    ("elixir", &ELIXIR),
    ("ruby", &RUBY),
    ("kotlin", &KOTLIN),
    ("swift", &SWIFT),
    ("yaml", &YAML),
    ("json", &JSON_LANG),
];
324
325/// Detect language name from file extension.
326pub fn detect_language(file_path: &str) -> Option<&'static str> {
327    let path = std::path::Path::new(file_path);
328    let ext = path
329        .extension()
330        .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))?;
331
332    for (name, spec) in SPECS {
333        if spec.extensions.contains(&ext.as_str()) {
334            return Some(name);
335        }
336    }
337    None
338}
339
340/// Get the language spec for a given language name.
341pub fn get_spec(lang: &str) -> Option<&'static LanguageSpec> {
342    SPECS
343        .iter()
344        .find(|(name, _)| *name == lang)
345        .map(|(_, s)| *s)
346}
347
348/// Get the tree-sitter Language object for a given language name.
349pub fn get_ts_language(lang: &str) -> Option<Language> {
350    let lang_fn = match lang {
351        "python" => tree_sitter_python::LANGUAGE,
352        "javascript" => tree_sitter_javascript::LANGUAGE,
353        "typescript" => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
354        "go" => tree_sitter_go::LANGUAGE,
355        "rust" => tree_sitter_rust::LANGUAGE,
356        "java" => tree_sitter_java::LANGUAGE,
357        "c" => tree_sitter_c::LANGUAGE,
358        "cpp" => tree_sitter_cpp::LANGUAGE,
359        "csharp" => tree_sitter_c_sharp::LANGUAGE,
360        "ruby" => tree_sitter_ruby::LANGUAGE,
361        "php" => tree_sitter_php::LANGUAGE_PHP,
362        "swift" => tree_sitter_swift::LANGUAGE,
363        "kotlin" => tree_sitter_kotlin_ng::LANGUAGE,
364        "dart" => tree_sitter_dart::LANGUAGE,
365        "elixir" => tree_sitter_elixir::LANGUAGE,
366        "json" => tree_sitter_json::LANGUAGE,
367        "yaml" => tree_sitter_yaml::LANGUAGE,
368        _ => return None,
369    };
370    Some(lang_fn.into())
371}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// Markdown is intentionally handled as content-only text, not AST, so
    /// neither of its extensions may resolve to a language.
    #[test]
    fn markdown_extensions_are_not_detected() {
        for path in ["README.md", "docs/guide.markdown"] {
            assert_eq!(detect_language(path), None, "path: {}", path);
        }
    }

    /// Every extension listed for JavaScript must keep resolving to it.
    #[test]
    fn javascript_extensions_still_detect() {
        let paths = [
            "src/app.js",
            "src/app.jsx",
            "src/app.cjs",
            "src/generated.mjs",
        ];
        for path in paths {
            assert_eq!(detect_language(path), Some("javascript"), "path: {}", path);
        }
    }
}