vtcode_core/tools/tree_sitter/
languages.rs

1//! Language-specific functionality and queries for tree-sitter
2
3use crate::tools::tree_sitter::analyzer::{LanguageSupport, Position, SyntaxNode};
4use serde::{Deserialize, Serialize};
5
/// Tree-sitter query sources for a single language, grouped by the kind of
/// construct each query is meant to capture.
///
/// Every field holds raw query text (S-expression patterns). Nothing in this
/// type compiles or validates the queries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LanguageQueries {
    /// Patterns for function-like definitions.
    pub functions_query: String,
    /// Patterns for class-like type definitions.
    pub classes_query: String,
    /// Patterns for import and module declarations.
    pub imports_query: String,
    /// Patterns for variable, constant and field declarations.
    pub variables_query: String,
    /// Patterns for comments.
    pub comments_query: String,
}
14
15impl LanguageQueries {
16    /// Get queries for a specific language
17    pub fn for_language(language: &LanguageSupport) -> Self {
18        match language {
19            LanguageSupport::Rust => Self::rust_queries(),
20            LanguageSupport::Python => Self::python_queries(),
21            LanguageSupport::JavaScript => Self::javascript_queries(),
22            LanguageSupport::TypeScript => Self::typescript_queries(),
23            LanguageSupport::Go => Self::go_queries(),
24            LanguageSupport::Java => Self::java_queries(),
25            LanguageSupport::Swift => Self::swift_queries(),
26        }
27    }
28
29    fn rust_queries() -> Self {
30        Self {
31            functions_query: r#"
32                (function_item
33                    name: (identifier) @function.name
34                    parameters: (parameters) @function.parameters
35                    return_type: (return_type)? @function.return_type
36                    body: (block) @function.body) @function.def
37
38                (impl_item
39                    type: (type_identifier) @impl.type
40                    body: (declaration_list) @impl.body)
41
42                (trait_item
43                    name: (type_identifier) @trait.name
44                    body: (declaration_list) @trait.body)
45            "#
46            .to_string(),
47
48            classes_query: r#"
49                (struct_item
50                    name: (type_identifier) @struct.name
51                    body: (field_declaration_list) @struct.fields) @struct.def
52
53                (enum_item
54                    name: (type_identifier) @enum.name
55                    body: (enum_variant_list) @enum.variants) @enum.def
56            "#
57            .to_string(),
58
59            imports_query: r#"
60                (use_declaration
61                    argument: (scoped_identifier) @import.path) @import.def
62
63                (mod_item
64                    name: (identifier) @module.name) @module.def
65            "#
66            .to_string(),
67
68            variables_query: r#"
69                (let_declaration
70                    pattern: (identifier) @variable.name
71                    type: (type_annotation)? @variable.type
72                    value: (expression)? @variable.value) @variable.def
73
74                (const_item
75                    name: (identifier) @const.name
76                    type: (type_annotation)? @const.type
77                    value: (expression) @const.value) @const.def
78
79                (static_item
80                    name: (identifier) @static.name
81                    type: (type_annotation)? @static.type
82                    value: (expression)? @static.value) @static.def
83            "#
84            .to_string(),
85
86            comments_query: r#"
87                (line_comment) @comment.line
88                (block_comment) @comment.block
89            "#
90            .to_string(),
91        }
92    }
93
94    fn python_queries() -> Self {
95        Self {
96            functions_query: r#"
97                (function_definition
98                    name: (identifier) @function.name
99                    parameters: (parameters) @function.parameters
100                    body: (block) @function.body) @function.def
101
102                (class_definition
103                    name: (identifier) @class.name
104                    body: (block) @class.body) @class.def
105            "#
106            .to_string(),
107
108            classes_query: r#"
109                (class_definition
110                    name: (identifier) @class.name
111                    superclasses: (argument_list)? @class.superclasses
112                    body: (block) @class.body) @class.def
113            "#
114            .to_string(),
115
116            imports_query: r#"
117                (import_statement
118                    name: (dotted_name) @import.name) @import.def
119
120                (import_from_statement
121                    module: (dotted_name) @import.module
122                    name: (dotted_name) @import.name) @import.def
123            "#
124            .to_string(),
125
126            variables_query: r#"
127                (assignment
128                    left: (identifier) @variable.name
129                    right: (expression) @variable.value) @variable.def
130            "#
131            .to_string(),
132
133            comments_query: r#"
134                (comment) @comment
135            "#
136            .to_string(),
137        }
138    }
139
140    fn javascript_queries() -> Self {
141        Self {
142            functions_query: r#"
143                (function_declaration
144                    name: (identifier) @function.name
145                    parameters: (formal_parameters) @function.parameters
146                    body: (statement_block) @function.body) @function.def
147
148                (function_expression
149                    name: (identifier)? @function.name
150                    parameters: (formal_parameters) @function.parameters
151                    body: (statement_block) @function.body) @function.expr
152
153                (arrow_function
154                    parameters: (formal_parameters) @arrow.parameters
155                    body: (statement_block) @arrow.body) @arrow.def
156            "#
157            .to_string(),
158
159            classes_query: r#"
160                (class_declaration
161                    name: (identifier) @class.name
162                    body: (class_body) @class.body) @class.def
163            "#
164            .to_string(),
165
166            imports_query: r#"
167                (import_statement
168                    source: (string) @import.source
169                    specifiers: (import_clause) @import.specifiers) @import.def
170
171                (export_statement
172                    declaration: (function_declaration) @export.function) @export.def
173            "#
174            .to_string(),
175
176            variables_query: r#"
177                (variable_declaration
178                    declarator: (variable_declarator
179                        name: (identifier) @variable.name
180                        value: (expression)? @variable.value)) @variable.def
181
182                (lexical_declaration
183                    declarator: (variable_declarator
184                        name: (identifier) @variable.name
185                        value: (expression)? @variable.value)) @variable.def
186            "#
187            .to_string(),
188
189            comments_query: r#"
190                (comment) @comment
191            "#
192            .to_string(),
193        }
194    }
195
196    fn typescript_queries() -> Self {
197        // TypeScript extends JavaScript queries
198        let mut js_queries = Self::javascript_queries();
199
200        // Add TypeScript-specific queries
201        js_queries.functions_query.push_str(
202            r#"
203            (interface_declaration
204                name: (type_identifier) @interface.name
205                body: (interface_body) @interface.body) @interface.def
206
207            (type_alias_declaration
208                name: (type_identifier) @type.name
209                value: (type_annotation) @type.value) @type.def
210        "#,
211        );
212
213        js_queries.classes_query.push_str(
214            r#"
215            (interface_declaration
216                name: (type_identifier) @interface.name
217                body: (interface_body) @interface.body) @interface.def
218        "#,
219        );
220
221        js_queries
222    }
223
224    fn go_queries() -> Self {
225        Self {
226            functions_query: r#"
227                (function_declaration
228                    name: (identifier) @function.name
229                    parameters: (parameter_list) @function.parameters
230                    result: (parameter_list)? @function.result
231                    body: (block) @function.body) @function.def
232
233                (method_declaration
234                    receiver: (parameter_list) @method.receiver
235                    name: (identifier) @method.name
236                    parameters: (parameter_list) @method.parameters
237                    result: (parameter_list)? @method.result
238                    body: (block) @method.body) @method.def
239            "#
240            .to_string(),
241
242            classes_query: r#"
243                (type_declaration
244                    name: (type_identifier) @type.name
245                    type_spec: (type_spec
246                        name: (type_identifier) @struct.name
247                        type: (struct_type) @struct.def)) @type.def
248
249                (interface_type
250                    method_spec: (method_spec) @interface.method) @interface.def
251            "#
252            .to_string(),
253
254            imports_query: r#"
255                (import_declaration
256                    spec: (import_spec
257                        path: (interpreted_string_literal) @import.path
258                        name: (identifier)? @import.name)) @import.def
259            "#
260            .to_string(),
261
262            variables_query: r#"
263                (var_declaration
264                    spec: (var_spec
265                        name: (identifier) @variable.name
266                        type: (type_identifier)? @variable.type
267                        value: (expression_list)? @variable.value)) @variable.def
268
269                (short_var_declaration
270                    left: (expression_list) @var.names
271                    right: (expression_list) @var.values) @short.var
272            "#
273            .to_string(),
274
275            comments_query: r#"
276                (comment) @comment
277            "#
278            .to_string(),
279        }
280    }
281
282    fn java_queries() -> Self {
283        Self {
284            functions_query: r#"
285                (method_declaration
286                    name: (identifier) @method.name
287                    parameters: (formal_parameters) @method.parameters
288                    type: (type_identifier)? @method.return_type
289                    body: (block) @method.body) @method.def
290
291                (constructor_declaration
292                    name: (identifier) @constructor.name
293                    parameters: (formal_parameters) @constructor.parameters
294                    body: (block) @constructor.body) @constructor.def
295            "#
296            .to_string(),
297
298            classes_query: r#"
299                (class_declaration
300                    name: (identifier) @class.name
301                    body: (class_body) @class.body) @class.def
302
303                (interface_declaration
304                    name: (identifier) @interface.name
305                    body: (interface_body) @interface.body) @interface.def
306            "#
307            .to_string(),
308
309            imports_query: r#"
310                (import_declaration
311                    qualified_name: (qualified_name) @import.name) @import.def
312
313                (package_declaration
314                    qualified_name: (qualified_name) @package.name) @package.def
315            "#
316            .to_string(),
317
318            variables_query: r#"
319                (field_declaration
320                    declarator: (variable_declarator
321                        name: (identifier) @field.name
322                        dimensions: (dimensions)? @field.dimensions)
323                    type: (type_identifier) @field.type) @field.def
324
325                (local_variable_declaration
326                    declarator: (variable_declarator
327                        name: (identifier) @variable.name)
328                    type: (type_identifier) @variable.type) @variable.def
329            "#
330            .to_string(),
331
332            comments_query: r#"
333                (line_comment) @comment.line
334                (block_comment) @comment.block
335            "#
336            .to_string(),
337        }
338    }
339
340    #[allow(dead_code)]
341    fn swift_queries() -> Self {
342        Self {
343            functions_query: r#"
344                (function_declaration
345                    name: (simple_identifier) @function.name
346                    parameter: (parameter_clause) @function.parameters
347                    return_type: (type_annotation)? @function.return_type
348                    body: (function_body) @function.body) @function.def
349
350                (method_declaration
351                    name: (simple_identifier) @method.name
352                    parameter: (parameter_clause) @method.parameters
353                    return_type: (type_annotation)? @method.return_type
354                    body: (function_body) @method.body) @method.def
355
356                (initializer_declaration
357                    parameter: (parameter_clause) @initializer.parameters
358                    body: (function_body) @initializer.body) @initializer.def
359
360                (deinitializer_declaration
361                    body: (function_body) @deinitializer.body) @deinitializer.def
362            "#
363            .to_string(),
364
365            classes_query: r#"
366                (class_declaration
367                    name: (type_identifier) @class.name
368                    inheritance: (inheritance_clause)? @class.inheritance
369                    body: (class_body) @class.body) @class.def
370
371                (struct_declaration
372                    name: (type_identifier) @struct.name
373                    inheritance: (inheritance_clause)? @struct.inheritance
374                    body: (struct_body) @struct.body) @struct.def
375
376                (enum_declaration
377                    name: (type_identifier) @enum.name
378                    inheritance: (inheritance_clause)? @enum.inheritance
379                    body: (enum_body) @enum.body) @enum.def
380
381                (protocol_declaration
382                    name: (type_identifier) @protocol.name
383                    inheritance: (inheritance_clause)? @protocol.inheritance
384                    body: (protocol_body) @protocol.body) @protocol.def
385
386                (extension_declaration
387                    extended_type: (type_identifier) @extension.type
388                    inheritance: (inheritance_clause)? @extension.inheritance
389                    body: (extension_body) @extension.body) @extension.def
390            "#
391            .to_string(),
392
393            imports_query: r#"
394                (import_declaration
395                    import_kind: (import_kind)? @import.kind
396                    path: (import_path) @import.path) @import.def
397            "#
398            .to_string(),
399
400            variables_query: r#"
401                (property_declaration
402                    name: (pattern) @property.name
403                    type_annotation: (type_annotation)? @property.type
404                    initializer: (initializer_clause)? @property.initializer) @property.def
405
406                (constant_declaration
407                    name: (pattern) @constant.name
408                    type_annotation: (type_annotation)? @constant.type
409                    initializer: (initializer_clause) @constant.initializer) @constant.def
410
411                (variable_declaration
412                    name: (pattern) @variable.name
413                    type_annotation: (type_annotation)? @variable.type
414                    initializer: (initializer_clause)? @variable.initializer) @variable.def
415
416                (parameter
417                    name: (simple_identifier) @parameter.name
418                    type_annotation: (type_annotation) @parameter.type) @parameter.def
419            "#
420            .to_string(),
421
422            comments_query: r#"
423                (comment) @comment
424                (multiline_comment) @comment.multiline
425            "#
426            .to_string(),
427        }
428    }
429}
430
/// Symbol information extracted from code
///
/// One record per symbol found by `LanguageAnalyzer`. The field notes below
/// describe how the extractors visible in this file fill the record.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolInfo {
    /// Text of the node that names the symbol.
    pub name: String,
    /// Category assigned by the extractor that produced the symbol.
    pub kind: SymbolKind,
    /// Start position of the naming node (not of the whole definition).
    pub position: Position,
    /// Coarse label set by the extractor: `"function"`, `"method"`,
    /// `"class"`, `"variable"` or `"import"`.
    pub scope: Option<String>,
    /// Parameter list plus optional return type; only set for functions and
    /// methods, and only when a `parameters` field is present.
    pub signature: Option<String>,
    /// Comment text attached to the definition; only collected for functions,
    /// methods and class-like symbols.
    pub documentation: Option<String>,
}
441
/// Category of an extracted symbol.
///
/// The extractors visible in this file produce `Function`, `Method`, `Class`,
/// `Struct`, `Interface`, `Variable`, `Constant` and `Import`. `Trait`,
/// `Module` and `Type` are not produced by any code shown here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SymbolKind {
    /// Node kind contains `function` (and not `method`).
    Function,
    /// Node kind contains `method`.
    Method,
    /// Class-like node that is neither an interface nor a struct.
    Class,
    /// Node kind contains `struct`.
    Struct,
    /// Node kind contains `interface`.
    Interface,
    Trait,
    /// Variable-like node whose kind does not contain `const`.
    Variable,
    /// Variable-like node whose kind contains `const`.
    Constant,
    /// Identifier or name found under a node whose kind contains `import`.
    Import,
    Module,
    Type,
}
456
/// Language-specific symbol extraction
///
/// Built for one language via `new`. The extraction methods visible in this
/// file walk the syntax tree with node-kind heuristics rather than running
/// the stored queries.
pub struct LanguageAnalyzer {
    // Stored at construction but not read by any method visible in this file,
    // hence the lint allowance.
    #[allow(dead_code)]
    queries: LanguageQueries,
}
462
463impl LanguageAnalyzer {
464    pub fn new(language: &LanguageSupport) -> Self {
465        Self {
466            queries: LanguageQueries::for_language(language),
467        }
468    }
469
470    /// Extract symbols from syntax tree
471    pub fn extract_symbols(
472        &self,
473        tree: &crate::tools::tree_sitter::analyzer::SyntaxTree,
474    ) -> Vec<SymbolInfo> {
475        let mut symbols = Vec::new();
476
477        // Extract functions
478        symbols.extend(self.extract_functions(&tree.root));
479
480        // Extract classes/structs
481        symbols.extend(self.extract_classes(&tree.root));
482
483        // Extract variables
484        symbols.extend(self.extract_variables(&tree.root));
485
486        // Extract imports
487        symbols.extend(self.extract_imports(&tree.root));
488
489        symbols
490    }
491
492    fn extract_functions(&self, node: &SyntaxNode) -> Vec<SymbolInfo> {
493        let mut functions = Vec::new();
494
495        if node.kind.contains("function") || node.kind.contains("method") {
496            if let Some(name_node) = node
497                .named_children
498                .get("name")
499                .and_then(|children| children.first())
500            {
501                let function = SymbolInfo {
502                    name: name_node.text.clone(),
503                    kind: if node.kind.contains("method") {
504                        SymbolKind::Method
505                    } else {
506                        SymbolKind::Function
507                    },
508                    position: name_node.start_position.clone(),
509                    scope: Some(
510                        if node.kind.contains("method") {
511                            "method"
512                        } else {
513                            "function"
514                        }
515                        .to_string(),
516                    ),
517                    signature: self.extract_signature(node),
518                    documentation: self.extract_documentation(node),
519                };
520                functions.push(function);
521            }
522        }
523
524        // Recursively extract from children
525        for child in &node.children {
526            functions.extend(self.extract_functions(child));
527        }
528
529        functions
530    }
531
532    fn extract_classes(&self, node: &SyntaxNode) -> Vec<SymbolInfo> {
533        let mut classes = Vec::new();
534
535        if node.kind.contains("class")
536            || node.kind.contains("struct")
537            || node.kind.contains("interface")
538        {
539            if let Some(name_node) = node
540                .named_children
541                .get("name")
542                .and_then(|children| children.first())
543            {
544                let kind = match node.kind.as_str() {
545                    k if k.contains("interface") => SymbolKind::Interface,
546                    k if k.contains("struct") => SymbolKind::Struct,
547                    _ => SymbolKind::Class,
548                };
549
550                let class = SymbolInfo {
551                    name: name_node.text.clone(),
552                    kind,
553                    position: name_node.start_position.clone(),
554                    scope: Some("class".to_string()),
555                    signature: None,
556                    documentation: self.extract_documentation(node),
557                };
558                classes.push(class);
559            }
560        }
561
562        // Recursively extract from children
563        for child in &node.children {
564            classes.extend(self.extract_classes(child));
565        }
566
567        classes
568    }
569
570    fn extract_variables(&self, node: &SyntaxNode) -> Vec<SymbolInfo> {
571        let mut variables = Vec::new();
572
573        if node.kind.contains("variable")
574            || node.kind.contains("const")
575            || node.kind.contains("let")
576        {
577            // Extract variable names from children
578            for child in &node.children {
579                if child.kind == "identifier" && !child.text.is_empty() {
580                    let variable = SymbolInfo {
581                        name: child.text.clone(),
582                        kind: if node.kind.contains("const") {
583                            SymbolKind::Constant
584                        } else {
585                            SymbolKind::Variable
586                        },
587                        position: child.start_position.clone(),
588                        scope: Some("variable".to_string()),
589                        signature: None,
590                        documentation: None,
591                    };
592                    variables.push(variable);
593                    break; // Only take the first identifier (variable name)
594                }
595            }
596        }
597
598        // Recursively extract from children
599        for child in &node.children {
600            variables.extend(self.extract_variables(child));
601        }
602
603        variables
604    }
605
606    fn extract_imports(&self, node: &SyntaxNode) -> Vec<SymbolInfo> {
607        let mut imports = Vec::new();
608
609        if node.kind.contains("import") {
610            // Extract import information
611            for child in &node.children {
612                if child.kind.contains("identifier") || child.kind.contains("name") {
613                    let import = SymbolInfo {
614                        name: child.text.clone(),
615                        kind: SymbolKind::Import,
616                        position: child.start_position.clone(),
617                        scope: Some("import".to_string()),
618                        signature: None,
619                        documentation: None,
620                    };
621                    imports.push(import);
622                }
623            }
624        }
625
626        // Recursively extract from children
627        for child in &node.children {
628            imports.extend(self.extract_imports(child));
629        }
630
631        imports
632    }
633
634    fn extract_signature(&self, node: &SyntaxNode) -> Option<String> {
635        // Extract function/method signature
636        if let Some(params_node) = node
637            .named_children
638            .get("parameters")
639            .and_then(|children| children.first())
640        {
641            let params = &params_node.text;
642
643            let return_type = node
644                .named_children
645                .get("return_type")
646                .and_then(|children| children.first())
647                .map(|rt| format!(" -> {}", rt.text))
648                .unwrap_or_default();
649
650            Some(format!("({}){}", params, return_type))
651        } else {
652            None
653        }
654    }
655
656    fn extract_documentation(&self, node: &SyntaxNode) -> Option<String> {
657        // Heuristic: combine leading sibling comments (captured during AST build)
658        // with any immediate child comment nodes.
659        let mut docs = Vec::new();
660
661        // Preceding sibling comments collected on the node
662        for c in &node.leading_comments {
663            if !c.is_empty() {
664                docs.push(c.clone());
665            }
666        }
667
668        // Immediate child comments
669        for child in &node.children {
670            let kind = child.kind.to_lowercase();
671            if kind.contains("comment") {
672                let t = child.text.trim();
673                if !t.is_empty() {
674                    docs.push(t.to_string());
675                }
676            }
677        }
678
679        if docs.is_empty() {
680            None
681        } else {
682            Some(docs.join("\n"))
683        }
684    }
685}