context_creator/core/semantic/
query_engine.rs

1//! Tree-sitter query engine for efficient semantic analysis
2//!
3//! This module provides a declarative query-based approach to semantic analysis
4//! using Tree-sitter's query engine, replacing manual AST traversal.
5
6use crate::core::semantic::analyzer::{
7    AnalysisResult, FunctionCall, FunctionDefinition, Import, TypeReference,
8};
9use crate::utils::error::ContextCreatorError;
10use std::collections::HashMap;
11use tree_sitter::{Language, Parser, Query, QueryCursor, Tree};
12
13/// Query engine for semantic analysis using Tree-sitter queries
14pub struct QueryEngine {
15    #[allow(dead_code)]
16    language: Language,
17    #[allow(dead_code)]
18    language_name: String,
19    import_query: Query,
20    function_call_query: Query,
21    type_reference_query: Query,
22    function_definition_query: Query,
23}
24
25impl QueryEngine {
26    /// Create a new query engine for the specified language
27    pub fn new(language: Language, language_name: &str) -> Result<Self, ContextCreatorError> {
28        let import_query = Self::create_import_query(language, language_name)?;
29        let function_call_query = Self::create_function_call_query(language, language_name)?;
30        let type_reference_query = Self::create_type_reference_query(language, language_name)?;
31        let function_definition_query =
32            Self::create_function_definition_query(language, language_name)?;
33
34        Ok(Self {
35            language,
36            language_name: language_name.to_string(),
37            import_query,
38            function_call_query,
39            type_reference_query,
40            function_definition_query,
41        })
42    }
43
44    /// Analyze content using Tree-sitter queries
45    pub fn analyze_with_parser(
46        &self,
47        parser: &mut Parser,
48        content: &str,
49    ) -> Result<AnalysisResult, ContextCreatorError> {
50        // Parse the content
51        let tree = parser.parse(content, None).ok_or_else(|| {
52            ContextCreatorError::ParseError("Failed to parse content".to_string())
53        })?;
54
55        self.analyze_tree(&tree, content)
56    }
57
58    /// Analyze a parsed tree using queries
59    pub fn analyze_tree(
60        &self,
61        tree: &Tree,
62        content: &str,
63    ) -> Result<AnalysisResult, ContextCreatorError> {
64        let mut result = AnalysisResult::default();
65        let mut query_cursor = QueryCursor::new();
66        let root_node = tree.root_node();
67
68        // Execute import query
69        let import_matches =
70            query_cursor.matches(&self.import_query, root_node, content.as_bytes());
71        result.imports = self.extract_imports(import_matches, content)?;
72
73        // Execute function call query
74        let call_matches =
75            query_cursor.matches(&self.function_call_query, root_node, content.as_bytes());
76        result.function_calls = self.extract_function_calls(call_matches, content)?;
77
78        // Execute type reference query
79        let type_matches =
80            query_cursor.matches(&self.type_reference_query, root_node, content.as_bytes());
81        result.type_references = self.extract_type_references(type_matches, content)?;
82
83        // Execute function definition query
84        let definition_matches = query_cursor.matches(
85            &self.function_definition_query,
86            root_node,
87            content.as_bytes(),
88        );
89        result.exported_functions =
90            self.extract_function_definitions(definition_matches, content)?;
91
92        Ok(result)
93    }
94
95    /// Create import query for the specified language
96    fn create_import_query(
97        language: Language,
98        language_name: &str,
99    ) -> Result<Query, ContextCreatorError> {
100        let query_text = match language_name {
101            "rust" => {
102                r#"
103                ; Use declarations with simple paths (use std::collections::HashMap)
104                (use_declaration
105                  argument: [(scoped_identifier) (identifier)] @rust_import_path
106                ) @rust_simple_import
107
108                ; Use declarations with use lists (use crate::module::{item1, item2})
109                (use_declaration
110                  argument: (scoped_use_list
111                    path: [(scoped_identifier) (identifier)] @rust_module_path
112                    list: (use_list
113                      [(scoped_identifier) (identifier)] @rust_import_item
114                    )
115                  )
116                ) @rust_scoped_import
117
118                ; Use declarations with renamed imports (use foo as bar)
119                (use_declaration
120                  argument: (use_as_clause
121                    path: (scoped_identifier) @rust_import_path
122                    alias: (identifier) @rust_import_alias
123                  )
124                ) @rust_aliased_import
125
126                ; Use declarations with wildcard (use module::*)
127                (use_declaration
128                  argument: (use_wildcard
129                    (scoped_identifier) @rust_wildcard_path
130                  )
131                ) @rust_wildcard_import
132
133                ; Module declarations  
134                (mod_item
135                  name: (identifier) @mod_name
136                ) @rust_module
137
138                ; Extern crate declarations
139                (extern_crate_declaration
140                  name: (identifier) @crate_name
141                ) @extern_crate
142            "#
143            }
144            "python" => {
145                r#"
146                ; Simple import statements (import os, import sys)
147                (import_statement
148                  (dotted_name) @module_name
149                ) @simple_import
150
151                ; From import statements with absolute modules (from pathlib import Path)
152                (import_from_statement
153                  module_name: (dotted_name) @from_module
154                  (dotted_name) @import_item
155                ) @from_import
156                
157                ; From import with aliased imports  
158                (import_from_statement
159                  module_name: (dotted_name) @from_module
160                  (aliased_import
161                    name: (dotted_name) @import_item
162                  )
163                ) @from_import_aliased
164
165                ; Wildcard imports (from module import *)
166                (import_from_statement
167                  module_name: (dotted_name) @from_module
168                  (wildcard_import) @wildcard
169                ) @wildcard_import
170
171                ; Relative wildcard imports (from . import *, from ..utils import *)
172                (import_from_statement
173                  module_name: (relative_import) @relative_module
174                  (wildcard_import) @wildcard
175                ) @relative_wildcard_import
176
177                ; Relative from imports (from . import utils, from ..lib import helper)
178                (import_from_statement
179                  module_name: (relative_import) @relative_module
180                  (dotted_name) @import_item
181                ) @relative_from_import
182
183                ; Relative from imports with aliased imports
184                (import_from_statement
185                  module_name: (relative_import) @relative_module
186                  (aliased_import
187                    name: (dotted_name) @import_item
188                  )
189                ) @relative_from_import_aliased
190            "#
191            }
192            "javascript" => {
193                r#"
194                ; Import declarations
195                (import_statement
196                  (import_clause
197                    [
198                      (identifier) @import_name
199                      (namespace_import (identifier) @import_name)
200                      (named_imports
201                        (import_specifier
202                          [
203                            (identifier) @import_name
204                            name: (identifier) @import_name
205                          ]
206                        )
207                      )
208                    ]
209                  )?
210                  source: (string) @module_path
211                ) @js_import
212
213                ; Require calls (CommonJS)
214                (call_expression
215                  function: (identifier) @require_fn (#eq? @require_fn "require")
216                  arguments: (arguments (string) @module_path)
217                ) @require
218            "#
219            }
220            "typescript" => {
221                r#"
222                ; Import declarations
223                (import_statement
224                  (import_clause
225                    [
226                      (identifier) @import_name
227                      (namespace_import (identifier) @import_name)
228                      (named_imports
229                        (import_specifier
230                          [
231                            (identifier) @import_name
232                            name: (identifier) @import_name
233                          ]
234                        )
235                      )
236                    ]
237                  )?
238                  source: (string) @module_path
239                ) @ts_import
240
241                ; Require calls (CommonJS)
242                (call_expression
243                  function: (identifier) @require_fn (#eq? @require_fn "require")
244                  arguments: (arguments (string) @module_path)
245                ) @require
246            "#
247            }
248            _ => {
249                return Err(ContextCreatorError::ParseError(format!(
250                    "Unsupported language for import queries: {language_name}"
251                )))
252            }
253        };
254
255        Query::new(language, query_text).map_err(|e| {
256            ContextCreatorError::ParseError(format!("Failed to create import query: {e}"))
257        })
258    }
259
260    /// Create function call query for the specified language
261    fn create_function_call_query(
262        language: Language,
263        language_name: &str,
264    ) -> Result<Query, ContextCreatorError> {
265        let query_text = match language_name {
266            "rust" => {
267                r#"
268                ; Simple function calls (helper)
269                (call_expression
270                  function: (identifier) @fn_name
271                ) @call
272
273                ; Scoped function calls (lib::greet)
274                (call_expression
275                  function: (scoped_identifier
276                    path: (identifier) @module_name
277                    name: (identifier) @fn_name
278                  )
279                ) @scoped_call
280
281                ; Nested scoped function calls (lib::User::new)
282                (call_expression
283                  function: (scoped_identifier
284                    path: (scoped_identifier
285                      path: (identifier) @module_name
286                      name: (identifier) @type_name
287                    )
288                    name: (identifier) @fn_name
289                  )
290                ) @nested_scoped_call
291
292                ; Method calls (obj.method())
293                (call_expression
294                  function: (field_expression
295                    field: (field_identifier) @method_name
296                  )
297                ) @method_call
298
299                ; Macro calls (println!)
300                (macro_invocation
301                  macro: (identifier) @macro_name
302                ) @macro_call
303            "#
304            }
305            "python" => {
306                r#"
307                ; Simple function calls (print, len)
308                (call
309                  function: (identifier) @fn_name
310                ) @call
311
312                ; Module attribute calls (os.path, module.func)
313                (call
314                  function: (attribute
315                    object: (identifier) @module_name
316                    attribute: (identifier) @fn_name
317                  )
318                ) @module_call
319
320                ; Nested attribute calls (os.path.join)
321                (call
322                  function: (attribute
323                    attribute: (identifier) @fn_name
324                  )
325                ) @nested_call
326            "#
327            }
328            "javascript" => {
329                r#"
330                ; Function calls
331                (call_expression
332                  function: [
333                    (identifier) @fn_name
334                    (member_expression
335                      object: (identifier) @module_name
336                      property: (property_identifier) @fn_name
337                    )
338                  ]
339                ) @call
340            "#
341            }
342            "typescript" => {
343                r#"
344                ; Function calls
345                (call_expression
346                  function: [
347                    (identifier) @fn_name
348                    (member_expression
349                      object: (identifier) @module_name
350                      property: (property_identifier) @fn_name
351                    )
352                  ]
353                ) @call
354            "#
355            }
356            _ => {
357                return Err(ContextCreatorError::ParseError(format!(
358                    "Unsupported language for function call queries: {language_name}"
359                )))
360            }
361        };
362
363        Query::new(language, query_text).map_err(|e| {
364            ContextCreatorError::ParseError(format!("Failed to create function call query: {e}"))
365        })
366    }
367
368    /// Create function definition query for the specified language
369    fn create_function_definition_query(
370        language: Language,
371        language_name: &str,
372    ) -> Result<Query, ContextCreatorError> {
373        let query_text = match language_name {
374            "rust" => {
375                r#"
376                ; Function declarations with visibility
377                (function_item
378                  (visibility_modifier)? @visibility
379                  name: (identifier) @fn_name
380                ) @function
381                
382                ; Method declarations in impl blocks
383                (impl_item
384                  body: (declaration_list
385                    (function_item
386                      (visibility_modifier)? @method_visibility
387                      name: (identifier) @method_name
388                    ) @method
389                  )
390                )
391                
392                ; Trait method declarations
393                (trait_item
394                  body: (declaration_list
395                    (function_signature_item
396                      name: (identifier) @trait_fn_name
397                    ) @trait_function
398                  )
399                )
400            "#
401            }
402            "python" => {
403                r#"
404                ; Function definitions
405                (function_definition
406                  name: (identifier) @fn_name
407                ) @function
408                
409                ; Method definitions in classes
410                (class_definition
411                  body: (block
412                    (function_definition
413                      name: (identifier) @method_name
414                    ) @method
415                  )
416                )
417                
418                ; Async function definitions
419                (function_definition
420                  "async" @async_marker
421                  name: (identifier) @async_fn_name
422                ) @async_function
423            "#
424            }
425            "javascript" => {
426                r#"
427                ; Function declarations
428                (function_declaration
429                  name: (identifier) @fn_name
430                ) @function
431                
432                ; Arrow function assigned to const/let/var
433                (variable_declarator
434                  name: (identifier) @arrow_fn_name
435                  value: (arrow_function)
436                ) @arrow_function
437                
438                ; Function expressions assigned to const/let/var
439                (variable_declarator
440                  name: (identifier) @fn_expr_name
441                  value: (function_expression)
442                ) @function_expression
443                
444                ; Method definitions in objects
445                (method_definition
446                  name: (property_identifier) @method_name
447                ) @method
448                
449                ; Export function declarations
450                (export_statement
451                  declaration: (function_declaration
452                    name: (identifier) @export_fn_name
453                  )
454                ) @export_function
455                
456                ; CommonJS exports pattern: exports.functionName = function()
457                (assignment_expression
458                  left: (member_expression
459                    object: (identifier) @exports_obj (#eq? @exports_obj "exports")
460                    property: (property_identifier) @commonjs_export_name
461                  )
462                  right: [
463                    (function_expression)
464                    (arrow_function)
465                  ]
466                ) @commonjs_export
467            "#
468            }
469            "typescript" => {
470                r#"
471                ; Function declarations
472                (function_declaration
473                  name: (identifier) @fn_name
474                ) @function
475                
476                ; Arrow function assigned to const/let/var
477                (variable_declarator
478                  name: (identifier) @arrow_fn_name
479                  value: (arrow_function)
480                ) @arrow_function
481                
482                ; Function expressions assigned to const/let/var
483                (variable_declarator
484                  name: (identifier) @fn_expr_name
485                  value: (function_expression)
486                ) @function_expression
487                
488                ; Method definitions in classes
489                (method_definition
490                  name: (property_identifier) @method_name
491                ) @method
492                
493                ; Export function declarations
494                (export_statement
495                  declaration: (function_declaration
496                    name: (identifier) @export_fn_name
497                  )
498                ) @export_function
499            "#
500            }
501            _ => {
502                return Err(ContextCreatorError::ParseError(format!(
503                    "Unsupported language for function definition queries: {language_name}"
504                )))
505            }
506        };
507
508        Query::new(language, query_text).map_err(|e| {
509            ContextCreatorError::ParseError(format!(
510                "Failed to create function definition query: {e}"
511            ))
512        })
513    }
514
515    /// Create type reference query for the specified language
516    fn create_type_reference_query(
517        language: Language,
518        language_name: &str,
519    ) -> Result<Query, ContextCreatorError> {
520        let query_text = match language_name {
521            "rust" => {
522                r#"
523                ; Type identifiers (excluding definitions)
524                (type_identifier) @type_name
525                (#not-match? @type_name "^(i8|i16|i32|i64|i128|u8|u16|u32|u64|u128|f32|f64|bool|char|str|String|Vec|Option|Result)$")
526
527                ; Generic types
528                (generic_type
529                  type: (type_identifier) @type_name
530                )
531
532                ; Scoped type identifiers with simple path
533                (scoped_type_identifier
534                  path: (identifier) @module_name
535                  name: (type_identifier) @type_name
536                )
537                
538                ; Scoped type identifiers with scoped path (e.g., crate::models)
539                (scoped_type_identifier
540                  path: (scoped_identifier) @scoped_module
541                  name: (type_identifier) @type_name
542                )
543
544                ; Types in function parameters
545                (parameter
546                  type: [
547                    (type_identifier) @param_type
548                    (generic_type type: (type_identifier) @param_type)
549                    (reference_type type: (type_identifier) @param_type)
550                  ]
551                )
552
553                ; Return types
554                (function_item
555                  return_type: [
556                    (type_identifier) @return_type
557                    (generic_type type: (type_identifier) @return_type)
558                    (reference_type type: (type_identifier) @return_type)
559                  ]
560                )
561
562                ; Field types in structs
563                (field_declaration
564                  type: [
565                    (type_identifier) @field_type
566                    (generic_type type: (type_identifier) @field_type)
567                    (reference_type type: (type_identifier) @field_type)
568                  ]
569                )
570
571                ; Trait bounds
572                (trait_bounds
573                  (type_identifier) @trait_name
574                )
575
576                ; Types in use statements (traits and types)
577                (use_declaration
578                  (scoped_identifier
579                    name: (identifier) @imported_type
580                  )
581                )
582                (#match? @imported_type "^[A-Z]")
583            "#
584            }
585            "python" => {
586                r#"
587                ; Type identifiers in type positions
588                (type (identifier) @type_name)
589
590                ; Function parameter type annotations 
591                (typed_parameter (identifier) @param_type)
592
593                ; Class inheritance 
594                (class_definition
595                  superclasses: (argument_list (identifier) @parent_class)
596                )
597
598                ; Generic/subscript type references
599                (subscript (identifier) @subscript_type)
600                
601                ; Attribute access on types (e.g., UserRole.ADMIN)
602                (attribute
603                  object: (identifier) @type_name
604                  (#match? @type_name "^[A-Z]")
605                )
606            "#
607            }
608            "javascript" => {
609                r#"
610                ; JSX element types (React components)
611                (jsx_element
612                  open_tag: (jsx_opening_element
613                    name: (identifier) @jsx_type
614                  )
615                )
616                (#match? @jsx_type "^[A-Z]")
617
618                ; JSX self-closing elements
619                (jsx_self_closing_element
620                  name: (identifier) @jsx_type
621                )
622                (#match? @jsx_type "^[A-Z]")
623            "#
624            }
625            "typescript" => {
626                r#"
627                ; Type annotations
628                (type_annotation
629                  (type_identifier) @type_name
630                )
631
632                ; Predefined type annotations (void, any, etc.)
633                (type_annotation
634                  (predefined_type) @type_name
635                )
636
637                ; Generic type arguments
638                (type_arguments
639                  (type_identifier) @type_arg
640                )
641
642                ; Interface declarations
643                (interface_declaration
644                  name: (type_identifier) @interface_name
645                )
646
647                ; Type aliases
648                (type_alias_declaration
649                  name: (type_identifier) @type_alias
650                )
651            "#
652            }
653            _ => {
654                return Err(ContextCreatorError::ParseError(format!(
655                    "Unsupported language for type queries: {language_name}"
656                )))
657            }
658        };
659
660        Query::new(language, query_text).map_err(|e| {
661            ContextCreatorError::ParseError(format!("Failed to create type reference query: {e}"))
662        })
663    }
664
665    /// Extract imports from query matches
666    fn extract_imports<'a>(
667        &self,
668        matches: tree_sitter::QueryMatches<'a, 'a, &'a [u8]>,
669        content: &str,
670    ) -> Result<Vec<Import>, ContextCreatorError> {
671        let mut imports = Vec::new();
672        let import_query_captures = self.import_query.capture_names();
673
674        for match_ in matches {
675            let mut module = String::new();
676            let mut items = Vec::new();
677            let mut is_relative = false;
678            let mut line = 0;
679
680            for capture in match_.captures {
681                let capture_name = &import_query_captures[capture.index as usize];
682                let node = capture.node;
683                line = node.start_position().row + 1;
684
685                match capture_name.as_str() {
686                    "rust_simple_import" => {
687                        // Simple Rust import like "use std::collections::HashMap"
688                        // The path will be captured by rust_import_path
689                    }
690                    "rust_scoped_import" => {
691                        // Scoped Rust import like "use crate::module::{item1, item2}"
692                        // The module path and items will be captured separately
693                    }
694                    "rust_aliased_import" => {
695                        // Aliased Rust import like "use foo as bar"
696                        // The path and alias will be captured separately
697                    }
698                    "rust_wildcard_import" => {
699                        // Wildcard Rust import like "use module::*"
700                        items.push("*".to_string());
701                    }
702                    "rust_import_path" | "rust_module_path" | "rust_wildcard_path" => {
703                        // Capture the module path for Rust imports
704                        if let Ok(path_text) = node.utf8_text(content.as_bytes()) {
705                            module = path_text.to_string();
706                            is_relative = path_text.starts_with("self::")
707                                || path_text.starts_with("super::")
708                                || path_text.starts_with("crate::");
709                        }
710                    }
711                    "rust_import_item" => {
712                        // Capture individual items in a scoped import
713                        if let Ok(item_text) = node.utf8_text(content.as_bytes()) {
714                            items.push(item_text.to_string());
715                        }
716                    }
717                    "rust_import_alias" => {
718                        // For aliased imports, we might want to track the alias
719                        // For now, we'll just add it to items
720                        if let Ok(alias_text) = node.utf8_text(content.as_bytes()) {
721                            items.push(format!("as {alias_text}"));
722                        }
723                    }
724                    "js_import" | "ts_import" => {
725                        // For JavaScript/TypeScript, we rely on module_path and import_name captures
726                        // The module and items will be set by those specific captures
727                    }
728                    "simple_import" => {
729                        // Python simple import statement
730                    }
731                    "from_import" | "from_import_aliased" => {
732                        // Python from import statement
733                    }
734                    "wildcard_import" => {
735                        // Python wildcard import statement (from module import *)
736                        items.push("*".to_string());
737                    }
738                    "relative_wildcard_import" => {
739                        // Python relative wildcard import statement
740                        is_relative = true;
741                        items.push("*".to_string());
742                    }
743                    "relative_from_import" | "relative_from_import_aliased" => {
744                        // Python relative from import statement
745                        is_relative = true;
746                    }
747                    "rust_module" => {
748                        // Parse module declaration (mod item)
749                        let (parsed_module, parsed_items, is_rel) =
750                            self.parse_rust_module_declaration(node, content);
751                        module = parsed_module;
752                        items = parsed_items;
753                        is_relative = is_rel;
754                    }
755                    "mod_name" | "crate_name" => {
756                        if let Ok(name) = node.utf8_text(content.as_bytes()) {
757                            // Only set module if it's not already set by the full module parsing
758                            if module.is_empty() {
759                                module = name.to_string();
760                                is_relative = capture_name == "mod_name";
761                            }
762                        }
763                    }
764                    "module_name" => {
765                        // For Python simple imports and Rust/JS module paths
766                        if let Ok(name) = node.utf8_text(content.as_bytes()) {
767                            module = name.trim_matches('"').to_string();
768                        }
769                    }
770                    "from_module" => {
771                        // For Python from imports
772                        if let Ok(name) = node.utf8_text(content.as_bytes()) {
773                            module = name.to_string();
774                        }
775                    }
776                    "relative_module" => {
777                        // For Python relative imports (. or ..lib)
778                        if let Ok(name) = node.utf8_text(content.as_bytes()) {
779                            module = name.to_string();
780                            is_relative = true;
781                        }
782                    }
783                    "import_name" | "import_item" => {
784                        if let Ok(name) = node.utf8_text(content.as_bytes()) {
785                            items.push(name.to_string());
786                        }
787                    }
788                    "wildcard" => {
789                        // Wildcard import (*)
790                        items.push("*".to_string());
791                    }
792                    "module_path" => {
793                        if let Ok(name) = node.utf8_text(content.as_bytes()) {
794                            module = name.trim_matches('"').trim_matches('\'').to_string();
795                            // Check if it's a relative import for JavaScript/TypeScript
796                            if module.starts_with('.') {
797                                is_relative = true;
798                            }
799                        }
800                    }
801                    _ => {}
802                }
803            }
804
805            if !module.is_empty() || !items.is_empty() {
806                // Security check: validate the module path before adding
807                if self.is_secure_import(&module) {
808                    imports.push(Import {
809                        module,
810                        items,
811                        is_relative,
812                        line,
813                    });
814                } else {
815                    // Log dangerous imports but don't include them
816                    eprintln!("Warning: Blocked potentially dangerous import: {module}");
817                }
818            }
819        }
820
821        Ok(imports)
822    }
823
824    /// Extract function calls from query matches
825    fn extract_function_calls<'a>(
826        &self,
827        matches: tree_sitter::QueryMatches<'a, 'a, &'a [u8]>,
828        content: &str,
829    ) -> Result<Vec<FunctionCall>, ContextCreatorError> {
830        let mut calls = Vec::new();
831        let call_query_captures = self.function_call_query.capture_names();
832
833        for match_ in matches {
834            let mut name = String::new();
835            let mut module = None;
836            let mut line = 0;
837            let mut module_name = String::new();
838            let mut type_name = String::new();
839
840            for capture in match_.captures {
841                let capture_name = &call_query_captures[capture.index as usize];
842                let node = capture.node;
843                line = node.start_position().row + 1;
844
845                match capture_name.as_str() {
846                    "fn_name" | "method_name" => {
847                        if let Ok(fn_name) = node.utf8_text(content.as_bytes()) {
848                            name = fn_name.to_string();
849                        }
850                    }
851                    "module_name" => {
852                        if let Ok(mod_name) = node.utf8_text(content.as_bytes()) {
853                            module_name = mod_name.to_string();
854                            module = Some(mod_name.to_string());
855                        }
856                    }
857                    "type_name" => {
858                        if let Ok(type_name_str) = node.utf8_text(content.as_bytes()) {
859                            type_name = type_name_str.to_string();
860                        }
861                    }
862                    "macro_name" => {
863                        if let Ok(macro_name) = node.utf8_text(content.as_bytes()) {
864                            name = macro_name.to_string();
865                        }
866                    }
867                    _ => {}
868                }
869            }
870
871            // Handle nested scoped calls (lib::User::new)
872            if !module_name.is_empty() && !type_name.is_empty() {
873                module = Some(format!("{module_name}::{type_name}"));
874            }
875
876            if !name.is_empty() {
877                calls.push(FunctionCall { name, module, line });
878            }
879        }
880
881        Ok(calls)
882    }
883
884    /// Extract type references from query matches
885    fn extract_type_references<'a>(
886        &self,
887        matches: tree_sitter::QueryMatches<'a, 'a, &'a [u8]>,
888        content: &str,
889    ) -> Result<Vec<TypeReference>, ContextCreatorError> {
890        let mut type_refs = Vec::new();
891        let type_query_captures = self.type_reference_query.capture_names();
892
893        for match_ in matches {
894            let mut names = HashMap::new();
895            let mut module = None;
896            let mut line = 0;
897
898            for capture in match_.captures {
899                let capture_name = &type_query_captures[capture.index as usize];
900                let node = capture.node;
901                line = node.start_position().row + 1;
902
903                if let Ok(text) = node.utf8_text(content.as_bytes()) {
904                    match capture_name.as_str() {
905                        "type_name" | "param_type" | "return_type" | "field_type"
906                        | "trait_name" | "imported_type" | "interface_name" | "type_alias"
907                        | "jsx_type" | "parent_class" | "type_arg" | "base_type"
908                        | "subscript_type" => {
909                            names.insert(capture_name.to_string(), text.to_string());
910                        }
911                        "module_name" => {
912                            module = Some(text.to_string());
913                        }
914                        "scoped_module" => {
915                            // For scoped modules like "crate::models", use as-is
916                            module = Some(text.to_string());
917                        }
918                        _ => {}
919                    }
920                }
921            }
922
923            // Create type references for each captured type name
924            for (_, type_name) in names {
925                // Skip built-in types and primitives
926                if self.is_builtin_type(&type_name) {
927                    continue;
928                }
929
930                type_refs.push(TypeReference {
931                    name: type_name.clone(),
932                    module: module.clone(),
933                    line,
934                    definition_path: None,
935                    is_external: false,
936                    external_package: None,
937                });
938            }
939        }
940
941        Ok(type_refs)
942    }
943
944    /// Resolve type definitions for type references
945    /// This method attempts to find the file that defines each type
946    pub fn resolve_type_definitions(
947        &self,
948        type_refs: &mut [TypeReference],
949        current_file: &std::path::Path,
950        project_root: &std::path::Path,
951    ) -> Result<(), ContextCreatorError> {
952        use crate::core::semantic::path_validator::validate_import_path;
953
954        for type_ref in type_refs.iter_mut() {
955            // Skip if already resolved or is external
956            if type_ref.definition_path.is_some() || type_ref.is_external {
957                continue;
958            }
959
960            // Try to resolve the type definition
961            if let Some(def_path) = self.find_type_definition(
962                &type_ref.name,
963                type_ref.module.as_deref(),
964                current_file,
965                project_root,
966            )? {
967                // Validate the path for security
968                match validate_import_path(project_root, &def_path) {
969                    Ok(validated_path) => {
970                        type_ref.definition_path = Some(validated_path);
971                    }
972                    Err(_) => {
973                        // Path validation failed, mark as external for safety
974                        type_ref.is_external = true;
975                    }
976                }
977            }
978        }
979
980        Ok(())
981    }
982
983    /// Find the definition file for a given type
984    fn find_type_definition(
985        &self,
986        type_name: &str,
987        module_name: Option<&str>,
988        current_file: &std::path::Path,
989        project_root: &std::path::Path,
990    ) -> Result<Option<std::path::PathBuf>, ContextCreatorError> {
991        use std::fs;
992
993        // Get the directory of the current file
994        let current_dir = current_file.parent().unwrap_or(project_root);
995
996        // Convert type name to lowercase for file matching
997        let type_name_lower = type_name.to_lowercase();
998
999        // Get file extensions based on current file
1000        let extensions = self.get_search_extensions(current_file);
1001
1002        // Build search patterns
1003        let mut patterns = vec![
1004            // Direct file name matches
1005            format!("{type_name_lower}.{}", extensions[0]),
1006            // Types files
1007            format!("types.{}", extensions[0]),
1008            // Module files
1009            format!("mod.{}", extensions[0]),
1010            format!("index.{}", extensions[0]),
1011            // Common type definition patterns
1012            format!("{type_name_lower}_types.{}", extensions[0]),
1013            format!("{type_name_lower}_type.{}", extensions[0]),
1014            format!("{type_name_lower}s.{}", extensions[0]), // plural form
1015        ];
1016
1017        // Add patterns for all supported extensions
1018        for ext in &extensions[1..] {
1019            patterns.push(format!("{type_name_lower}.{ext}"));
1020            patterns.push(format!("types.{ext}"));
1021            patterns.push(format!("index.{ext}"));
1022        }
1023
1024        // If we have a module name, add module-based patterns
1025        if let Some(module) = module_name {
1026            // Handle Rust module paths like "crate::models"
1027            if module.starts_with("crate::") {
1028                let relative_path = module.strip_prefix("crate::").unwrap();
1029                // Convert module path to file path (e.g., "models" or "domain::types")
1030                let module_path = relative_path.replace("::", "/");
1031
1032                for ext in &extensions {
1033                    // Try the type as a file in the module directory
1034                    patterns.insert(0, format!("{module_path}/{type_name_lower}.{ext}"));
1035                    // Try the module file itself (mod.rs)
1036                    patterns.insert(1, format!("{module_path}/mod.{ext}"));
1037                    // Try the module as a file (models.rs)
1038                    patterns.insert(2, format!("{module_path}.{ext}"));
1039                }
1040            } else if module.contains("::") {
1041                // Handle other module paths like "shared::types"
1042                let module_path = module.replace("::", "/");
1043
1044                for ext in &extensions {
1045                    // Try the type as a file in the module directory
1046                    patterns.insert(0, format!("{module_path}/{type_name_lower}.{ext}"));
1047                    // Try the module file itself (mod.rs)
1048                    patterns.insert(1, format!("{module_path}/mod.{ext}"));
1049                    // Try the module as a file
1050                    patterns.insert(2, format!("{module_path}.{ext}"));
1051                }
1052            } else {
1053                // Handle simple module names
1054                let module_lower = module.to_lowercase();
1055                for ext in &extensions {
1056                    patterns.insert(0, format!("{module_lower}.{ext}"));
1057                    patterns.insert(1, format!("{module}.{ext}")); // Also try original case
1058                }
1059            }
1060        }
1061
1062        // Search directories in priority order
1063        let mut search_dirs = vec![
1064            project_root.join("src"), // Start with project root src for crate:: paths
1065            project_root.to_path_buf(),
1066            current_dir.to_path_buf(),
1067        ];
1068
1069        // Add parent directory if it exists
1070        if let Some(parent_dir) = current_dir.parent() {
1071            search_dirs.push(parent_dir.to_path_buf());
1072        }
1073
1074        // Add common project directories
1075        search_dirs.extend(vec![
1076            project_root.join("src/models"),
1077            project_root.join("src/types"),
1078            project_root.join("shared"),
1079            project_root.join("shared/types"),
1080            project_root.join("lib"),
1081            project_root.join("domain"),
1082            current_dir.join("models"),
1083            current_dir.join("types"),
1084        ]);
1085
1086        for search_dir in search_dirs {
1087            if !search_dir.exists() {
1088                continue;
1089            }
1090
1091            for pattern in &patterns {
1092                let candidate = search_dir.join(pattern);
1093                if candidate.exists() {
1094                    // Read the file to verify it contains the type definition
1095                    if let Ok(content) = fs::read_to_string(&candidate) {
1096                        if self.file_contains_definition(&candidate, &content, type_name)? {
1097                            return Ok(Some(candidate));
1098                        }
1099                    }
1100                }
1101            }
1102        }
1103
1104        Ok(None)
1105    }
1106
1107    /// Check if a file contains a definition for a given type name using AST parsing
1108    fn file_contains_definition(
1109        &self,
1110        path: &std::path::Path,
1111        content: &str,
1112        type_name: &str,
1113    ) -> Result<bool, ContextCreatorError> {
1114        // Determine the language from the file extension
1115        let language = match path.extension().and_then(|s| s.to_str()) {
1116            Some("rs") => Some(tree_sitter_rust::language()),
1117            Some("py") => Some(tree_sitter_python::language()),
1118            Some("ts") | Some("tsx") => Some(tree_sitter_typescript::language_typescript()),
1119            Some("js") | Some("jsx") => Some(tree_sitter_javascript::language()),
1120            _ => None,
1121        };
1122
1123        if let Some(language) = language {
1124            let mut parser = tree_sitter::Parser::new();
1125            if parser.set_language(language).is_err() {
1126                return Ok(false);
1127            }
1128
1129            if let Some(tree) = parser.parse(content, None) {
1130                // Language-specific queries for type definitions
1131                let query_text = match path.extension().and_then(|s| s.to_str()) {
1132                    Some("rs") => {
1133                        r#"
1134                        [
1135                          (struct_item name: (type_identifier) @name)
1136                          (enum_item name: (type_identifier) @name)
1137                          (trait_item name: (type_identifier) @name)
1138                          (type_item name: (type_identifier) @name)
1139                          (union_item name: (type_identifier) @name)
1140                        ]
1141                    "#
1142                    }
1143                    Some("py") => {
1144                        r#"
1145                        [
1146                          (class_definition name: (identifier) @name)
1147                          (function_definition name: (identifier) @name)
1148                        ]
1149                    "#
1150                    }
1151                    Some("ts") | Some("tsx") => {
1152                        r#"
1153                        [
1154                          (interface_declaration name: (type_identifier) @name)
1155                          (type_alias_declaration name: (type_identifier) @name)
1156                          (class_declaration name: (type_identifier) @name)
1157                          (enum_declaration name: (identifier) @name)
1158                        ]
1159                    "#
1160                    }
1161                    Some("js") | Some("jsx") => {
1162                        r#"
1163                        [
1164                          (class_declaration name: (identifier) @name)
1165                          (function_declaration name: (identifier) @name)
1166                        ]
1167                    "#
1168                    }
1169                    _ => return Ok(false),
1170                };
1171
1172                if let Ok(query) = tree_sitter::Query::new(language, query_text) {
1173                    let mut cursor = tree_sitter::QueryCursor::new();
1174                    let matches = cursor.matches(&query, tree.root_node(), content.as_bytes());
1175
1176                    // Check each match to see if the captured name matches our target type
1177                    for m in matches {
1178                        for capture in m.captures {
1179                            if let Ok(captured_text) = capture.node.utf8_text(content.as_bytes()) {
1180                                if captured_text == type_name {
1181                                    return Ok(true);
1182                                }
1183                            }
1184                        }
1185                    }
1186                }
1187            }
1188        }
1189
1190        Ok(false)
1191    }
1192
1193    /// Get appropriate file extensions for searching based on current file
1194    fn get_search_extensions(&self, current_file: &std::path::Path) -> Vec<&'static str> {
1195        match current_file.extension().and_then(|s| s.to_str()) {
1196            Some("rs") => vec!["rs"],
1197            Some("py") => vec!["py"],
1198            Some("ts") | Some("tsx") => vec!["ts", "tsx", "js", "jsx"],
1199            Some("js") | Some("jsx") => vec!["js", "jsx", "ts", "tsx"],
1200            _ => vec!["rs", "py", "ts", "js"], // Default fallback
1201        }
1202    }
1203
1204    /// Parse Rust use tree structure
1205    #[allow(dead_code)]
1206    fn parse_rust_use_tree(
1207        &self,
1208        node: tree_sitter::Node,
1209        content: &str,
1210    ) -> (String, Vec<String>, bool) {
1211        // Implementation would recursively parse the use tree structure
1212        // For now, simplified implementation
1213        if let Ok(text) = node.utf8_text(content.as_bytes()) {
1214            let is_relative =
1215                text.contains("self::") || text.contains("super::") || text.contains("crate::");
1216            (text.to_string(), Vec::new(), is_relative)
1217        } else {
1218            (String::new(), Vec::new(), false)
1219        }
1220    }
1221
1222    /// Parse Rust module declaration structure
1223    fn parse_rust_module_declaration(
1224        &self,
1225        node: tree_sitter::Node,
1226        content: &str,
1227    ) -> (String, Vec<String>, bool) {
1228        // Parse module declaration like "mod config;"
1229        if let Ok(text) = node.utf8_text(content.as_bytes()) {
1230            // Look for the module name after "mod"
1231            if let Some(mod_start) = text.find("mod ") {
1232                let after_mod = &text[mod_start + 4..];
1233                if let Some(end_pos) = after_mod.find(';') {
1234                    let module_name = after_mod[..end_pos].trim();
1235                    return (module_name.to_string(), Vec::new(), true);
1236                } else if let Some(end_pos) = after_mod.find(' ') {
1237                    let module_name = after_mod[..end_pos].trim();
1238                    return (module_name.to_string(), Vec::new(), true);
1239                }
1240            }
1241        }
1242        (String::new(), Vec::new(), false)
1243    }
1244
1245    /// Parse Rust use declaration structure
1246    #[allow(dead_code)]
1247    fn parse_rust_use_declaration(
1248        &self,
1249        node: tree_sitter::Node,
1250        content: &str,
1251    ) -> (String, Vec<String>, bool) {
1252        // Parse the entire use declaration
1253        if let Ok(text) = node.utf8_text(content.as_bytes()) {
1254            // Extract module path and imported items from use declaration
1255            // Example: "use model::{Account, DatabaseFactory, Rule};"
1256            let clean_text = text
1257                .trim()
1258                .trim_start_matches("use ")
1259                .trim_end_matches(';')
1260                .trim();
1261
1262            let is_relative = clean_text.contains("self::")
1263                || clean_text.contains("super::")
1264                || clean_text.contains("crate::");
1265
1266            if clean_text.contains('{') && clean_text.contains('}') {
1267                // Handle scoped imports like "model::{Account, DatabaseFactory}"
1268                if let Some(colon_pos) = clean_text.find("::") {
1269                    let module = clean_text[..colon_pos].to_string();
1270
1271                    // Extract items from braces
1272                    if let Some(start) = clean_text.find('{') {
1273                        if let Some(end) = clean_text.find('}') {
1274                            let items_str = &clean_text[start + 1..end];
1275                            let items: Vec<String> = items_str
1276                                .split(',')
1277                                .map(|s| s.trim().to_string())
1278                                .filter(|s| !s.is_empty())
1279                                .collect();
1280                            return (module, items, is_relative);
1281                        }
1282                    }
1283                }
1284            } else {
1285                // Handle simple imports like "use std::collections::HashMap;" or "use my_lib::parsing::parse_line;"
1286                // For Rust, we need to separate the module path from the imported item
1287                let parts: Vec<&str> = clean_text.split("::").collect();
1288                if parts.len() > 1 {
1289                    // Check if the last part is likely a function/type (starts with lowercase for functions, uppercase for types)
1290                    let last_part = parts.last().unwrap();
1291                    if !last_part.is_empty() {
1292                        let first_char = last_part.chars().next().unwrap();
1293                        // If it's a function (lowercase) or type (uppercase), it's the imported item
1294                        if first_char.is_alphabetic()
1295                            && (first_char.is_lowercase() || first_char.is_uppercase())
1296                        {
1297                            // Module is everything except the last part
1298                            let module = parts[..parts.len() - 1].join("::");
1299                            let items = vec![last_part.to_string()];
1300                            return (module, items, is_relative);
1301                        }
1302                    }
1303                }
1304                // Otherwise, it's just a module import
1305                return (clean_text.to_string(), Vec::new(), is_relative);
1306            }
1307
1308            (clean_text.to_string(), Vec::new(), is_relative)
1309        } else {
1310            (String::new(), Vec::new(), false)
1311        }
1312    }
1313
1314    /// Check if an import is secure (doesn't attempt path traversal or system access)
1315    fn is_secure_import(&self, module: &str) -> bool {
1316        // Reject empty modules
1317        if module.is_empty() {
1318            return false;
1319        }
1320
1321        // Check for absolute paths that could be system paths
1322        if module.starts_with('/') {
1323            // Unix absolute paths like /etc/passwd
1324            if module.contains("/etc/") || module.contains("/sys/") || module.contains("/proc/") {
1325                return false;
1326            }
1327        }
1328
1329        // Check for Windows absolute paths
1330        if module.len() >= 2 && module.chars().nth(1) == Some(':') {
1331            // Windows paths like C:\Windows\System32
1332            if module.to_lowercase().contains("windows")
1333                || module.to_lowercase().contains("system32")
1334            {
1335                return false;
1336            }
1337        }
1338
1339        // Check for excessive path traversal
1340        let dot_dot_count = module.matches("..").count();
1341        if dot_dot_count > 3 {
1342            // More than 3 levels of .. is suspicious
1343            return false;
1344        }
1345
1346        // Check for known dangerous patterns
1347        let dangerous_patterns = [
1348            "/etc/passwd",
1349            "/etc/shadow",
1350            "/root/",
1351            "C:\\Windows\\",
1352            "C:\\System32\\",
1353            "../../../../etc/",
1354            "..\\..\\..\\..\\windows\\",
1355            "file:///",
1356            "~/../../../",
1357            "%USERPROFILE%",
1358            "$HOME/../../../",
1359        ];
1360
1361        for pattern in &dangerous_patterns {
1362            if module.contains(pattern) {
1363                return false;
1364            }
1365        }
1366
1367        // Check for suspicious characters that might indicate injection
1368        if module.contains('\0') || module.contains('\x00') {
1369            return false;
1370        }
1371
1372        // Allow the import if it passes all checks
1373        true
1374    }
1375
1376    /// Extract function definitions from query matches
1377    fn extract_function_definitions<'a>(
1378        &self,
1379        matches: tree_sitter::QueryMatches<'a, 'a, &'a [u8]>,
1380        content: &str,
1381    ) -> Result<Vec<FunctionDefinition>, ContextCreatorError> {
1382        let mut definitions = Vec::new();
1383        let def_query_captures = self.function_definition_query.capture_names();
1384
1385        for match_ in matches {
1386            let mut name = String::new();
1387            let mut is_exported = false;
1388            let mut line = 0;
1389
1390            for capture in match_.captures {
1391                let capture_name = &def_query_captures[capture.index as usize];
1392                let node = capture.node;
1393                line = node.start_position().row + 1;
1394
1395                match capture_name.as_str() {
1396                    "fn_name"
1397                    | "method_name"
1398                    | "assoc_fn_name"
1399                    | "arrow_fn_name"
1400                    | "fn_expr_name"
1401                    | "async_fn_name"
1402                    | "export_fn_name"
1403                    | "trait_fn_name"
1404                    | "commonjs_export_name" => {
1405                        if let Ok(fn_name) = node.utf8_text(content.as_bytes()) {
1406                            name = fn_name.to_string();
1407                        }
1408                    }
1409                    "visibility" | "method_visibility" => {
1410                        if let Ok(vis) = node.utf8_text(content.as_bytes()) {
1411                            // In Rust, pub means exported
1412                            is_exported = vis.contains("pub");
1413                        }
1414                    }
1415                    "export_function" | "commonjs_export" => {
1416                        // JavaScript/TypeScript export
1417                        is_exported = true;
1418                    }
1419                    "function"
1420                    | "method"
1421                    | "assoc_function"
1422                    | "arrow_function"
1423                    | "function_expression"
1424                    | "async_function" => {
1425                        // For languages without explicit visibility, check context
1426                        if self.language_name == "python" {
1427                            // In Python, functions not starting with _ are considered public
1428                            is_exported = !name.starts_with('_');
1429                        } else if self.language_name == "javascript"
1430                            || self.language_name == "typescript"
1431                        {
1432                            // In JS/TS, all module-level functions are potentially callable
1433                            // unless explicitly marked private or are nested
1434                            is_exported = true;
1435                        }
1436                    }
1437                    _ => {}
1438                }
1439            }
1440
1441            if !name.is_empty() {
1442                // Special handling for Python methods
1443                if self.language_name == "python" && !name.starts_with('_') {
1444                    is_exported = true;
1445                }
1446
1447                // Special handling for JavaScript/TypeScript without explicit export
1448                if (self.language_name == "javascript" || self.language_name == "typescript")
1449                    && !is_exported
1450                {
1451                    // Default to exported for top-level functions
1452                    is_exported = true;
1453                }
1454
1455                definitions.push(FunctionDefinition {
1456                    name,
1457                    is_exported,
1458                    line,
1459                });
1460            }
1461        }
1462
1463        Ok(definitions)
1464    }
1465
1466    /// Check if a type name is a built-in type
1467    fn is_builtin_type(&self, type_name: &str) -> bool {
1468        matches!(
1469            type_name,
1470            "i8" | "i16"
1471                | "i32"
1472                | "i64"
1473                | "i128"
1474                | "u8"
1475                | "u16"
1476                | "u32"
1477                | "u64"
1478                | "u128"
1479                | "f32"
1480                | "f64"
1481                | "bool"
1482                | "char"
1483                | "str"
1484                | "String"
1485                | "Vec"
1486                | "Option"
1487                | "Result"
1488                | "Box"
1489                | "Rc"
1490                | "Arc"
1491                | "HashMap"
1492                | "HashSet"
1493                | "number"
1494                | "string"
1495                | "boolean"
1496                | "object"
1497                | "int"
1498                | "float"
1499                | "list"
1500                | "dict"
1501                | "tuple"
1502                | "set"
1503        )
1504    }
1505}
1506
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an engine for `language_name`, print the construction error (if
    /// any) prefixed with `label`, and assert that construction succeeded.
    fn assert_engine_builds(label: &str, language: Language, language_name: &str) {
        let engine = QueryEngine::new(language, language_name);
        if let Err(e) = &engine {
            println!("{label} QueryEngine error: {e}");
        }
        assert!(engine.is_ok());
    }

    #[test]
    fn test_rust_query_creation() {
        assert!(QueryEngine::new(tree_sitter_rust::language(), "rust").is_ok());
    }

    #[test]
    fn test_python_query_creation() {
        assert_engine_builds("Python", tree_sitter_python::language(), "python");
    }

    #[test]
    fn test_javascript_query_creation() {
        assert_engine_builds(
            "JavaScript",
            tree_sitter_javascript::language(),
            "javascript",
        );
    }

    #[test]
    fn test_typescript_query_creation() {
        assert_engine_builds(
            "TypeScript",
            tree_sitter_typescript::language_typescript(),
            "typescript",
        );
    }

    #[test]
    fn test_builtin_type_detection() {
        let engine = QueryEngine::new(tree_sitter_rust::language(), "rust").unwrap();

        for builtin in ["String", "Vec", "i32"] {
            assert!(engine.is_builtin_type(builtin));
        }
        assert!(!engine.is_builtin_type("MyCustomType"));
    }
}