scribe_selection/
ast_parser.rs

1//! Tree-sitter based AST parsing for accurate code analysis
2//! 
3//! This module replaces regex-based parsing with proper syntax-aware analysis
4//! using tree-sitter parsers for multiple programming languages.
5
6use std::collections::HashMap;
7use serde::{Deserialize, Serialize};
8use tree_sitter::{Parser, Language, Node, Tree, Query, QueryCursor};
9use scribe_core::{Result, ScribeError};
10
/// Supported programming languages for AST parsing.
///
/// Each variant corresponds to one tree-sitter grammar (see
/// `AstLanguage::tree_sitter_language`) and is used as the key of the parser
/// table held by `AstParser`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AstLanguage {
    /// Python sources (extensions `py`, `pyi`, `pyw`).
    Python,
    /// JavaScript sources (extensions `js`, `mjs`, `cjs`).
    JavaScript,
    /// TypeScript sources (extensions `ts`, `mts`, `cts`).
    TypeScript,
    /// Go sources (extension `go`).
    Go,
    /// Rust sources (extension `rs`).
    Rust,
}
20
21impl AstLanguage {
22    /// Get the tree-sitter language for this language
23    pub fn tree_sitter_language(&self) -> Language {
24        match self {
25            AstLanguage::Python => tree_sitter_python::language(),
26            AstLanguage::JavaScript => tree_sitter_javascript::language(),
27            AstLanguage::TypeScript => tree_sitter_typescript::language_typescript(),
28            AstLanguage::Go => tree_sitter_go::language(),
29            AstLanguage::Rust => tree_sitter_rust::language(),
30        }
31    }
32    
33    /// Detect language from file extension
34    pub fn from_extension(ext: &str) -> Option<Self> {
35        match ext.to_lowercase().as_str() {
36            "py" | "pyi" | "pyw" => Some(AstLanguage::Python),
37            "js" | "mjs" | "cjs" => Some(AstLanguage::JavaScript),
38            "ts" | "mts" | "cts" => Some(AstLanguage::TypeScript),
39            "go" => Some(AstLanguage::Go),
40            "rs" => Some(AstLanguage::Rust),
41            _ => None,
42        }
43    }
44}
45
/// Import information extracted from AST.
///
/// One value describes a single imported module; it is produced by the
/// language-specific import extractors behind `AstParser::extract_imports`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AstImport {
    /// The module being imported, as written in the source.
    pub module: String,
    /// Optional alias for the import (e.g. the `system` in `import sys as system`).
    pub alias: Option<String>,
    /// Specific items being imported (for from-imports); empty otherwise.
    pub items: Vec<String>,
    /// Line number where the import appears (1-indexed).
    pub line_number: usize,
    /// Whether this is a relative import (for Python: the module text starts with `.`).
    pub is_relative: bool,
}
60
/// A parsed code chunk with semantic information.
///
/// Chunks are built from tree-sitter query captures; each one covers the
/// source range of a single captured syntax node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AstChunk {
    /// The text content of this chunk (the source slice covered by the node).
    pub content: String,
    /// Type of the chunk (function, class, import, etc.); this is the name of
    /// the query capture that produced the chunk.
    pub chunk_type: String,
    /// Start line (1-indexed)
    pub start_line: usize,
    /// End line (1-indexed)
    pub end_line: usize,
    /// Start byte offset into the parsed source
    pub start_byte: usize,
    /// End byte offset into the parsed source (exclusive slice bound)
    pub end_byte: usize,
    /// Semantic importance score (0.0-1.0)
    pub importance_score: f64,
    /// Estimated token count (number of whitespace-separated words in `content`)
    pub estimated_tokens: usize,
    /// Dependencies (other chunks this depends on)
    pub dependencies: Vec<String>,
    /// Name/identifier of this chunk (if applicable)
    pub name: Option<String>,
    /// Whether this is publicly visible
    pub is_public: bool,
    /// Whether this has documentation
    pub has_documentation: bool,
}
89
/// Extracted signature information.
///
/// Describes the header of one declaration (or import) without its body.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AstSignature {
    /// The signature text (the first source line of the matched node)
    pub signature: String,
    /// Type of signature (function, class, interface, etc.)
    pub signature_type: String,
    /// Name/identifier; left empty when the match has no name capture
    pub name: String,
    /// Parameters (for functions/methods)
    pub parameters: Vec<String>,
    /// Return type (if available)
    pub return_type: Option<String>,
    /// Whether this is public/exported
    pub is_public: bool,
    /// Line number (1-indexed)
    pub line: usize,
}
108
/// Tree-sitter based AST parser and analyzer.
///
/// Holds one tree-sitter `Parser` per supported `AstLanguage`; the table is
/// filled by `AstParser::new`.
pub struct AstParser {
    /// Parser instances keyed by the language they were configured for.
    parsers: HashMap<AstLanguage, Parser>,
}
113
114impl AstParser {
115    /// Create a new AST parser with support for all languages
116    pub fn new() -> Result<Self> {
117        let mut parsers = HashMap::new();
118        
119        for language in [
120            AstLanguage::Python,
121            AstLanguage::JavaScript, 
122            AstLanguage::TypeScript,
123            AstLanguage::Go,
124            AstLanguage::Rust,
125        ] {
126            let mut parser = Parser::new();
127            parser.set_language(language.tree_sitter_language())
128                .map_err(|e| ScribeError::parse(format!("Failed to set tree-sitter language: {}", e)))?;
129            parsers.insert(language, parser);
130        }
131        
132        Ok(Self { parsers })
133    }
134    
135    /// Parse code into chunks using tree-sitter AST
136    pub fn parse_chunks(&mut self, content: &str, file_path: &str) -> Result<Vec<AstChunk>> {
137        let language = self.detect_language(file_path)?;
138        let parser = self.parsers.get_mut(&language)
139            .ok_or_else(|| ScribeError::parse(format!("No parser for language: {:?}", language)))?;
140        
141        let tree = parser.parse(content, None)
142            .ok_or_else(|| ScribeError::parse("Failed to parse source code".to_string()))?;
143        
144        let chunks = match language {
145            AstLanguage::Python => self.parse_python_chunks(content, &tree)?,
146            AstLanguage::JavaScript => self.parse_javascript_chunks(content, &tree)?,
147            AstLanguage::TypeScript => self.parse_typescript_chunks(content, &tree)?,
148            AstLanguage::Go => self.parse_go_chunks(content, &tree)?,
149            AstLanguage::Rust => self.parse_rust_chunks(content, &tree)?,
150        };
151        
152        Ok(chunks)
153    }
154    
155    /// Extract signatures using tree-sitter AST
156    /// Extract imports from the given content using tree-sitter
157    pub fn extract_imports(&self, content: &str, language: AstLanguage) -> Result<Vec<AstImport>> {
158        // Create a fresh parser for this operation to avoid mutable borrow issues
159        let mut parser = Parser::new();
160        parser.set_language(language.tree_sitter_language()).map_err(|e| 
161            ScribeError::parse(format!("Failed to set language: {}", e)))?;
162        
163        let tree = parser.parse(content, None)
164            .ok_or_else(|| ScribeError::parse("Failed to parse content"))?;
165        
166        let mut imports = Vec::new();
167        let root_node = tree.root_node();
168        
169        // Extract imports based on language
170        match language {
171            AstLanguage::Python => {
172                self.extract_python_imports(&root_node, content, &mut imports)?;
173            }
174            AstLanguage::JavaScript | AstLanguage::TypeScript => {
175                self.extract_js_ts_imports(&root_node, content, &mut imports)?;
176            }
177            AstLanguage::Go => {
178                self.extract_go_imports(&root_node, content, &mut imports)?;
179            }
180            AstLanguage::Rust => {
181                self.extract_rust_imports(&root_node, content, &mut imports)?;
182            }
183        }
184        
185        Ok(imports)
186    }
187
188    pub fn extract_signatures(&mut self, content: &str, file_path: &str) -> Result<Vec<AstSignature>> {
189        let language = self.detect_language(file_path)?;
190        let parser = self.parsers.get_mut(&language)
191            .ok_or_else(|| ScribeError::parse(format!("No parser for language: {:?}", language)))?;
192        
193        let tree = parser.parse(content, None)
194            .ok_or_else(|| ScribeError::parse("Failed to parse source code".to_string()))?;
195        
196        let signatures = match language {
197            AstLanguage::Python => self.extract_python_signatures(content, &tree)?,
198            AstLanguage::JavaScript => self.extract_javascript_signatures(content, &tree)?,
199            AstLanguage::TypeScript => self.extract_typescript_signatures(content, &tree)?,
200            AstLanguage::Go => self.extract_go_signatures(content, &tree)?,
201            AstLanguage::Rust => self.extract_rust_signatures(content, &tree)?,
202        };
203        
204        Ok(signatures)
205    }
206    
207    /// Detect language from file path
208    fn detect_language(&self, file_path: &str) -> Result<AstLanguage> {
209        let extension = std::path::Path::new(file_path)
210            .extension()
211            .and_then(|ext| ext.to_str())
212            .unwrap_or("");
213        
214        AstLanguage::from_extension(extension)
215            .ok_or_else(|| ScribeError::parse(format!("Unsupported file extension: {}", extension)))
216    }
217    
218    /// Parse Python code chunks using tree-sitter
219    fn parse_python_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
220        let mut chunks = Vec::new();
221        let root_node = tree.root_node();
222        
223        // Query for Python constructs
224        let query_str = r#"
225            (import_statement) @import
226            (import_from_statement) @import_from
227            (function_definition) @function
228            (class_definition) @class
229            (assignment 
230                left: (identifier) @const_name
231                right: (_) @const_value
232                (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
233            ) @constant
234        "#;
235        
236        let query = Query::new(AstLanguage::Python.tree_sitter_language(), query_str)
237            .map_err(|e| ScribeError::parse(format!("Invalid Python query: {}", e)))?;
238        
239        let mut cursor = QueryCursor::new();
240        let captures = cursor.matches(&query, root_node, content.as_bytes());
241        
242        for match_ in captures {
243            for capture in match_.captures {
244                let node = capture.node;
245                let chunk_type = &query.capture_names()[capture.index as usize];
246                
247                let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Python)?;
248                chunks.push(chunk);
249            }
250        }
251        
252        // Sort by start position
253        chunks.sort_by_key(|c| c.start_byte);
254        Ok(chunks)
255    }
256    
257    /// Parse JavaScript code chunks using tree-sitter
258    fn parse_javascript_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
259        let mut chunks = Vec::new();
260        let root_node = tree.root_node();
261        
262        let query_str = r#"
263            (import_statement) @import
264            (export_statement) @export
265            (function_declaration) @function
266            (arrow_function) @arrow_function
267            (class_declaration) @class
268            (interface_declaration) @interface
269            (type_alias_declaration) @type_alias
270            (variable_declaration
271                declarations: (variable_declarator
272                    name: (identifier) @const_name
273                    value: (_) @const_value
274                ) @const_declarator
275                (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
276            ) @constant
277        "#;
278        
279        let query = Query::new(AstLanguage::JavaScript.tree_sitter_language(), query_str)
280            .map_err(|e| ScribeError::parse(format!("Invalid JavaScript query: {}", e)))?;
281        
282        let mut cursor = QueryCursor::new();
283        let captures = cursor.matches(&query, root_node, content.as_bytes());
284        
285        for match_ in captures {
286            for capture in match_.captures {
287                let node = capture.node;
288                let chunk_type = &query.capture_names()[capture.index as usize];
289                
290                let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::JavaScript)?;
291                chunks.push(chunk);
292            }
293        }
294        
295        chunks.sort_by_key(|c| c.start_byte);
296        Ok(chunks)
297    }
298    
299    /// Parse TypeScript code chunks using tree-sitter
300    fn parse_typescript_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
301        let mut chunks = Vec::new();
302        let root_node = tree.root_node();
303        
304        let query_str = r#"
305            (import_statement) @import
306            (export_statement) @export
307            (function_declaration) @function
308            (arrow_function) @arrow_function
309            (class_declaration) @class
310            (interface_declaration) @interface
311            (type_alias_declaration) @type_alias
312            (enum_declaration) @enum
313            (module_declaration) @module
314            (variable_declaration
315                declarations: (variable_declarator
316                    name: (identifier) @const_name
317                    value: (_) @const_value
318                ) @const_declarator
319                (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
320            ) @constant
321        "#;
322        
323        let query = Query::new(AstLanguage::TypeScript.tree_sitter_language(), query_str)
324            .map_err(|e| ScribeError::parse(format!("Invalid TypeScript query: {}", e)))?;
325        
326        let mut cursor = QueryCursor::new();
327        let captures = cursor.matches(&query, root_node, content.as_bytes());
328        
329        for match_ in captures {
330            for capture in match_.captures {
331                let node = capture.node;
332                let chunk_type = &query.capture_names()[capture.index as usize];
333                
334                let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::TypeScript)?;
335                chunks.push(chunk);
336            }
337        }
338        
339        chunks.sort_by_key(|c| c.start_byte);
340        Ok(chunks)
341    }
342    
343    /// Parse Go code chunks using tree-sitter
344    fn parse_go_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
345        let mut chunks = Vec::new();
346        let root_node = tree.root_node();
347        
348        let query_str = r#"
349            (package_clause) @package
350            (import_declaration) @import
351            (function_declaration) @function
352            (method_declaration) @method
353            (type_declaration) @type
354            (const_declaration) @const
355            (var_declaration) @var
356        "#;
357        
358        let query = Query::new(AstLanguage::Go.tree_sitter_language(), query_str)
359            .map_err(|e| ScribeError::parse(format!("Invalid Go query: {}", e)))?;
360        
361        let mut cursor = QueryCursor::new();
362        let captures = cursor.matches(&query, root_node, content.as_bytes());
363        
364        for match_ in captures {
365            for capture in match_.captures {
366                let node = capture.node;
367                let chunk_type = &query.capture_names()[capture.index as usize];
368                
369                let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Go)?;
370                chunks.push(chunk);
371            }
372        }
373        
374        chunks.sort_by_key(|c| c.start_byte);
375        Ok(chunks)
376    }
377    
378    /// Parse Rust code chunks using tree-sitter
379    fn parse_rust_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
380        let mut chunks = Vec::new();
381        let root_node = tree.root_node();
382        
383        let query_str = r#"
384            (use_declaration) @use
385            (mod_item) @mod
386            (struct_item) @struct
387            (enum_item) @enum
388            (trait_item) @trait
389            (impl_item) @impl
390            (function_item) @function
391            (const_item) @const
392            (static_item) @static
393            (type_item) @type_alias
394        "#;
395        
396        let query = Query::new(AstLanguage::Rust.tree_sitter_language(), query_str)
397            .map_err(|e| ScribeError::parse(format!("Invalid Rust query: {}", e)))?;
398        
399        let mut cursor = QueryCursor::new();
400        let captures = cursor.matches(&query, root_node, content.as_bytes());
401        
402        for match_ in captures {
403            for capture in match_.captures {
404                let node = capture.node;
405                let chunk_type = &query.capture_names()[capture.index as usize];
406                
407                let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Rust)?;
408                chunks.push(chunk);
409            }
410        }
411        
412        chunks.sort_by_key(|c| c.start_byte);
413        Ok(chunks)
414    }
415    
416    /// Create a chunk from a tree-sitter node
417    fn create_chunk_from_node(
418        &self,
419        content: &str,
420        node: Node,
421        chunk_type: &str,
422        language: &AstLanguage,
423    ) -> Result<AstChunk> {
424        let start_byte = node.start_byte();
425        let end_byte = node.end_byte();
426        let start_position = node.start_position();
427        let end_position = node.end_position();
428        
429        let chunk_content = &content[start_byte..end_byte];
430        let estimated_tokens = chunk_content.split_whitespace().count();
431        
432        // Calculate importance score based on chunk type and language
433        let importance_score = self.calculate_importance_score(chunk_type, language, node, content);
434        
435        // Extract name if available
436        let name = self.extract_name_from_node(node, content);
437        
438        // Check if public/exported
439        let is_public = self.is_node_public(node, content);
440        
441        // Check for documentation
442        let has_documentation = self.has_documentation(node, content);
443        
444        // Extract dependencies (simplified for now)
445        let dependencies = self.extract_dependencies(node, content);
446        
447        Ok(AstChunk {
448            content: chunk_content.to_string(),
449            chunk_type: chunk_type.to_string(),
450            start_line: start_position.row + 1,
451            end_line: end_position.row + 1,
452            start_byte,
453            end_byte,
454            importance_score,
455            estimated_tokens,
456            dependencies,
457            name,
458            is_public,
459            has_documentation,
460        })
461    }
462    
463    /// Calculate importance score based on AST analysis
464    fn calculate_importance_score(&self, chunk_type: &str, language: &AstLanguage, node: Node, content: &str) -> f64 {
465        let mut score: f64 = match chunk_type {
466            "import" | "import_from" | "use" => 0.9, // Imports are crucial
467            "package" => 0.95, // Package declarations are essential
468            "class" | "struct_item" | "trait_item" => 0.85, // Type definitions
469            "interface" | "type_alias" | "enum" => 0.8, // Type definitions
470            "function" | "method" => 0.75, // Functions
471            "const" | "constant" | "static" => 0.6, // Constants
472            "export" => 0.7, // Exports
473            "mod" | "module" => 0.65, // Modules
474            _ => 0.5, // Default
475        };
476        
477        // Boost score for public/exported items
478        if self.is_node_public(node, content) {
479            score += 0.1;
480        }
481        
482        // Boost score for documented items
483        if self.has_documentation(node, content) {
484            score += 0.05;
485        }
486        
487        // Language-specific adjustments
488        match language {
489            AstLanguage::Rust => {
490                // Rust impl blocks are very important
491                if chunk_type == "impl" {
492                    score = 0.85;
493                }
494            }
495            AstLanguage::TypeScript => {
496                // TypeScript interfaces are crucial
497                if chunk_type == "interface" {
498                    score = 0.9;
499                }
500            }
501            _ => {}
502        }
503        
504        score.min(1.0)
505    }
506    
507    /// Extract name/identifier from a node
508    fn extract_name_from_node(&self, node: Node, content: &str) -> Option<String> {
509        // Look for name field in node
510        for i in 0..node.child_count() {
511            if let Some(child) = node.child(i) {
512                if child.kind() == "identifier" || child.kind() == "type_identifier" {
513                    let name_bytes = &content.as_bytes()[child.start_byte()..child.end_byte()];
514                    if let Ok(name) = std::str::from_utf8(name_bytes) {
515                        return Some(name.to_string());
516                    }
517                }
518            }
519        }
520        None
521    }
522    
523    /// Check if a node represents a public/exported item
524    fn is_node_public(&self, node: Node, content: &str) -> bool {
525        // Check for pub keyword in Rust
526        if let Some(parent) = node.parent() {
527            for i in 0..parent.child_count() {
528                if let Some(child) = parent.child(i) {
529                    if child.kind() == "visibility_modifier" {
530                        let vis_bytes = &content.as_bytes()[child.start_byte()..child.end_byte()];
531                        if let Ok(vis) = std::str::from_utf8(vis_bytes) {
532                            return vis.contains("pub");
533                        }
534                    }
535                }
536            }
537        }
538        
539        // Check for export in JS/TS
540        let node_text = &content[node.start_byte()..node.end_byte()];
541        node_text.starts_with("export") || node_text.contains("export")
542    }
543    
544    /// Check if a node has associated documentation
545    fn has_documentation(&self, node: Node, content: &str) -> bool {
546        // Look for comments before the node
547        if let Some(prev_sibling) = node.prev_sibling() {
548            if prev_sibling.kind() == "comment" {
549                return true;
550            }
551        }
552        
553        // Look for docstrings in Python
554        if node.kind() == "function_definition" || node.kind() == "class_definition" {
555            for i in 0..node.child_count() {
556                if let Some(child) = node.child(i) {
557                    if child.kind() == "expression_statement" {
558                        if let Some(grandchild) = child.child(0) {
559                            if grandchild.kind() == "string" {
560                                let string_content = &content[grandchild.start_byte()..grandchild.end_byte()];
561                                if string_content.starts_with("\"\"\"") || string_content.starts_with("'''") {
562                                    return true;
563                                }
564                            }
565                        }
566                    }
567                }
568            }
569        }
570        
571        false
572    }
573    
574    /// Extract dependencies from a node (simplified implementation)
575    fn extract_dependencies(&self, node: Node, content: &str) -> Vec<String> {
576        let mut dependencies = Vec::new();
577        
578        // For import nodes, extract the imported modules
579        if node.kind() == "import_statement" || node.kind() == "import_from_statement" || node.kind() == "use_declaration" {
580            // This is a simplified implementation
581            // In a full implementation, we'd parse the specific import syntax
582            let import_text = &content[node.start_byte()..node.end_byte()];
583            
584            // Extract quoted strings as module names
585            let mut in_quote = false;
586            let mut quote_char = '"';
587            let mut current_module = String::new();
588            
589            for ch in import_text.chars() {
590                if ch == '"' || ch == '\'' {
591                    if !in_quote {
592                        in_quote = true;
593                        quote_char = ch;
594                    } else if ch == quote_char {
595                        in_quote = false;
596                        if !current_module.is_empty() {
597                            dependencies.push(current_module.clone());
598                            current_module.clear();
599                        }
600                    }
601                } else if in_quote {
602                    current_module.push(ch);
603                }
604            }
605        }
606        
607        dependencies
608    }
609    
610    /// Extract signatures for Python
611    fn extract_python_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
612        let mut signatures = Vec::new();
613        let root_node = tree.root_node();
614        
615        let query_str = r#"
616            (function_definition 
617                name: (identifier) @func_name
618                parameters: (parameters) @func_params
619            ) @function
620            (class_definition 
621                name: (identifier) @class_name
622            ) @class
623            (import_statement) @import
624            (import_from_statement) @import_from
625        "#;
626        
627        let query = Query::new(AstLanguage::Python.tree_sitter_language(), query_str)
628            .map_err(|e| ScribeError::parse(format!("Invalid Python signature query: {}", e)))?;
629        
630        let mut cursor = QueryCursor::new();
631        let captures = cursor.matches(&query, root_node, content.as_bytes());
632        
633        for match_ in captures {
634            let signature = self.extract_signature_from_match(content, &match_, &query)?;
635            signatures.push(signature);
636        }
637        
638        Ok(signatures)
639    }
640    
641    /// Extract signatures for other languages (similar pattern)
642    fn extract_javascript_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
643        // Similar implementation for JavaScript
644        Ok(Vec::new()) // Simplified for now
645    }
646    
647    fn extract_typescript_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
648        // Similar implementation for TypeScript
649        Ok(Vec::new()) // Simplified for now
650    }
651    
652    fn extract_go_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
653        // Similar implementation for Go
654        Ok(Vec::new()) // Simplified for now
655    }
656    
657    fn extract_rust_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
658        // Similar implementation for Rust
659        Ok(Vec::new()) // Simplified for now
660    }
661    
662    /// Extract signature from a query match
663    fn extract_signature_from_match(
664        &self,
665        content: &str,
666        match_: &tree_sitter::QueryMatch,
667        query: &Query,
668    ) -> Result<AstSignature> {
669        let mut signature_text = String::new();
670        let mut signature_type = String::new();
671        let mut name = String::new();
672        let mut line = 0;
673        
674        for capture in match_.captures {
675            let capture_name = &query.capture_names()[capture.index as usize];
676            let node = capture.node;
677            let node_text = &content[node.start_byte()..node.end_byte()];
678            
679            match capture_name.as_str() {
680                "function" | "class" | "import" | "import_from" => {
681                    signature_text = node_text.lines().next().unwrap_or("").to_string();
682                    signature_type = capture_name.to_string();
683                    line = node.start_position().row + 1;
684                }
685                "func_name" | "class_name" => {
686                    name = node_text.to_string();
687                }
688                _ => {}
689            }
690        }
691        
692        Ok(AstSignature {
693            signature: signature_text,
694            signature_type,
695            name,
696            parameters: Vec::new(), // Simplified
697            return_type: None,      // Simplified
698            is_public: false,       // Simplified
699            line,
700        })
701    }
702
703    /// Extract Python imports from AST
704    fn extract_python_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
705        let mut cursor = node.walk();
706        
707        
708        // Look for import_statement and import_from_statement nodes
709        if node.kind() == "import_statement" {
710            // Handle import statements like "import os" or "import sys as system"
711            for i in 0..node.child_count() {
712                if let Some(child) = node.child(i) {
713                    if child.kind() == "aliased_import" {
714                        // Handle "import sys as system"
715                        if let Some(name_node) = child.child_by_field_name("name") {
716                            let module = self.node_text(name_node, content);
717                            let alias = child.child_by_field_name("alias")
718                                .map(|alias_node| self.node_text(alias_node, content));
719                            let line_number = name_node.start_position().row + 1;
720                            
721                            imports.push(AstImport {
722                                module,
723                                alias,
724                                items: vec![],
725                                line_number,
726                                is_relative: false,
727                            });
728                        }
729                    } else if child.kind() == "dotted_as_name" {
730                        // Handle dotted imports with alias like "import package.module as mod"
731                        if let Some(name_node) = child.child_by_field_name("name") {
732                            let module = self.node_text(name_node, content);
733                            let alias = child.child_by_field_name("alias")
734                                .map(|alias_node| self.node_text(alias_node, content));
735                            let line_number = name_node.start_position().row + 1;
736                            
737                            imports.push(AstImport {
738                                module,
739                                alias,
740                                items: vec![],
741                                line_number,
742                                is_relative: false,
743                            });
744                        }
745                    } else if child.kind() == "dotted_name" || child.kind() == "identifier" {
746                        // Handle simple "import os"
747                        let module = self.node_text(child, content);
748                        let line_number = child.start_position().row + 1;
749                        
750                        imports.push(AstImport {
751                            module,
752                            alias: None,
753                            items: vec![],
754                            line_number,
755                            is_relative: false,
756                        });
757                    }
758                }
759            }
760        } else if node.kind() == "import_from_statement" {
761            let mut module = String::new();
762            let mut items = Vec::new();
763            let mut is_relative = false;
764            
765            if let Some(module_node) = node.child_by_field_name("module_name") {
766                module = self.node_text(module_node, content);
767                is_relative = module.starts_with('.');
768            }
769            
770            // Get imported items
771            for i in 0..node.child_count() {
772                if let Some(child) = node.child(i) {
773                    if child.kind() == "import_list" {
774                        for j in 0..child.child_count() {
775                            if let Some(item) = child.child(j) {
776                                if item.kind() == "dotted_name" || item.kind() == "identifier" {
777                                    items.push(self.node_text(item, content));
778                                }
779                            }
780                        }
781                    }
782                }
783            }
784            
785            let line_number = node.start_position().row + 1;
786            imports.push(AstImport {
787                module,
788                alias: None,
789                items,
790                line_number,
791                is_relative,
792            });
793        }
794        
795        // Recursively process children
796        for i in 0..node.child_count() {
797            if let Some(child) = node.child(i) {
798                self.extract_python_imports(&child, content, imports)?;
799            }
800        }
801        
802        Ok(())
803    }
804
805    /// Extract JavaScript/TypeScript imports from AST
806    fn extract_js_ts_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
807        if node.kind() == "import_statement" {
808            let mut module = String::new();
809            let mut items = Vec::new();
810            
811            // Find the source
812            for i in 0..node.child_count() {
813                if let Some(child) = node.child(i) {
814                    if child.kind() == "string" {
815                        module = self.node_text(child, content);
816                        // Remove quotes
817                        module = module.trim_matches('"').trim_matches('\'').to_string();
818                        break;
819                    }
820                }
821            }
822            
823            let line_number = node.start_position().row + 1;
824            imports.push(AstImport {
825                module,
826                alias: None,
827                items,
828                line_number,
829                is_relative: false,
830            });
831        }
832        
833        // Recursively process children
834        for i in 0..node.child_count() {
835            if let Some(child) = node.child(i) {
836                self.extract_js_ts_imports(&child, content, imports)?;
837            }
838        }
839        
840        Ok(())
841    }
842
843    /// Extract Go imports from AST
844    fn extract_go_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
845        if node.kind() == "import_spec" {
846            for i in 0..node.child_count() {
847                if let Some(child) = node.child(i) {
848                    if child.kind() == "interpreted_string_literal" {
849                        let module = self.node_text(child, content);
850                        let module = module.trim_matches('"').to_string();
851                        let line_number = child.start_position().row + 1;
852                        
853                        imports.push(AstImport {
854                            module,
855                            alias: None,
856                            items: vec![],
857                            line_number,
858                            is_relative: false,
859                        });
860                    }
861                }
862            }
863        }
864        
865        // Recursively process children
866        for i in 0..node.child_count() {
867            if let Some(child) = node.child(i) {
868                self.extract_go_imports(&child, content, imports)?;
869            }
870        }
871        
872        Ok(())
873    }
874
875    /// Extract Rust imports from AST
876    fn extract_rust_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
877        if node.kind() == "use_declaration" {
878            if let Some(use_tree) = node.child_by_field_name("argument") {
879                let module = self.node_text(use_tree, content);
880                let line_number = node.start_position().row + 1;
881                
882                imports.push(AstImport {
883                    module,
884                    alias: None,
885                    items: vec![],
886                    line_number,
887                    is_relative: false,
888                });
889            }
890        }
891        
892        // Recursively process children
893        for i in 0..node.child_count() {
894            if let Some(child) = node.child(i) {
895                self.extract_rust_imports(&child, content, imports)?;
896            }
897        }
898        
899        Ok(())
900    }
901
902    /// Helper to extract text from a node
903    fn node_text(&self, node: Node, content: &str) -> String {
904        content[node.start_byte()..node.end_byte()].to_string()
905    }
906}
907
908impl Default for AstParser {
909    fn default() -> Self {
910        Self::new().expect("Failed to create AstParser")
911    }
912}
913
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructing a parser must succeed.
    #[test]
    fn test_ast_parser_creation() {
        assert!(AstParser::new().is_ok());
    }

    /// Each known extension maps to its language; unknown ones map to `None`.
    #[test]
    fn test_language_detection() {
        let expectations = [
            ("py", Some(AstLanguage::Python)),
            ("js", Some(AstLanguage::JavaScript)),
            ("ts", Some(AstLanguage::TypeScript)),
            ("go", Some(AstLanguage::Go)),
            ("rs", Some(AstLanguage::Rust)),
            ("unknown", None),
        ];

        for &(extension, expected) in &expectations {
            assert_eq!(AstLanguage::from_extension(extension), expected);
        }
    }

    /// A small Python module yields import, function and class chunks.
    #[test]
    fn test_python_parsing() {
        let source = r#"
import os
import sys

def hello_world():
    """A simple function."""
    print("Hello, world!")

class Calculator:
    """A simple calculator."""
    
    def add(self, a, b):
        return a + b
"#;

        let mut parser = AstParser::new().unwrap();
        let chunks = parser.parse_chunks(source, "test.py").unwrap();
        assert!(!chunks.is_empty());

        // Should find imports, function, and class
        for &wanted in ["import", "function", "class"].iter() {
            assert!(chunks.iter().any(|chunk| chunk.chunk_type.as_str() == wanted));
        }
    }

    /// A small Rust module yields use, struct and impl chunks.
    #[test]
    fn test_rust_parsing() {
        let source = r#"
use std::collections::HashMap;

pub struct DataProcessor {
    data: HashMap<String, i32>,
}

impl DataProcessor {
    pub fn new() -> Self {
        Self {
            data: HashMap::new(),
        }
    }
}
"#;

        let mut parser = AstParser::new().unwrap();
        let chunks = parser.parse_chunks(source, "test.rs").unwrap();
        assert!(!chunks.is_empty());

        for &wanted in ["use", "struct", "impl"].iter() {
            assert!(chunks.iter().any(|chunk| chunk.chunk_type.as_str() == wanted));
        }
    }

    /// Signature extraction returns at least one entry for Python source.
    #[test]
    fn test_signature_extraction() {
        let source = r#"
def calculate(a: int, b: int) -> int:
    return a + b

class Calculator:
    def multiply(self, x, y):
        return x * y
"#;

        let mut parser = AstParser::new().unwrap();
        let signatures = parser.extract_signatures(source, "test.py").unwrap();
        assert!(!signatures.is_empty());
    }
}