1use std::collections::HashMap;
7use serde::{Deserialize, Serialize};
8use tree_sitter::{Parser, Language, Node, Tree, Query, QueryCursor};
9use scribe_core::{Result, ScribeError};
10
/// Source languages this module can parse with a tree-sitter grammar.
///
/// The variant doubles as the key of the per-language parser table, which is
/// why it derives `Copy`, `Eq` and `Hash`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AstLanguage {
    /// Python sources (`py`, `pyi`, `pyw` extensions).
    Python,
    /// JavaScript sources (`js`, `mjs`, `cjs` extensions).
    JavaScript,
    /// TypeScript sources (`ts`, `mts`, `cts` extensions).
    TypeScript,
    /// Go sources (`go` extension).
    Go,
    /// Rust sources (`rs` extension).
    Rust,
}
20
21impl AstLanguage {
22 pub fn tree_sitter_language(&self) -> Language {
24 match self {
25 AstLanguage::Python => tree_sitter_python::language(),
26 AstLanguage::JavaScript => tree_sitter_javascript::language(),
27 AstLanguage::TypeScript => tree_sitter_typescript::language_typescript(),
28 AstLanguage::Go => tree_sitter_go::language(),
29 AstLanguage::Rust => tree_sitter_rust::language(),
30 }
31 }
32
33 pub fn from_extension(ext: &str) -> Option<Self> {
35 match ext.to_lowercase().as_str() {
36 "py" | "pyi" | "pyw" => Some(AstLanguage::Python),
37 "js" | "mjs" | "cjs" => Some(AstLanguage::JavaScript),
38 "ts" | "mts" | "cts" => Some(AstLanguage::TypeScript),
39 "go" => Some(AstLanguage::Go),
40 "rs" => Some(AstLanguage::Rust),
41 _ => None,
42 }
43 }
44}
45
/// One import / use statement found in a source file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AstImport {
    /// Text of the imported module or path as written in the source.
    pub module: String,
    /// Local alias given to the import, when the extractor records one.
    pub alias: Option<String>,
    /// Individual names pulled from `module`, when the extractor records them.
    pub items: Vec<String>,
    /// 1-based line of the import (tree-sitter row + 1).
    pub line_number: usize,
    /// Whether the import was classified as relative by the extractor.
    pub is_relative: bool,
}
60
/// A contiguous piece of source text corresponding to one captured AST node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AstChunk {
    /// Source text covered by the node.
    pub content: String,
    /// Name of the query capture that produced this chunk (e.g. `function`).
    pub chunk_type: String,
    /// 1-based first line of the node.
    pub start_line: usize,
    /// 1-based last line of the node.
    pub end_line: usize,
    /// Byte offset of the node start within the parsed content.
    pub start_byte: usize,
    /// Byte offset of the node end within the parsed content.
    pub end_byte: usize,
    /// Heuristic importance; the scorer caps it at 1.0.
    pub importance_score: f64,
    /// Rough size estimate: the number of whitespace-separated words in `content`.
    pub estimated_tokens: usize,
    /// Module names referenced by the chunk, filled only for import-like nodes.
    pub dependencies: Vec<String>,
    /// Identifier naming the chunk, when one could be found.
    pub name: Option<String>,
    /// Whether the node was judged publicly visible.
    pub is_public: bool,
    /// Whether documentation was detected for the node.
    pub has_documentation: bool,
}
89
/// A compact description of a declaration (function, class, import, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AstSignature {
    /// Signature text; the extractor in this file stores the first line of the node.
    pub signature: String,
    /// Name of the query capture that identified the declaration.
    pub signature_type: String,
    /// Declared name; left empty when the match carries no name capture.
    pub name: String,
    /// Parameter texts, when the extractor collects them.
    pub parameters: Vec<String>,
    /// Return type text, when the extractor collects it.
    pub return_type: Option<String>,
    /// Whether the declaration is considered public.
    pub is_public: bool,
    /// 1-based line on which the declaration starts.
    pub line: usize,
}
108
/// Multi-language AST front end that owns one tree-sitter parser per language.
pub struct AstParser {
    /// Pre-configured parsers keyed by language; populated by `AstParser::new`.
    parsers: HashMap<AstLanguage, Parser>,
}
113
114impl AstParser {
115 pub fn new() -> Result<Self> {
117 let mut parsers = HashMap::new();
118
119 for language in [
120 AstLanguage::Python,
121 AstLanguage::JavaScript,
122 AstLanguage::TypeScript,
123 AstLanguage::Go,
124 AstLanguage::Rust,
125 ] {
126 let mut parser = Parser::new();
127 parser.set_language(language.tree_sitter_language())
128 .map_err(|e| ScribeError::parse(format!("Failed to set tree-sitter language: {}", e)))?;
129 parsers.insert(language, parser);
130 }
131
132 Ok(Self { parsers })
133 }
134
135 pub fn parse_chunks(&mut self, content: &str, file_path: &str) -> Result<Vec<AstChunk>> {
137 let language = self.detect_language(file_path)?;
138 let parser = self.parsers.get_mut(&language)
139 .ok_or_else(|| ScribeError::parse(format!("No parser for language: {:?}", language)))?;
140
141 let tree = parser.parse(content, None)
142 .ok_or_else(|| ScribeError::parse("Failed to parse source code".to_string()))?;
143
144 let chunks = match language {
145 AstLanguage::Python => self.parse_python_chunks(content, &tree)?,
146 AstLanguage::JavaScript => self.parse_javascript_chunks(content, &tree)?,
147 AstLanguage::TypeScript => self.parse_typescript_chunks(content, &tree)?,
148 AstLanguage::Go => self.parse_go_chunks(content, &tree)?,
149 AstLanguage::Rust => self.parse_rust_chunks(content, &tree)?,
150 };
151
152 Ok(chunks)
153 }
154
155 pub fn extract_imports(&self, content: &str, language: AstLanguage) -> Result<Vec<AstImport>> {
158 let mut parser = Parser::new();
160 parser.set_language(language.tree_sitter_language()).map_err(|e|
161 ScribeError::parse(format!("Failed to set language: {}", e)))?;
162
163 let tree = parser.parse(content, None)
164 .ok_or_else(|| ScribeError::parse("Failed to parse content"))?;
165
166 let mut imports = Vec::new();
167 let root_node = tree.root_node();
168
169 match language {
171 AstLanguage::Python => {
172 self.extract_python_imports(&root_node, content, &mut imports)?;
173 }
174 AstLanguage::JavaScript | AstLanguage::TypeScript => {
175 self.extract_js_ts_imports(&root_node, content, &mut imports)?;
176 }
177 AstLanguage::Go => {
178 self.extract_go_imports(&root_node, content, &mut imports)?;
179 }
180 AstLanguage::Rust => {
181 self.extract_rust_imports(&root_node, content, &mut imports)?;
182 }
183 }
184
185 Ok(imports)
186 }
187
188 pub fn extract_signatures(&mut self, content: &str, file_path: &str) -> Result<Vec<AstSignature>> {
189 let language = self.detect_language(file_path)?;
190 let parser = self.parsers.get_mut(&language)
191 .ok_or_else(|| ScribeError::parse(format!("No parser for language: {:?}", language)))?;
192
193 let tree = parser.parse(content, None)
194 .ok_or_else(|| ScribeError::parse("Failed to parse source code".to_string()))?;
195
196 let signatures = match language {
197 AstLanguage::Python => self.extract_python_signatures(content, &tree)?,
198 AstLanguage::JavaScript => self.extract_javascript_signatures(content, &tree)?,
199 AstLanguage::TypeScript => self.extract_typescript_signatures(content, &tree)?,
200 AstLanguage::Go => self.extract_go_signatures(content, &tree)?,
201 AstLanguage::Rust => self.extract_rust_signatures(content, &tree)?,
202 };
203
204 Ok(signatures)
205 }
206
207 fn detect_language(&self, file_path: &str) -> Result<AstLanguage> {
209 let extension = std::path::Path::new(file_path)
210 .extension()
211 .and_then(|ext| ext.to_str())
212 .unwrap_or("");
213
214 AstLanguage::from_extension(extension)
215 .ok_or_else(|| ScribeError::parse(format!("Unsupported file extension: {}", extension)))
216 }
217
218 fn parse_python_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
220 let mut chunks = Vec::new();
221 let root_node = tree.root_node();
222
223 let query_str = r#"
225 (import_statement) @import
226 (import_from_statement) @import_from
227 (function_definition) @function
228 (class_definition) @class
229 (assignment
230 left: (identifier) @const_name
231 right: (_) @const_value
232 (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
233 ) @constant
234 "#;
235
236 let query = Query::new(AstLanguage::Python.tree_sitter_language(), query_str)
237 .map_err(|e| ScribeError::parse(format!("Invalid Python query: {}", e)))?;
238
239 let mut cursor = QueryCursor::new();
240 let captures = cursor.matches(&query, root_node, content.as_bytes());
241
242 for match_ in captures {
243 for capture in match_.captures {
244 let node = capture.node;
245 let chunk_type = &query.capture_names()[capture.index as usize];
246
247 let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Python)?;
248 chunks.push(chunk);
249 }
250 }
251
252 chunks.sort_by_key(|c| c.start_byte);
254 Ok(chunks)
255 }
256
257 fn parse_javascript_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
259 let mut chunks = Vec::new();
260 let root_node = tree.root_node();
261
262 let query_str = r#"
263 (import_statement) @import
264 (export_statement) @export
265 (function_declaration) @function
266 (arrow_function) @arrow_function
267 (class_declaration) @class
268 (interface_declaration) @interface
269 (type_alias_declaration) @type_alias
270 (variable_declaration
271 declarations: (variable_declarator
272 name: (identifier) @const_name
273 value: (_) @const_value
274 ) @const_declarator
275 (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
276 ) @constant
277 "#;
278
279 let query = Query::new(AstLanguage::JavaScript.tree_sitter_language(), query_str)
280 .map_err(|e| ScribeError::parse(format!("Invalid JavaScript query: {}", e)))?;
281
282 let mut cursor = QueryCursor::new();
283 let captures = cursor.matches(&query, root_node, content.as_bytes());
284
285 for match_ in captures {
286 for capture in match_.captures {
287 let node = capture.node;
288 let chunk_type = &query.capture_names()[capture.index as usize];
289
290 let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::JavaScript)?;
291 chunks.push(chunk);
292 }
293 }
294
295 chunks.sort_by_key(|c| c.start_byte);
296 Ok(chunks)
297 }
298
299 fn parse_typescript_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
301 let mut chunks = Vec::new();
302 let root_node = tree.root_node();
303
304 let query_str = r#"
305 (import_statement) @import
306 (export_statement) @export
307 (function_declaration) @function
308 (arrow_function) @arrow_function
309 (class_declaration) @class
310 (interface_declaration) @interface
311 (type_alias_declaration) @type_alias
312 (enum_declaration) @enum
313 (module_declaration) @module
314 (variable_declaration
315 declarations: (variable_declarator
316 name: (identifier) @const_name
317 value: (_) @const_value
318 ) @const_declarator
319 (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
320 ) @constant
321 "#;
322
323 let query = Query::new(AstLanguage::TypeScript.tree_sitter_language(), query_str)
324 .map_err(|e| ScribeError::parse(format!("Invalid TypeScript query: {}", e)))?;
325
326 let mut cursor = QueryCursor::new();
327 let captures = cursor.matches(&query, root_node, content.as_bytes());
328
329 for match_ in captures {
330 for capture in match_.captures {
331 let node = capture.node;
332 let chunk_type = &query.capture_names()[capture.index as usize];
333
334 let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::TypeScript)?;
335 chunks.push(chunk);
336 }
337 }
338
339 chunks.sort_by_key(|c| c.start_byte);
340 Ok(chunks)
341 }
342
343 fn parse_go_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
345 let mut chunks = Vec::new();
346 let root_node = tree.root_node();
347
348 let query_str = r#"
349 (package_clause) @package
350 (import_declaration) @import
351 (function_declaration) @function
352 (method_declaration) @method
353 (type_declaration) @type
354 (const_declaration) @const
355 (var_declaration) @var
356 "#;
357
358 let query = Query::new(AstLanguage::Go.tree_sitter_language(), query_str)
359 .map_err(|e| ScribeError::parse(format!("Invalid Go query: {}", e)))?;
360
361 let mut cursor = QueryCursor::new();
362 let captures = cursor.matches(&query, root_node, content.as_bytes());
363
364 for match_ in captures {
365 for capture in match_.captures {
366 let node = capture.node;
367 let chunk_type = &query.capture_names()[capture.index as usize];
368
369 let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Go)?;
370 chunks.push(chunk);
371 }
372 }
373
374 chunks.sort_by_key(|c| c.start_byte);
375 Ok(chunks)
376 }
377
378 fn parse_rust_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
380 let mut chunks = Vec::new();
381 let root_node = tree.root_node();
382
383 let query_str = r#"
384 (use_declaration) @use
385 (mod_item) @mod
386 (struct_item) @struct
387 (enum_item) @enum
388 (trait_item) @trait
389 (impl_item) @impl
390 (function_item) @function
391 (const_item) @const
392 (static_item) @static
393 (type_item) @type_alias
394 "#;
395
396 let query = Query::new(AstLanguage::Rust.tree_sitter_language(), query_str)
397 .map_err(|e| ScribeError::parse(format!("Invalid Rust query: {}", e)))?;
398
399 let mut cursor = QueryCursor::new();
400 let captures = cursor.matches(&query, root_node, content.as_bytes());
401
402 for match_ in captures {
403 for capture in match_.captures {
404 let node = capture.node;
405 let chunk_type = &query.capture_names()[capture.index as usize];
406
407 let chunk = self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Rust)?;
408 chunks.push(chunk);
409 }
410 }
411
412 chunks.sort_by_key(|c| c.start_byte);
413 Ok(chunks)
414 }
415
416 fn create_chunk_from_node(
418 &self,
419 content: &str,
420 node: Node,
421 chunk_type: &str,
422 language: &AstLanguage,
423 ) -> Result<AstChunk> {
424 let start_byte = node.start_byte();
425 let end_byte = node.end_byte();
426 let start_position = node.start_position();
427 let end_position = node.end_position();
428
429 let chunk_content = &content[start_byte..end_byte];
430 let estimated_tokens = chunk_content.split_whitespace().count();
431
432 let importance_score = self.calculate_importance_score(chunk_type, language, node, content);
434
435 let name = self.extract_name_from_node(node, content);
437
438 let is_public = self.is_node_public(node, content);
440
441 let has_documentation = self.has_documentation(node, content);
443
444 let dependencies = self.extract_dependencies(node, content);
446
447 Ok(AstChunk {
448 content: chunk_content.to_string(),
449 chunk_type: chunk_type.to_string(),
450 start_line: start_position.row + 1,
451 end_line: end_position.row + 1,
452 start_byte,
453 end_byte,
454 importance_score,
455 estimated_tokens,
456 dependencies,
457 name,
458 is_public,
459 has_documentation,
460 })
461 }
462
463 fn calculate_importance_score(&self, chunk_type: &str, language: &AstLanguage, node: Node, content: &str) -> f64 {
465 let mut score: f64 = match chunk_type {
466 "import" | "import_from" | "use" => 0.9, "package" => 0.95, "class" | "struct_item" | "trait_item" => 0.85, "interface" | "type_alias" | "enum" => 0.8, "function" | "method" => 0.75, "const" | "constant" | "static" => 0.6, "export" => 0.7, "mod" | "module" => 0.65, _ => 0.5, };
476
477 if self.is_node_public(node, content) {
479 score += 0.1;
480 }
481
482 if self.has_documentation(node, content) {
484 score += 0.05;
485 }
486
487 match language {
489 AstLanguage::Rust => {
490 if chunk_type == "impl" {
492 score = 0.85;
493 }
494 }
495 AstLanguage::TypeScript => {
496 if chunk_type == "interface" {
498 score = 0.9;
499 }
500 }
501 _ => {}
502 }
503
504 score.min(1.0)
505 }
506
507 fn extract_name_from_node(&self, node: Node, content: &str) -> Option<String> {
509 for i in 0..node.child_count() {
511 if let Some(child) = node.child(i) {
512 if child.kind() == "identifier" || child.kind() == "type_identifier" {
513 let name_bytes = &content.as_bytes()[child.start_byte()..child.end_byte()];
514 if let Ok(name) = std::str::from_utf8(name_bytes) {
515 return Some(name.to_string());
516 }
517 }
518 }
519 }
520 None
521 }
522
523 fn is_node_public(&self, node: Node, content: &str) -> bool {
525 if let Some(parent) = node.parent() {
527 for i in 0..parent.child_count() {
528 if let Some(child) = parent.child(i) {
529 if child.kind() == "visibility_modifier" {
530 let vis_bytes = &content.as_bytes()[child.start_byte()..child.end_byte()];
531 if let Ok(vis) = std::str::from_utf8(vis_bytes) {
532 return vis.contains("pub");
533 }
534 }
535 }
536 }
537 }
538
539 let node_text = &content[node.start_byte()..node.end_byte()];
541 node_text.starts_with("export") || node_text.contains("export")
542 }
543
544 fn has_documentation(&self, node: Node, content: &str) -> bool {
546 if let Some(prev_sibling) = node.prev_sibling() {
548 if prev_sibling.kind() == "comment" {
549 return true;
550 }
551 }
552
553 if node.kind() == "function_definition" || node.kind() == "class_definition" {
555 for i in 0..node.child_count() {
556 if let Some(child) = node.child(i) {
557 if child.kind() == "expression_statement" {
558 if let Some(grandchild) = child.child(0) {
559 if grandchild.kind() == "string" {
560 let string_content = &content[grandchild.start_byte()..grandchild.end_byte()];
561 if string_content.starts_with("\"\"\"") || string_content.starts_with("'''") {
562 return true;
563 }
564 }
565 }
566 }
567 }
568 }
569 }
570
571 false
572 }
573
574 fn extract_dependencies(&self, node: Node, content: &str) -> Vec<String> {
576 let mut dependencies = Vec::new();
577
578 if node.kind() == "import_statement" || node.kind() == "import_from_statement" || node.kind() == "use_declaration" {
580 let import_text = &content[node.start_byte()..node.end_byte()];
583
584 let mut in_quote = false;
586 let mut quote_char = '"';
587 let mut current_module = String::new();
588
589 for ch in import_text.chars() {
590 if ch == '"' || ch == '\'' {
591 if !in_quote {
592 in_quote = true;
593 quote_char = ch;
594 } else if ch == quote_char {
595 in_quote = false;
596 if !current_module.is_empty() {
597 dependencies.push(current_module.clone());
598 current_module.clear();
599 }
600 }
601 } else if in_quote {
602 current_module.push(ch);
603 }
604 }
605 }
606
607 dependencies
608 }
609
610 fn extract_python_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
612 let mut signatures = Vec::new();
613 let root_node = tree.root_node();
614
615 let query_str = r#"
616 (function_definition
617 name: (identifier) @func_name
618 parameters: (parameters) @func_params
619 ) @function
620 (class_definition
621 name: (identifier) @class_name
622 ) @class
623 (import_statement) @import
624 (import_from_statement) @import_from
625 "#;
626
627 let query = Query::new(AstLanguage::Python.tree_sitter_language(), query_str)
628 .map_err(|e| ScribeError::parse(format!("Invalid Python signature query: {}", e)))?;
629
630 let mut cursor = QueryCursor::new();
631 let captures = cursor.matches(&query, root_node, content.as_bytes());
632
633 for match_ in captures {
634 let signature = self.extract_signature_from_match(content, &match_, &query)?;
635 signatures.push(signature);
636 }
637
638 Ok(signatures)
639 }
640
641 fn extract_javascript_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
643 Ok(Vec::new()) }
646
647 fn extract_typescript_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
648 Ok(Vec::new()) }
651
652 fn extract_go_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
653 Ok(Vec::new()) }
656
657 fn extract_rust_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
658 Ok(Vec::new()) }
661
662 fn extract_signature_from_match(
664 &self,
665 content: &str,
666 match_: &tree_sitter::QueryMatch,
667 query: &Query,
668 ) -> Result<AstSignature> {
669 let mut signature_text = String::new();
670 let mut signature_type = String::new();
671 let mut name = String::new();
672 let mut line = 0;
673
674 for capture in match_.captures {
675 let capture_name = &query.capture_names()[capture.index as usize];
676 let node = capture.node;
677 let node_text = &content[node.start_byte()..node.end_byte()];
678
679 match capture_name.as_str() {
680 "function" | "class" | "import" | "import_from" => {
681 signature_text = node_text.lines().next().unwrap_or("").to_string();
682 signature_type = capture_name.to_string();
683 line = node.start_position().row + 1;
684 }
685 "func_name" | "class_name" => {
686 name = node_text.to_string();
687 }
688 _ => {}
689 }
690 }
691
692 Ok(AstSignature {
693 signature: signature_text,
694 signature_type,
695 name,
696 parameters: Vec::new(), return_type: None, is_public: false, line,
700 })
701 }
702
703 fn extract_python_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
705 let mut cursor = node.walk();
706
707
708 if node.kind() == "import_statement" {
710 for i in 0..node.child_count() {
712 if let Some(child) = node.child(i) {
713 if child.kind() == "aliased_import" {
714 if let Some(name_node) = child.child_by_field_name("name") {
716 let module = self.node_text(name_node, content);
717 let alias = child.child_by_field_name("alias")
718 .map(|alias_node| self.node_text(alias_node, content));
719 let line_number = name_node.start_position().row + 1;
720
721 imports.push(AstImport {
722 module,
723 alias,
724 items: vec![],
725 line_number,
726 is_relative: false,
727 });
728 }
729 } else if child.kind() == "dotted_as_name" {
730 if let Some(name_node) = child.child_by_field_name("name") {
732 let module = self.node_text(name_node, content);
733 let alias = child.child_by_field_name("alias")
734 .map(|alias_node| self.node_text(alias_node, content));
735 let line_number = name_node.start_position().row + 1;
736
737 imports.push(AstImport {
738 module,
739 alias,
740 items: vec![],
741 line_number,
742 is_relative: false,
743 });
744 }
745 } else if child.kind() == "dotted_name" || child.kind() == "identifier" {
746 let module = self.node_text(child, content);
748 let line_number = child.start_position().row + 1;
749
750 imports.push(AstImport {
751 module,
752 alias: None,
753 items: vec![],
754 line_number,
755 is_relative: false,
756 });
757 }
758 }
759 }
760 } else if node.kind() == "import_from_statement" {
761 let mut module = String::new();
762 let mut items = Vec::new();
763 let mut is_relative = false;
764
765 if let Some(module_node) = node.child_by_field_name("module_name") {
766 module = self.node_text(module_node, content);
767 is_relative = module.starts_with('.');
768 }
769
770 for i in 0..node.child_count() {
772 if let Some(child) = node.child(i) {
773 if child.kind() == "import_list" {
774 for j in 0..child.child_count() {
775 if let Some(item) = child.child(j) {
776 if item.kind() == "dotted_name" || item.kind() == "identifier" {
777 items.push(self.node_text(item, content));
778 }
779 }
780 }
781 }
782 }
783 }
784
785 let line_number = node.start_position().row + 1;
786 imports.push(AstImport {
787 module,
788 alias: None,
789 items,
790 line_number,
791 is_relative,
792 });
793 }
794
795 for i in 0..node.child_count() {
797 if let Some(child) = node.child(i) {
798 self.extract_python_imports(&child, content, imports)?;
799 }
800 }
801
802 Ok(())
803 }
804
805 fn extract_js_ts_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
807 if node.kind() == "import_statement" {
808 let mut module = String::new();
809 let mut items = Vec::new();
810
811 for i in 0..node.child_count() {
813 if let Some(child) = node.child(i) {
814 if child.kind() == "string" {
815 module = self.node_text(child, content);
816 module = module.trim_matches('"').trim_matches('\'').to_string();
818 break;
819 }
820 }
821 }
822
823 let line_number = node.start_position().row + 1;
824 imports.push(AstImport {
825 module,
826 alias: None,
827 items,
828 line_number,
829 is_relative: false,
830 });
831 }
832
833 for i in 0..node.child_count() {
835 if let Some(child) = node.child(i) {
836 self.extract_js_ts_imports(&child, content, imports)?;
837 }
838 }
839
840 Ok(())
841 }
842
843 fn extract_go_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
845 if node.kind() == "import_spec" {
846 for i in 0..node.child_count() {
847 if let Some(child) = node.child(i) {
848 if child.kind() == "interpreted_string_literal" {
849 let module = self.node_text(child, content);
850 let module = module.trim_matches('"').to_string();
851 let line_number = child.start_position().row + 1;
852
853 imports.push(AstImport {
854 module,
855 alias: None,
856 items: vec![],
857 line_number,
858 is_relative: false,
859 });
860 }
861 }
862 }
863 }
864
865 for i in 0..node.child_count() {
867 if let Some(child) = node.child(i) {
868 self.extract_go_imports(&child, content, imports)?;
869 }
870 }
871
872 Ok(())
873 }
874
875 fn extract_rust_imports(&self, node: &Node, content: &str, imports: &mut Vec<AstImport>) -> Result<()> {
877 if node.kind() == "use_declaration" {
878 if let Some(use_tree) = node.child_by_field_name("argument") {
879 let module = self.node_text(use_tree, content);
880 let line_number = node.start_position().row + 1;
881
882 imports.push(AstImport {
883 module,
884 alias: None,
885 items: vec![],
886 line_number,
887 is_relative: false,
888 });
889 }
890 }
891
892 for i in 0..node.child_count() {
894 if let Some(child) = node.child(i) {
895 self.extract_rust_imports(&child, content, imports)?;
896 }
897 }
898
899 Ok(())
900 }
901
902 fn node_text(&self, node: Node, content: &str) -> String {
904 content[node.start_byte()..node.end_byte()].to_string()
905 }
906}
907
908impl Default for AstParser {
909 fn default() -> Self {
910 Self::new().expect("Failed to create AstParser")
911 }
912}
913
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the capture labels of `chunks` in order.
    fn labels(chunks: &[AstChunk]) -> Vec<&str> {
        chunks.iter().map(|chunk| chunk.chunk_type.as_str()).collect()
    }

    /// Constructing the parser set must succeed for all bundled grammars.
    #[test]
    fn test_ast_parser_creation() {
        assert!(AstParser::new().is_ok());
    }

    /// Known extensions map to their language; unknown ones map to `None`.
    #[test]
    fn test_language_detection() {
        let expectations = [
            ("py", Some(AstLanguage::Python)),
            ("js", Some(AstLanguage::JavaScript)),
            ("ts", Some(AstLanguage::TypeScript)),
            ("go", Some(AstLanguage::Go)),
            ("rs", Some(AstLanguage::Rust)),
            ("unknown", None),
        ];

        for (extension, expected) in expectations {
            assert_eq!(AstLanguage::from_extension(extension), expected);
        }
    }

    /// Python sources yield import, function and class chunks.
    #[test]
    fn test_python_parsing() {
        let content = r#"
import os
import sys

def hello_world():
    """A simple function."""
    print("Hello, world!")

class Calculator:
    """A simple calculator."""

    def add(self, a, b):
        return a + b
"#;

        let mut parser = AstParser::new().unwrap();
        let chunks = parser.parse_chunks(content, "test.py").unwrap();
        assert!(!chunks.is_empty());

        let chunk_types = labels(&chunks);
        for expected in ["import", "function", "class"] {
            assert!(chunk_types.contains(&expected));
        }
    }

    /// Rust sources yield use, struct and impl chunks.
    #[test]
    fn test_rust_parsing() {
        let content = r#"
use std::collections::HashMap;

pub struct DataProcessor {
    data: HashMap<String, i32>,
}

impl DataProcessor {
    pub fn new() -> Self {
        Self {
            data: HashMap::new(),
        }
    }
}
"#;

        let mut parser = AstParser::new().unwrap();
        let chunks = parser.parse_chunks(content, "test.rs").unwrap();
        assert!(!chunks.is_empty());

        let chunk_types = labels(&chunks);
        for expected in ["use", "struct", "impl"] {
            assert!(chunk_types.contains(&expected));
        }
    }

    /// Signature extraction finds at least one declaration in Python code.
    #[test]
    fn test_signature_extraction() {
        let content = r#"
def calculate(a: int, b: int) -> int:
    return a + b

class Calculator:
    def multiply(self, x, y):
        return x * y
"#;

        let mut parser = AstParser::new().unwrap();
        let signatures = parser.extract_signatures(content, "test.py").unwrap();
        assert!(!signatures.is_empty());
    }
}