1use scribe_core::tokenization::{utils as token_utils, TokenCounter};
7use scribe_core::{Result, ScribeError};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use tree_sitter::{Language, Node, Parser, Query, QueryCursor, Tree};
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
14pub enum AstLanguage {
15 Python,
16 JavaScript,
17 TypeScript,
18 Go,
19 Rust,
20}
21
22impl AstLanguage {
23 pub fn tree_sitter_language(&self) -> Language {
25 match self {
26 AstLanguage::Python => tree_sitter_python::language(),
27 AstLanguage::JavaScript => tree_sitter_javascript::language(),
28 AstLanguage::TypeScript => tree_sitter_typescript::language_typescript(),
29 AstLanguage::Go => tree_sitter_go::language(),
30 AstLanguage::Rust => tree_sitter_rust::language(),
31 }
32 }
33
34 pub fn from_extension(ext: &str) -> Option<Self> {
36 match ext.to_lowercase().as_str() {
37 "py" | "pyi" | "pyw" => Some(AstLanguage::Python),
38 "js" | "mjs" | "cjs" => Some(AstLanguage::JavaScript),
39 "ts" | "mts" | "cts" => Some(AstLanguage::TypeScript),
40 "go" => Some(AstLanguage::Go),
41 "rs" => Some(AstLanguage::Rust),
42 _ => None,
43 }
44 }
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct AstImport {
50 pub module: String,
52 pub alias: Option<String>,
54 pub items: Vec<String>,
56 pub line_number: usize,
58 pub is_relative: bool,
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct AstChunk {
65 pub content: String,
67 pub chunk_type: String,
69 pub start_line: usize,
71 pub end_line: usize,
73 pub start_byte: usize,
75 pub end_byte: usize,
77 pub importance_score: f64,
79 pub estimated_tokens: usize,
81 pub dependencies: Vec<String>,
83 pub name: Option<String>,
85 pub is_public: bool,
87 pub has_documentation: bool,
89}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
93pub struct AstSignature {
94 pub signature: String,
96 pub signature_type: String,
98 pub name: String,
100 pub parameters: Vec<String>,
102 pub return_type: Option<String>,
104 pub is_public: bool,
106 pub line: usize,
108}
109
110pub struct AstParser {
112 parsers: HashMap<AstLanguage, Parser>,
113}
114
115impl AstParser {
116 pub fn new() -> Result<Self> {
118 let mut parsers = HashMap::new();
119
120 for language in [
121 AstLanguage::Python,
122 AstLanguage::JavaScript,
123 AstLanguage::TypeScript,
124 AstLanguage::Go,
125 AstLanguage::Rust,
126 ] {
127 let mut parser = Parser::new();
128 parser
129 .set_language(language.tree_sitter_language())
130 .map_err(|e| {
131 ScribeError::parse(format!("Failed to set tree-sitter language: {}", e))
132 })?;
133 parsers.insert(language, parser);
134 }
135
136 Ok(Self { parsers })
137 }
138
139 pub fn parse_chunks(&mut self, content: &str, file_path: &str) -> Result<Vec<AstChunk>> {
141 let language = self.detect_language(file_path)?;
142 let parser = self
143 .parsers
144 .get_mut(&language)
145 .ok_or_else(|| ScribeError::parse(format!("No parser for language: {:?}", language)))?;
146
147 let tree = parser
148 .parse(content, None)
149 .ok_or_else(|| ScribeError::parse("Failed to parse source code".to_string()))?;
150
151 let chunks = match language {
152 AstLanguage::Python => self.parse_python_chunks(content, &tree)?,
153 AstLanguage::JavaScript => self.parse_javascript_chunks(content, &tree)?,
154 AstLanguage::TypeScript => self.parse_typescript_chunks(content, &tree)?,
155 AstLanguage::Go => self.parse_go_chunks(content, &tree)?,
156 AstLanguage::Rust => self.parse_rust_chunks(content, &tree)?,
157 };
158
159 Ok(chunks)
160 }
161
162 pub fn extract_imports(&self, content: &str, language: AstLanguage) -> Result<Vec<AstImport>> {
165 let mut parser = Parser::new();
167 parser
168 .set_language(language.tree_sitter_language())
169 .map_err(|e| ScribeError::parse(format!("Failed to set language: {}", e)))?;
170
171 let tree = parser
172 .parse(content, None)
173 .ok_or_else(|| ScribeError::parse("Failed to parse content"))?;
174
175 let mut imports = Vec::new();
176
177 let mut cursor = tree.walk();
179 self.extract_imports_with_cursor(&mut cursor, content, language, &mut imports)?;
180
181 Ok(imports)
182 }
183
184 fn extract_imports_with_cursor(
186 &self,
187 cursor: &mut tree_sitter::TreeCursor,
188 content: &str,
189 language: AstLanguage,
190 imports: &mut Vec<AstImport>,
191 ) -> Result<()> {
192 let node = cursor.node();
193
194 if !self.node_can_contain_imports(node.kind()) {
196 return Ok(());
197 }
198
199 if self.is_import_node(node.kind()) {
201 self.extract_import_from_node(node, content, language, imports)?;
202 }
203
204 if cursor.goto_first_child() {
206 loop {
207 self.extract_imports_with_cursor(cursor, content, language, imports)?;
208 if !cursor.goto_next_sibling() {
209 break;
210 }
211 }
212 cursor.goto_parent();
213 }
214
215 Ok(())
216 }
217
218 fn node_can_contain_imports(&self, kind: &str) -> bool {
220 matches!(
221 kind,
222 "import_statement"
223 | "import_from_statement"
224 | "use_declaration"
225 | "import_declaration"
226 | "import_spec"
227 | "source_file"
228 | "module"
229 | "program"
230 | "translation_unit"
231 | "block"
232 | "statement_block"
233 ) || kind.contains("import")
234 || kind.contains("use")
235 }
236
237 fn is_import_node(&self, kind: &str) -> bool {
239 matches!(
240 kind,
241 "import_statement"
242 | "import_from_statement"
243 | "use_declaration"
244 | "import_declaration"
245 | "import_spec"
246 )
247 }
248
249 fn extract_import_from_node(
251 &self,
252 node: Node,
253 content: &str,
254 language: AstLanguage,
255 imports: &mut Vec<AstImport>,
256 ) -> Result<()> {
257 match language {
258 AstLanguage::Python => {
259 self.extract_python_import_node(node, content, imports)?;
260 }
261 AstLanguage::JavaScript | AstLanguage::TypeScript => {
262 self.extract_js_ts_import_node(node, content, imports)?;
263 }
264 AstLanguage::Go => {
265 self.extract_go_import_node(node, content, imports)?;
266 }
267 AstLanguage::Rust => {
268 self.extract_rust_import_node(node, content, imports)?;
269 }
270 }
271 Ok(())
272 }
273
274 pub fn extract_signatures(
275 &mut self,
276 content: &str,
277 file_path: &str,
278 ) -> Result<Vec<AstSignature>> {
279 let language = self.detect_language(file_path)?;
280 let parser = self
281 .parsers
282 .get_mut(&language)
283 .ok_or_else(|| ScribeError::parse(format!("No parser for language: {:?}", language)))?;
284
285 let tree = parser
286 .parse(content, None)
287 .ok_or_else(|| ScribeError::parse("Failed to parse source code".to_string()))?;
288
289 let signatures = match language {
290 AstLanguage::Python => self.extract_python_signatures(content, &tree)?,
291 AstLanguage::JavaScript => self.extract_javascript_signatures(content, &tree)?,
292 AstLanguage::TypeScript => self.extract_typescript_signatures(content, &tree)?,
293 AstLanguage::Go => self.extract_go_signatures(content, &tree)?,
294 AstLanguage::Rust => self.extract_rust_signatures(content, &tree)?,
295 };
296
297 Ok(signatures)
298 }
299
300 fn detect_language(&self, file_path: &str) -> Result<AstLanguage> {
302 let extension = std::path::Path::new(file_path)
303 .extension()
304 .and_then(|ext| ext.to_str())
305 .unwrap_or("");
306
307 AstLanguage::from_extension(extension)
308 .ok_or_else(|| ScribeError::parse(format!("Unsupported file extension: {}", extension)))
309 }
310
311 fn parse_python_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
313 let mut chunks = Vec::new();
314 let root_node = tree.root_node();
315
316 let query_str = r#"
318 (import_statement) @import
319 (import_from_statement) @import_from
320 (function_definition) @function
321 (class_definition) @class
322 (assignment
323 left: (identifier) @const_name
324 right: (_) @const_value
325 (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
326 ) @constant
327 "#;
328
329 let query = Query::new(AstLanguage::Python.tree_sitter_language(), query_str)
330 .map_err(|e| ScribeError::parse(format!("Invalid Python query: {}", e)))?;
331
332 let mut cursor = QueryCursor::new();
333 let captures = cursor.matches(&query, root_node, content.as_bytes());
334
335 for match_ in captures {
336 for capture in match_.captures {
337 let node = capture.node;
338 let chunk_type = &query.capture_names()[capture.index as usize];
339
340 let chunk =
341 self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Python)?;
342 chunks.push(chunk);
343 }
344 }
345
346 chunks.sort_by_key(|c| c.start_byte);
348 Ok(chunks)
349 }
350
351 fn parse_javascript_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
353 let mut chunks = Vec::new();
354 let root_node = tree.root_node();
355
356 let query_str = r#"
357 (import_statement) @import
358 (export_statement) @export
359 (function_declaration) @function
360 (arrow_function) @arrow_function
361 (class_declaration) @class
362 (interface_declaration) @interface
363 (type_alias_declaration) @type_alias
364 (variable_declaration
365 declarations: (variable_declarator
366 name: (identifier) @const_name
367 value: (_) @const_value
368 ) @const_declarator
369 (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
370 ) @constant
371 "#;
372
373 let query = Query::new(AstLanguage::JavaScript.tree_sitter_language(), query_str)
374 .map_err(|e| ScribeError::parse(format!("Invalid JavaScript query: {}", e)))?;
375
376 let mut cursor = QueryCursor::new();
377 let captures = cursor.matches(&query, root_node, content.as_bytes());
378
379 for match_ in captures {
380 for capture in match_.captures {
381 let node = capture.node;
382 let chunk_type = &query.capture_names()[capture.index as usize];
383
384 let chunk = self.create_chunk_from_node(
385 content,
386 node,
387 chunk_type,
388 &AstLanguage::JavaScript,
389 )?;
390 chunks.push(chunk);
391 }
392 }
393
394 chunks.sort_by_key(|c| c.start_byte);
395 Ok(chunks)
396 }
397
398 fn parse_typescript_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
400 let mut chunks = Vec::new();
401 let root_node = tree.root_node();
402
403 let query_str = r#"
404 (import_statement) @import
405 (export_statement) @export
406 (function_declaration) @function
407 (arrow_function) @arrow_function
408 (class_declaration) @class
409 (interface_declaration) @interface
410 (type_alias_declaration) @type_alias
411 (enum_declaration) @enum
412 (module_declaration) @module
413 (variable_declaration
414 declarations: (variable_declarator
415 name: (identifier) @const_name
416 value: (_) @const_value
417 ) @const_declarator
418 (#match? @const_name "^[A-Z_][A-Z0-9_]*$")
419 ) @constant
420 "#;
421
422 let query = Query::new(AstLanguage::TypeScript.tree_sitter_language(), query_str)
423 .map_err(|e| ScribeError::parse(format!("Invalid TypeScript query: {}", e)))?;
424
425 let mut cursor = QueryCursor::new();
426 let captures = cursor.matches(&query, root_node, content.as_bytes());
427
428 for match_ in captures {
429 for capture in match_.captures {
430 let node = capture.node;
431 let chunk_type = &query.capture_names()[capture.index as usize];
432
433 let chunk = self.create_chunk_from_node(
434 content,
435 node,
436 chunk_type,
437 &AstLanguage::TypeScript,
438 )?;
439 chunks.push(chunk);
440 }
441 }
442
443 chunks.sort_by_key(|c| c.start_byte);
444 Ok(chunks)
445 }
446
447 fn parse_go_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
449 let mut chunks = Vec::new();
450 let root_node = tree.root_node();
451
452 let query_str = r#"
453 (package_clause) @package
454 (import_declaration) @import
455 (function_declaration) @function
456 (method_declaration) @method
457 (type_declaration) @type
458 (const_declaration) @const
459 (var_declaration) @var
460 "#;
461
462 let query = Query::new(AstLanguage::Go.tree_sitter_language(), query_str)
463 .map_err(|e| ScribeError::parse(format!("Invalid Go query: {}", e)))?;
464
465 let mut cursor = QueryCursor::new();
466 let captures = cursor.matches(&query, root_node, content.as_bytes());
467
468 for match_ in captures {
469 for capture in match_.captures {
470 let node = capture.node;
471 let chunk_type = &query.capture_names()[capture.index as usize];
472
473 let chunk =
474 self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Go)?;
475 chunks.push(chunk);
476 }
477 }
478
479 chunks.sort_by_key(|c| c.start_byte);
480 Ok(chunks)
481 }
482
483 fn parse_rust_chunks(&self, content: &str, tree: &Tree) -> Result<Vec<AstChunk>> {
485 let mut chunks = Vec::new();
486 let root_node = tree.root_node();
487
488 let query_str = r#"
489 (use_declaration) @use
490 (mod_item) @mod
491 (struct_item) @struct
492 (enum_item) @enum
493 (trait_item) @trait
494 (impl_item) @impl
495 (function_item) @function
496 (const_item) @const
497 (static_item) @static
498 (type_item) @type_alias
499 "#;
500
501 let query = Query::new(AstLanguage::Rust.tree_sitter_language(), query_str)
502 .map_err(|e| ScribeError::parse(format!("Invalid Rust query: {}", e)))?;
503
504 let mut cursor = QueryCursor::new();
505 let captures = cursor.matches(&query, root_node, content.as_bytes());
506
507 for match_ in captures {
508 for capture in match_.captures {
509 let node = capture.node;
510 let chunk_type = &query.capture_names()[capture.index as usize];
511
512 let chunk =
513 self.create_chunk_from_node(content, node, chunk_type, &AstLanguage::Rust)?;
514 chunks.push(chunk);
515 }
516 }
517
518 chunks.sort_by_key(|c| c.start_byte);
519 Ok(chunks)
520 }
521
522 fn create_chunk_from_node(
524 &self,
525 content: &str,
526 node: Node,
527 chunk_type: &str,
528 language: &AstLanguage,
529 ) -> Result<AstChunk> {
530 let start_byte = node.start_byte();
531 let end_byte = node.end_byte();
532 let start_position = node.start_position();
533 let end_position = node.end_position();
534
535 let chunk_content = &content[start_byte..end_byte];
536 let estimated_tokens = TokenCounter::global()
537 .count_tokens(chunk_content)
538 .unwrap_or_else(|_| token_utils::estimate_tokens_legacy(chunk_content));
539
540 let importance_score = self.calculate_importance_score(chunk_type, language, node, content);
542
543 let name = self.extract_name_from_node(node, content);
545
546 let is_public = self.is_node_public(node, content);
548
549 let has_documentation = self.has_documentation(node, content);
551
552 let dependencies = self.extract_dependencies(node, content);
554
555 Ok(AstChunk {
556 content: chunk_content.to_string(),
557 chunk_type: chunk_type.to_string(),
558 start_line: start_position.row + 1,
559 end_line: end_position.row + 1,
560 start_byte,
561 end_byte,
562 importance_score,
563 estimated_tokens,
564 dependencies,
565 name,
566 is_public,
567 has_documentation,
568 })
569 }
570
571 fn calculate_importance_score(
573 &self,
574 chunk_type: &str,
575 language: &AstLanguage,
576 node: Node,
577 content: &str,
578 ) -> f64 {
579 let mut score: f64 = match chunk_type {
580 "import" | "import_from" | "use" => 0.9, "package" => 0.95, "class" | "struct_item" | "trait_item" => 0.85, "interface" | "type_alias" | "enum" => 0.8, "function" | "method" => 0.75, "const" | "constant" | "static" => 0.6, "export" => 0.7, "mod" | "module" => 0.65, _ => 0.5, };
590
591 if self.is_node_public(node, content) {
593 score += 0.1;
594 }
595
596 if self.has_documentation(node, content) {
598 score += 0.05;
599 }
600
601 match language {
603 AstLanguage::Rust => {
604 if chunk_type == "impl" {
606 score = 0.85;
607 }
608 }
609 AstLanguage::TypeScript => {
610 if chunk_type == "interface" {
612 score = 0.9;
613 }
614 }
615 _ => {}
616 }
617
618 score.min(1.0)
619 }
620
621 fn extract_name_from_node(&self, node: Node, content: &str) -> Option<String> {
623 for i in 0..node.child_count() {
625 if let Some(child) = node.child(i) {
626 if child.kind() == "identifier" || child.kind() == "type_identifier" {
627 let name_bytes = &content.as_bytes()[child.start_byte()..child.end_byte()];
628 if let Ok(name) = std::str::from_utf8(name_bytes) {
629 return Some(name.to_string());
630 }
631 }
632 }
633 }
634 None
635 }
636
637 fn is_node_public(&self, node: Node, content: &str) -> bool {
639 if let Some(parent) = node.parent() {
641 for i in 0..parent.child_count() {
642 if let Some(child) = parent.child(i) {
643 if child.kind() == "visibility_modifier" {
644 let vis_bytes = &content.as_bytes()[child.start_byte()..child.end_byte()];
645 if let Ok(vis) = std::str::from_utf8(vis_bytes) {
646 return vis.contains("pub");
647 }
648 }
649 }
650 }
651 }
652
653 let node_text = &content[node.start_byte()..node.end_byte()];
655 node_text.starts_with("export") || node_text.contains("export")
656 }
657
658 fn has_documentation(&self, node: Node, content: &str) -> bool {
660 if let Some(prev_sibling) = node.prev_sibling() {
662 if prev_sibling.kind() == "comment" {
663 return true;
664 }
665 }
666
667 if node.kind() == "function_definition" || node.kind() == "class_definition" {
669 for i in 0..node.child_count() {
670 if let Some(child) = node.child(i) {
671 if child.kind() == "expression_statement" {
672 if let Some(grandchild) = child.child(0) {
673 if grandchild.kind() == "string" {
674 let string_content =
675 &content[grandchild.start_byte()..grandchild.end_byte()];
676 if string_content.starts_with("\"\"\"")
677 || string_content.starts_with("'''")
678 {
679 return true;
680 }
681 }
682 }
683 }
684 }
685 }
686 }
687
688 false
689 }
690
691 fn extract_dependencies(&self, node: Node, content: &str) -> Vec<String> {
693 let mut dependencies = Vec::new();
694
695 if node.kind() == "import_statement"
697 || node.kind() == "import_from_statement"
698 || node.kind() == "use_declaration"
699 {
700 let import_text = &content[node.start_byte()..node.end_byte()];
703
704 let mut in_quote = false;
706 let mut quote_char = '"';
707 let mut current_module = String::new();
708
709 for ch in import_text.chars() {
710 if ch == '"' || ch == '\'' {
711 if !in_quote {
712 in_quote = true;
713 quote_char = ch;
714 } else if ch == quote_char {
715 in_quote = false;
716 if !current_module.is_empty() {
717 dependencies.push(current_module.clone());
718 current_module.clear();
719 }
720 }
721 } else if in_quote {
722 current_module.push(ch);
723 }
724 }
725 }
726
727 dependencies
728 }
729
730 fn extract_python_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
732 let mut signatures = Vec::new();
733 let root_node = tree.root_node();
734
735 let query_str = r#"
736 (function_definition
737 name: (identifier) @func_name
738 parameters: (parameters) @func_params
739 ) @function
740 (class_definition
741 name: (identifier) @class_name
742 ) @class
743 (import_statement) @import
744 (import_from_statement) @import_from
745 "#;
746
747 let query = Query::new(AstLanguage::Python.tree_sitter_language(), query_str)
748 .map_err(|e| ScribeError::parse(format!("Invalid Python signature query: {}", e)))?;
749
750 let mut cursor = QueryCursor::new();
751 let captures = cursor.matches(&query, root_node, content.as_bytes());
752
753 for match_ in captures {
754 let signature = self.extract_signature_from_match(content, &match_, &query)?;
755 signatures.push(signature);
756 }
757
758 Ok(signatures)
759 }
760
761 fn extract_javascript_signatures(
763 &self,
764 content: &str,
765 tree: &Tree,
766 ) -> Result<Vec<AstSignature>> {
767 let query_str = r#"
768 (function_declaration
769 name: (identifier) @name
770 ) @function
771
772 (arrow_function) @function
773
774 (class_declaration
775 name: (identifier) @name
776 ) @class
777
778 (import_statement) @import
779 (export_statement) @export
780 "#;
781
782 let query =
783 Query::new(AstLanguage::JavaScript.tree_sitter_language(), query_str).map_err(|e| {
784 ScribeError::parse(format!("Invalid JavaScript signature query: {}", e))
785 })?;
786
787 let root_node = tree.root_node();
788 let mut cursor = tree_sitter::QueryCursor::new();
789 let matches = cursor.matches(&query, root_node, content.as_bytes());
790
791 let mut signatures = Vec::new();
792 for match_ in matches {
793 let signature = self.extract_signature_from_match(content, &match_, &query)?;
794 signatures.push(signature);
795 }
796
797 Ok(signatures)
798 }
799
800 fn extract_typescript_signatures(
801 &self,
802 content: &str,
803 tree: &Tree,
804 ) -> Result<Vec<AstSignature>> {
805 let query_str = r#"
806 (function_declaration
807 name: (identifier) @name
808 ) @function
809
810 (interface_declaration
811 name: (type_identifier) @name
812 ) @interface
813
814 (type_alias_declaration
815 name: (type_identifier) @name
816 ) @type
817
818 (class_declaration
819 name: (identifier) @name
820 ) @class
821
822 (import_statement) @import
823 (export_statement) @export
824 "#;
825
826 let query =
827 Query::new(AstLanguage::TypeScript.tree_sitter_language(), query_str).map_err(|e| {
828 ScribeError::parse(format!("Invalid TypeScript signature query: {}", e))
829 })?;
830
831 let root_node = tree.root_node();
832 let mut cursor = tree_sitter::QueryCursor::new();
833 let matches = cursor.matches(&query, root_node, content.as_bytes());
834
835 let mut signatures = Vec::new();
836 for match_ in matches {
837 let signature = self.extract_signature_from_match(content, &match_, &query)?;
838 signatures.push(signature);
839 }
840
841 Ok(signatures)
842 }
843
844 fn extract_go_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
845 let query_str = r#"
846 (function_declaration
847 name: (identifier) @name
848 ) @function
849
850 (type_declaration
851 (type_spec
852 name: (type_identifier) @name
853 )
854 ) @type
855
856 (import_declaration) @import
857 (package_clause) @package
858 "#;
859
860 let query = Query::new(AstLanguage::Go.tree_sitter_language(), query_str)
861 .map_err(|e| ScribeError::parse(format!("Invalid Go signature query: {}", e)))?;
862
863 let root_node = tree.root_node();
864 let mut cursor = tree_sitter::QueryCursor::new();
865 let matches = cursor.matches(&query, root_node, content.as_bytes());
866
867 let mut signatures = Vec::new();
868 for match_ in matches {
869 let signature = self.extract_signature_from_match(content, &match_, &query)?;
870 signatures.push(signature);
871 }
872
873 Ok(signatures)
874 }
875
876 fn extract_rust_signatures(&self, content: &str, tree: &Tree) -> Result<Vec<AstSignature>> {
877 let query_str = r#"
878 (function_item
879 name: (identifier) @name
880 ) @function
881
882 (impl_item
883 type: (type_identifier) @type_name
884 ) @impl
885
886 (struct_item
887 name: (type_identifier) @name
888 ) @struct
889
890 (enum_item
891 name: (type_identifier) @name
892 ) @enum
893
894 (trait_item
895 name: (type_identifier) @name
896 ) @trait
897
898 (mod_item
899 name: (identifier) @name
900 ) @module
901
902 (use_declaration) @use
903 "#;
904
905 let query = Query::new(AstLanguage::Rust.tree_sitter_language(), query_str)
906 .map_err(|e| ScribeError::parse(format!("Invalid Rust signature query: {}", e)))?;
907
908 let root_node = tree.root_node();
909 let mut cursor = tree_sitter::QueryCursor::new();
910 let matches = cursor.matches(&query, root_node, content.as_bytes());
911
912 let mut signatures = Vec::new();
913 for match_ in matches {
914 let signature = self.extract_signature_from_match(content, &match_, &query)?;
915 signatures.push(signature);
916 }
917
918 Ok(signatures)
919 }
920
921 fn extract_signature_from_match(
923 &self,
924 content: &str,
925 match_: &tree_sitter::QueryMatch,
926 query: &Query,
927 ) -> Result<AstSignature> {
928 let mut signature_text = String::new();
929 let mut signature_type = String::new();
930 let mut name = String::new();
931 let mut line = 0;
932
933 for capture in match_.captures {
934 let capture_name = &query.capture_names()[capture.index as usize];
935 let node = capture.node;
936 let node_text = &content[node.start_byte()..node.end_byte()];
937
938 match capture_name.as_str() {
939 "function" | "class" | "import" | "import_from" => {
940 signature_text = node_text.lines().next().unwrap_or("").to_string();
941 signature_type = capture_name.to_string();
942 line = node.start_position().row + 1;
943 }
944 "func_name" | "class_name" => {
945 name = node_text.to_string();
946 }
947 _ => {}
948 }
949 }
950
951 Ok(AstSignature {
952 signature: signature_text,
953 signature_type,
954 name,
955 parameters: Vec::new(), return_type: None, is_public: false, line,
959 })
960 }
961
962 fn extract_python_import_node(
964 &self,
965 node: Node,
966 content: &str,
967 imports: &mut Vec<AstImport>,
968 ) -> Result<()> {
969 if node.kind() == "import_statement" {
971 for i in 0..node.child_count() {
973 if let Some(child) = node.child(i) {
974 if child.kind() == "aliased_import" {
975 if let Some(name_node) = child.child_by_field_name("name") {
977 let module = self.node_text(name_node, content);
978 let alias = child
979 .child_by_field_name("alias")
980 .map(|alias_node| self.node_text(alias_node, content));
981 let line_number = name_node.start_position().row + 1;
982
983 imports.push(AstImport {
984 module,
985 alias,
986 items: vec![],
987 line_number,
988 is_relative: false,
989 });
990 }
991 } else if child.kind() == "dotted_as_name" {
992 if let Some(name_node) = child.child_by_field_name("name") {
994 let module = self.node_text(name_node, content);
995 let alias = child
996 .child_by_field_name("alias")
997 .map(|alias_node| self.node_text(alias_node, content));
998 let line_number = name_node.start_position().row + 1;
999
1000 imports.push(AstImport {
1001 module,
1002 alias,
1003 items: vec![],
1004 line_number,
1005 is_relative: false,
1006 });
1007 }
1008 } else if child.kind() == "dotted_name" || child.kind() == "identifier" {
1009 let module = self.node_text(child, content);
1011 let line_number = child.start_position().row + 1;
1012
1013 imports.push(AstImport {
1014 module,
1015 alias: None,
1016 items: vec![],
1017 line_number,
1018 is_relative: false,
1019 });
1020 }
1021 }
1022 }
1023 } else if node.kind() == "import_from_statement" {
1024 let mut module = String::new();
1025 let mut items = Vec::new();
1026 let mut is_relative = false;
1027
1028 if let Some(module_node) = node.child_by_field_name("module_name") {
1029 module = self.node_text(module_node, content);
1030 is_relative = module.starts_with('.');
1031 }
1032
1033 for i in 0..node.child_count() {
1035 if let Some(child) = node.child(i) {
1036 if child.kind() == "import_list" {
1037 for j in 0..child.child_count() {
1038 if let Some(item) = child.child(j) {
1039 if item.kind() == "dotted_name" || item.kind() == "identifier" {
1040 items.push(self.node_text(item, content));
1041 }
1042 }
1043 }
1044 }
1045 }
1046 }
1047
1048 let line_number = node.start_position().row + 1;
1049 imports.push(AstImport {
1050 module,
1051 alias: None,
1052 items,
1053 line_number,
1054 is_relative,
1055 });
1056 }
1057
1058 Ok(())
1059 }
1060
1061 fn extract_js_ts_import_node(
1063 &self,
1064 node: Node,
1065 content: &str,
1066 imports: &mut Vec<AstImport>,
1067 ) -> Result<()> {
1068 if node.kind() == "import_statement" {
1069 let mut module = String::new();
1070 let items = Vec::new();
1071
1072 for i in 0..node.child_count() {
1074 if let Some(child) = node.child(i) {
1075 if child.kind() == "string" {
1076 module = self.node_text(child, content);
1077 module = module.trim_matches('"').trim_matches('\'').to_string();
1079 break;
1080 }
1081 }
1082 }
1083
1084 let line_number = node.start_position().row + 1;
1085 imports.push(AstImport {
1086 module,
1087 alias: None,
1088 items,
1089 line_number,
1090 is_relative: false,
1091 });
1092 }
1093 Ok(())
1094 }
1095
1096 fn extract_go_import_node(
1098 &self,
1099 node: Node,
1100 content: &str,
1101 imports: &mut Vec<AstImport>,
1102 ) -> Result<()> {
1103 if node.kind() == "import_spec" {
1104 for i in 0..node.child_count() {
1105 if let Some(child) = node.child(i) {
1106 if child.kind() == "interpreted_string_literal" {
1107 let module = self.node_text(child, content);
1108 let module = module.trim_matches('"').to_string();
1109 let line_number = child.start_position().row + 1;
1110
1111 imports.push(AstImport {
1112 module,
1113 alias: None,
1114 items: vec![],
1115 line_number,
1116 is_relative: false,
1117 });
1118 }
1119 }
1120 }
1121 }
1122 Ok(())
1123 }
1124
1125 fn extract_rust_import_node(
1127 &self,
1128 node: Node,
1129 content: &str,
1130 imports: &mut Vec<AstImport>,
1131 ) -> Result<()> {
1132 if node.kind() == "use_declaration" {
1133 if let Some(use_tree) = node.child_by_field_name("argument") {
1134 let module = self.node_text(use_tree, content);
1135 let line_number = node.start_position().row + 1;
1136
1137 imports.push(AstImport {
1138 module,
1139 alias: None,
1140 items: vec![],
1141 line_number,
1142 is_relative: false,
1143 });
1144 }
1145 }
1146 Ok(())
1147 }
1148
1149 fn node_text(&self, node: Node, content: &str) -> String {
1151 content[node.start_byte()..node.end_byte()].to_string()
1152 }
1153
1154 pub fn find_entities(
1158 &mut self,
1159 content: &str,
1160 file_path: &str,
1161 query: &EntityQuery,
1162 ) -> Result<Vec<EntityLocation>> {
1163 let chunks = self.parse_chunks(content, file_path)?;
1164 let mut locations = Vec::new();
1165
1166 for chunk in chunks {
1167 if self.matches_query(&chunk, query) {
1168 locations.push(EntityLocation {
1169 file_path: file_path.to_string(),
1170 entity_type: chunk.chunk_type.clone(),
1171 entity_name: chunk.name.clone().unwrap_or_default(),
1172 start_line: chunk.start_line,
1173 end_line: chunk.end_line,
1174 is_public: chunk.is_public,
1175 content: chunk.content.clone(),
1176 });
1177 }
1178 }
1179
1180 Ok(locations)
1181 }
1182
1183 fn matches_query(&self, chunk: &AstChunk, query: &EntityQuery) -> bool {
1185 if let Some(ref entity_type) = query.entity_type {
1187 if !self.chunk_type_matches(entity_type, &chunk.chunk_type) {
1188 return false;
1189 }
1190 }
1191
1192 if let Some(ref name_pattern) = query.name_pattern {
1194 let chunk_name = chunk.name.as_deref().unwrap_or("");
1195 if query.exact_match {
1196 if chunk_name != name_pattern {
1197 return false;
1198 }
1199 } else {
1200 if !chunk_name.to_lowercase().contains(&name_pattern.to_lowercase()) {
1202 return false;
1203 }
1204 }
1205 }
1206
1207 if let Some(public_only) = query.public_only {
1209 if public_only && !chunk.is_public {
1210 return false;
1211 }
1212 }
1213
1214 true
1215 }
1216
1217 fn chunk_type_matches(&self, requested: &EntityType, chunk_type: &str) -> bool {
1219 match requested {
1220 EntityType::Function => matches!(chunk_type, "function" | "method"),
1221 EntityType::Class => matches!(chunk_type, "class" | "struct_item" | "trait_item"),
1222 EntityType::Module => matches!(chunk_type, "mod" | "module" | "package"),
1223 EntityType::Interface => matches!(chunk_type, "interface" | "trait_item"),
1224 EntityType::Constant => matches!(chunk_type, "const" | "constant" | "static"),
1225 EntityType::Any => true,
1226 }
1227 }
1228}
1229
1230#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1232pub enum EntityType {
1233 Function,
1234 Class,
1235 Module,
1236 Interface,
1237 Constant,
1238 Any,
1239}
1240
1241#[derive(Debug, Clone, Serialize, Deserialize)]
1243pub struct EntityQuery {
1244 pub entity_type: Option<EntityType>,
1246 pub name_pattern: Option<String>,
1248 pub exact_match: bool,
1250 pub public_only: Option<bool>,
1252}
1253
1254impl EntityQuery {
1255 pub fn by_name(name: &str) -> Self {
1257 Self {
1258 entity_type: None,
1259 name_pattern: Some(name.to_string()),
1260 exact_match: false,
1261 public_only: None,
1262 }
1263 }
1264
1265 pub fn by_type(entity_type: EntityType) -> Self {
1267 Self {
1268 entity_type: Some(entity_type),
1269 name_pattern: None,
1270 exact_match: false,
1271 public_only: None,
1272 }
1273 }
1274
1275 pub fn function(name: &str) -> Self {
1277 Self {
1278 entity_type: Some(EntityType::Function),
1279 name_pattern: Some(name.to_string()),
1280 exact_match: false,
1281 public_only: None,
1282 }
1283 }
1284
1285 pub fn class(name: &str) -> Self {
1287 Self {
1288 entity_type: Some(EntityType::Class),
1289 name_pattern: Some(name.to_string()),
1290 exact_match: false,
1291 public_only: None,
1292 }
1293 }
1294
1295 pub fn module(path: &str) -> Self {
1297 Self {
1298 entity_type: Some(EntityType::Module),
1299 name_pattern: Some(path.to_string()),
1300 exact_match: false,
1301 public_only: None,
1302 }
1303 }
1304
1305 pub fn exact(mut self) -> Self {
1307 self.exact_match = true;
1308 self
1309 }
1310
1311 pub fn public(mut self) -> Self {
1313 self.public_only = Some(true);
1314 self
1315 }
1316}
1317
1318#[derive(Debug, Clone, Serialize, Deserialize)]
1320pub struct EntityLocation {
1321 pub file_path: String,
1323 pub entity_type: String,
1325 pub entity_name: String,
1327 pub start_line: usize,
1329 pub end_line: usize,
1331 pub is_public: bool,
1333 pub content: String,
1335}
1336
1337impl EntityLocation {
1338 pub fn identifier(&self) -> String {
1340 format!("{}::{}", self.file_path, self.entity_name)
1341 }
1342}
1343
1344impl Default for AstParser {
1345 fn default() -> Self {
1346 Self::new().expect("Failed to create AstParser")
1347 }
1348}
1349
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` as `file_name` and returns the chunk type labels found.
    fn chunk_types_for(source: &str, file_name: &str) -> Vec<String> {
        let mut parser = AstParser::new().unwrap();
        let chunks = parser.parse_chunks(source, file_name).unwrap();
        assert!(!chunks.is_empty());
        chunks.into_iter().map(|chunk| chunk.chunk_type).collect()
    }

    #[test]
    fn test_ast_parser_creation() {
        assert!(AstParser::new().is_ok());
    }

    #[test]
    fn test_language_detection() {
        let expectations = [
            ("py", Some(AstLanguage::Python)),
            ("js", Some(AstLanguage::JavaScript)),
            ("ts", Some(AstLanguage::TypeScript)),
            ("go", Some(AstLanguage::Go)),
            ("rs", Some(AstLanguage::Rust)),
            ("unknown", None),
        ];

        for (extension, expected) in expectations.iter() {
            assert_eq!(AstLanguage::from_extension(extension), *expected);
        }
    }

    #[test]
    fn test_python_parsing() {
        let content = r#"
import os
import sys

def hello_world():
    """A simple function."""
    print("Hello, world!")

class Calculator:
    """A simple calculator."""

    def add(self, a, b):
        return a + b
"#;

        let chunk_types = chunk_types_for(content, "test.py");
        for expected in ["import", "function", "class"].iter() {
            assert!(chunk_types.iter().any(|found| found == expected));
        }
    }

    #[test]
    fn test_rust_parsing() {
        let content = r#"
use std::collections::HashMap;

pub struct DataProcessor {
    data: HashMap<String, i32>,
}

impl DataProcessor {
    pub fn new() -> Self {
        Self {
            data: HashMap::new(),
        }
    }
}
"#;

        let chunk_types = chunk_types_for(content, "test.rs");
        for expected in ["use", "struct", "impl"].iter() {
            assert!(chunk_types.iter().any(|found| found == expected));
        }
    }

    #[test]
    fn test_signature_extraction() {
        let mut parser = AstParser::new().unwrap();
        let content = r#"
def calculate(a: int, b: int) -> int:
    return a + b

class Calculator:
    def multiply(self, x, y):
        return x * y
"#;

        let signatures = parser.extract_signatures(content, "test.py").unwrap();
        assert!(!signatures.is_empty());
    }
}