1use std::path::PathBuf;
2
3use argus_core::ArgusError;
4use tree_sitter::{Node, Parser};
5
6use crate::walker::{Language, SourceFile};
7
8#[derive(Debug, Clone)]
27pub struct Symbol {
28 pub name: String,
30 pub kind: SymbolKind,
32 pub file: PathBuf,
34 pub line: u32,
36 pub signature: String,
38 pub token_cost: usize,
40}
41
/// Category assigned to a [`Symbol`] by the per-language extractors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SymbolKind {
    /// Free-standing function (also used for arrow functions bound by a JS/TS declaration).
    Function,
    /// Function that belongs to an `impl` block, a class, or a Go receiver.
    Method,
    /// Rust `struct` or Go struct type.
    Struct,
    /// Rust `enum`.
    Enum,
    /// Rust `trait`.
    Trait,
    /// Rust `impl` block.
    Impl,
    /// Python or JavaScript/TypeScript class.
    Class,
    /// Go interface type.
    Interface,
    /// Module-level grouping. NOTE(review): no extractor visible in this file emits this
    /// variant — confirm where it is produced.
    Module,
}
64
/// A use of a name somewhere in a source file.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare whole references directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Reference {
    /// Path of the file in which the name is used.
    pub from_file: PathBuf,
    /// Name of the nearest enclosing function or method, or `None` when there is none.
    pub from_symbol: Option<String>,
    /// The identifier text being referred to.
    pub to_name: String,
    /// 1-based line on which the identifier appears.
    pub line: u32,
}
92
93pub fn extract_symbols(file: &SourceFile) -> Result<Vec<Symbol>, ArgusError> {
119 let Some(ts_language) = file.language.tree_sitter_language() else {
120 return Ok(Vec::new());
121 };
122
123 let mut parser = Parser::new();
124 parser
125 .set_language(&ts_language)
126 .map_err(|e| ArgusError::Parse(format!("failed to set language: {e}")))?;
127
128 let Some(tree) = parser.parse(&file.content, None) else {
129 return Ok(Vec::new());
130 };
131
132 let mut symbols = Vec::new();
133 let source = file.content.as_bytes();
134 collect_symbols(
135 tree.root_node(),
136 source,
137 &file.path,
138 file.language,
139 false,
140 &mut symbols,
141 );
142
143 Ok(symbols)
144}
145
146pub fn extract_references(file: &SourceFile) -> Result<Vec<Reference>, ArgusError> {
168 let Some(ts_language) = file.language.tree_sitter_language() else {
169 return Ok(Vec::new());
170 };
171
172 let mut parser = Parser::new();
173 parser
174 .set_language(&ts_language)
175 .map_err(|e| ArgusError::Parse(format!("failed to set language: {e}")))?;
176
177 let Some(tree) = parser.parse(&file.content, None) else {
178 return Ok(Vec::new());
179 };
180
181 let mut refs = Vec::new();
182 collect_references(
183 tree.root_node(),
184 file.content.as_bytes(),
185 &file.path,
186 &None,
187 &mut refs,
188 );
189
190 Ok(refs)
191}
192
193fn collect_symbols(
194 node: Node,
195 source: &[u8],
196 file: &PathBuf,
197 language: Language,
198 inside_impl: bool,
199 symbols: &mut Vec<Symbol>,
200) {
201 match language {
202 Language::Rust => collect_rust_symbols(node, source, file, inside_impl, symbols),
203 Language::Python => collect_python_symbols(node, source, file, false, symbols),
204 Language::TypeScript | Language::JavaScript => {
205 collect_js_ts_symbols(node, source, file, false, symbols);
206 }
207 Language::Go => collect_go_symbols(node, source, file, symbols),
208 Language::Unknown => {}
209 }
210}
211
212fn collect_rust_symbols(
213 node: Node,
214 source: &[u8],
215 file: &PathBuf,
216 inside_impl: bool,
217 symbols: &mut Vec<Symbol>,
218) {
219 let kind_str = node.kind();
220
221 match kind_str {
222 "function_item" => {
223 if let Some(name) = find_child_text(&node, "identifier", source) {
224 let sig = extract_signature(&node, source);
225 let kind = if inside_impl {
226 SymbolKind::Method
227 } else {
228 SymbolKind::Function
229 };
230 symbols.push(Symbol {
231 name,
232 kind,
233 file: file.clone(),
234 line: node.start_position().row as u32 + 1,
235 token_cost: sig.len() / 4,
236 signature: sig,
237 });
238 }
239 }
240 "struct_item" => {
241 if let Some(name) = find_child_text(&node, "type_identifier", source) {
242 let sig = extract_signature(&node, source);
243 symbols.push(Symbol {
244 name,
245 kind: SymbolKind::Struct,
246 file: file.clone(),
247 line: node.start_position().row as u32 + 1,
248 token_cost: sig.len() / 4,
249 signature: sig,
250 });
251 }
252 }
253 "enum_item" => {
254 if let Some(name) = find_child_text(&node, "type_identifier", source) {
255 let sig = extract_signature(&node, source);
256 symbols.push(Symbol {
257 name,
258 kind: SymbolKind::Enum,
259 file: file.clone(),
260 line: node.start_position().row as u32 + 1,
261 token_cost: sig.len() / 4,
262 signature: sig,
263 });
264 }
265 }
266 "trait_item" => {
267 if let Some(name) = find_child_text(&node, "type_identifier", source) {
268 let sig = extract_signature(&node, source);
269 symbols.push(Symbol {
270 name,
271 kind: SymbolKind::Trait,
272 file: file.clone(),
273 line: node.start_position().row as u32 + 1,
274 token_cost: sig.len() / 4,
275 signature: sig,
276 });
277 }
278 }
279 "impl_item" => {
280 if let Some(name) = find_child_text(&node, "type_identifier", source) {
281 let sig = extract_signature(&node, source);
282 symbols.push(Symbol {
283 name: name.clone(),
284 kind: SymbolKind::Impl,
285 file: file.clone(),
286 line: node.start_position().row as u32 + 1,
287 token_cost: sig.len() / 4,
288 signature: sig,
289 });
290 }
291 let mut cursor = node.walk();
293 for child in node.children(&mut cursor) {
294 collect_rust_symbols(child, source, file, true, symbols);
295 }
296 return; }
298 _ => {}
299 }
300
301 let mut cursor = node.walk();
303 for child in node.children(&mut cursor) {
304 collect_rust_symbols(child, source, file, inside_impl, symbols);
305 }
306}
307
308fn collect_python_symbols(
309 node: Node,
310 source: &[u8],
311 file: &PathBuf,
312 inside_class: bool,
313 symbols: &mut Vec<Symbol>,
314) {
315 let kind_str = node.kind();
316
317 match kind_str {
318 "function_definition" => {
319 if let Some(name) = find_child_text(&node, "identifier", source) {
320 let sig = extract_signature(&node, source);
321 let kind = if inside_class {
322 SymbolKind::Method
323 } else {
324 SymbolKind::Function
325 };
326 symbols.push(Symbol {
327 name,
328 kind,
329 file: file.clone(),
330 line: node.start_position().row as u32 + 1,
331 token_cost: sig.len() / 4,
332 signature: sig,
333 });
334 }
335 }
336 "class_definition" => {
337 if let Some(name) = find_child_text(&node, "identifier", source) {
338 let sig = extract_signature(&node, source);
339 symbols.push(Symbol {
340 name,
341 kind: SymbolKind::Class,
342 file: file.clone(),
343 line: node.start_position().row as u32 + 1,
344 token_cost: sig.len() / 4,
345 signature: sig,
346 });
347 }
348 let mut cursor = node.walk();
350 for child in node.children(&mut cursor) {
351 collect_python_symbols(child, source, file, true, symbols);
352 }
353 return;
354 }
355 _ => {}
356 }
357
358 let mut cursor = node.walk();
359 for child in node.children(&mut cursor) {
360 collect_python_symbols(child, source, file, inside_class, symbols);
361 }
362}
363
364fn collect_js_ts_symbols(
365 node: Node,
366 source: &[u8],
367 file: &PathBuf,
368 inside_class: bool,
369 symbols: &mut Vec<Symbol>,
370) {
371 let kind_str = node.kind();
372
373 match kind_str {
374 "function_declaration" => {
375 if let Some(name) = find_child_text(&node, "identifier", source) {
376 let sig = extract_signature(&node, source);
377 symbols.push(Symbol {
378 name,
379 kind: SymbolKind::Function,
380 file: file.clone(),
381 line: node.start_position().row as u32 + 1,
382 token_cost: sig.len() / 4,
383 signature: sig,
384 });
385 }
386 }
387 "class_declaration" => {
388 let name = find_child_text(&node, "type_identifier", source)
389 .or_else(|| find_child_text(&node, "identifier", source));
390 if let Some(name) = name {
391 let sig = extract_signature(&node, source);
392 symbols.push(Symbol {
393 name,
394 kind: SymbolKind::Class,
395 file: file.clone(),
396 line: node.start_position().row as u32 + 1,
397 token_cost: sig.len() / 4,
398 signature: sig,
399 });
400 }
401 let mut cursor = node.walk();
402 for child in node.children(&mut cursor) {
403 collect_js_ts_symbols(child, source, file, true, symbols);
404 }
405 return;
406 }
407 "method_definition" => {
408 if let Some(name) = find_child_text(&node, "property_identifier", source) {
409 let sig = extract_signature(&node, source);
410 symbols.push(Symbol {
411 name,
412 kind: SymbolKind::Method,
413 file: file.clone(),
414 line: node.start_position().row as u32 + 1,
415 token_cost: sig.len() / 4,
416 signature: sig,
417 });
418 }
419 }
420 "lexical_declaration" => {
421 let mut cursor = node.walk();
423 for child in node.children(&mut cursor) {
424 if child.kind() == "variable_declarator" {
425 let has_arrow = child_has_kind(&child, "arrow_function");
426 if has_arrow {
427 if let Some(name) = find_child_text(&child, "identifier", source) {
428 let sig = extract_signature(&node, source);
429 symbols.push(Symbol {
430 name,
431 kind: SymbolKind::Function,
432 file: file.clone(),
433 line: node.start_position().row as u32 + 1,
434 token_cost: sig.len() / 4,
435 signature: sig,
436 });
437 }
438 }
439 }
440 }
441 }
442 _ => {}
443 }
444
445 if !inside_class || kind_str != "class_declaration" {
446 let mut cursor = node.walk();
447 for child in node.children(&mut cursor) {
448 collect_js_ts_symbols(child, source, file, inside_class, symbols);
449 }
450 }
451}
452
453fn collect_go_symbols(node: Node, source: &[u8], file: &PathBuf, symbols: &mut Vec<Symbol>) {
454 let kind_str = node.kind();
455
456 match kind_str {
457 "function_declaration" => {
458 if let Some(name) = find_child_text(&node, "identifier", source) {
459 let sig = extract_signature(&node, source);
460 symbols.push(Symbol {
461 name,
462 kind: SymbolKind::Function,
463 file: file.clone(),
464 line: node.start_position().row as u32 + 1,
465 token_cost: sig.len() / 4,
466 signature: sig,
467 });
468 }
469 }
470 "method_declaration" => {
471 if let Some(name) = find_child_text(&node, "field_identifier", source) {
472 let sig = extract_signature(&node, source);
473 symbols.push(Symbol {
474 name,
475 kind: SymbolKind::Method,
476 file: file.clone(),
477 line: node.start_position().row as u32 + 1,
478 token_cost: sig.len() / 4,
479 signature: sig,
480 });
481 }
482 }
483 "type_declaration" => {
484 let mut cursor = node.walk();
485 for child in node.children(&mut cursor) {
486 if child.kind() == "type_spec" {
487 if let Some(name) = find_child_text(&child, "type_identifier", source) {
488 let has_struct = child_has_kind(&child, "struct_type");
489 let has_interface = child_has_kind(&child, "interface_type");
490 let kind = if has_struct {
491 SymbolKind::Struct
492 } else if has_interface {
493 SymbolKind::Interface
494 } else {
495 continue;
496 };
497 let sig = extract_signature(&child, source);
498 symbols.push(Symbol {
499 name,
500 kind,
501 file: file.clone(),
502 line: child.start_position().row as u32 + 1,
503 token_cost: sig.len() / 4,
504 signature: sig,
505 });
506 }
507 }
508 }
509 }
510 _ => {}
511 }
512
513 let mut cursor = node.walk();
514 for child in node.children(&mut cursor) {
515 collect_go_symbols(child, source, file, symbols);
516 }
517}
518
519fn collect_references(
520 node: Node,
521 source: &[u8],
522 file: &PathBuf,
523 enclosing: &Option<String>,
524 refs: &mut Vec<Reference>,
525) {
526 let kind_str = node.kind();
527
528 let new_enclosing = match kind_str {
530 "function_item" | "function_definition" | "function_declaration" | "method_declaration" => {
531 find_child_text(&node, "identifier", source)
532 .or_else(|| find_child_text(&node, "field_identifier", source))
533 }
534 _ => None,
535 };
536 let current_enclosing = if new_enclosing.is_some() {
537 &new_enclosing
538 } else {
539 enclosing
540 };
541
542 if kind_str == "identifier" || kind_str == "type_identifier" {
544 let parent_kind = node.parent().map(|p| p.kind().to_string());
545 let is_definition = matches!(
546 parent_kind.as_deref(),
547 Some(
548 "function_item"
549 | "function_definition"
550 | "function_declaration"
551 | "struct_item"
552 | "enum_item"
553 | "trait_item"
554 | "class_definition"
555 | "class_declaration"
556 | "method_definition"
557 | "variable_declarator"
558 | "type_spec"
559 )
560 );
561
562 if !is_definition {
563 let name = node_text(&node, source);
564 if !name.is_empty() {
565 refs.push(Reference {
566 from_file: file.clone(),
567 from_symbol: current_enclosing.clone(),
568 to_name: name,
569 line: node.start_position().row as u32 + 1,
570 });
571 }
572 }
573 }
574
575 let mut cursor = node.walk();
576 for child in node.children(&mut cursor) {
577 collect_references(child, source, file, current_enclosing, refs);
578 }
579}
580
581fn extract_signature(node: &Node, source: &[u8]) -> String {
583 let text = node_text(node, source);
584
585 let sig = if let Some(pos) = text.find('{') {
587 &text[..pos]
588 } else if let Some(pos) = text.find(':') {
589 &text[..pos]
591 } else {
592 &text
593 };
594
595 let collapsed: String = sig.split_whitespace().collect::<Vec<_>>().join(" ");
597
598 collapsed
599}
600
601fn node_text(node: &Node, source: &[u8]) -> String {
602 let start = node.start_byte();
603 let end = node.end_byte();
604 if start >= source.len() || end > source.len() {
605 return String::new();
606 }
607 String::from_utf8_lossy(&source[start..end]).to_string()
608}
609
610fn find_child_text(node: &Node, kind: &str, source: &[u8]) -> Option<String> {
611 let mut cursor = node.walk();
612 for child in node.children(&mut cursor) {
613 if child.kind() == kind {
614 let text = node_text(&child, source);
615 if !text.is_empty() {
616 return Some(text);
617 }
618 }
619 }
620 None
621}
622
623fn child_has_kind(node: &Node, kind: &str) -> bool {
624 let mut cursor = node.walk();
625 for child in node.children(&mut cursor) {
626 if child.kind() == kind {
627 return true;
628 }
629 }
630 false
631}
632
#[cfg(test)]
mod tests {
    use super::*;

    // Rust fixture: a free function, a struct, an enum, a trait and an inherent impl
    // with one method.
    fn make_rust_file() -> SourceFile {
        SourceFile {
            path: PathBuf::from("src/lib.rs"),
            language: Language::Rust,
            content: r#"
pub fn top_level(x: i32) -> bool {
    x > 0
}

pub struct Config {
    name: String,
    value: u32,
}

pub enum Color {
    Red,
    Green,
    Blue,
}

pub trait Drawable {
    fn draw(&self);
}

impl Config {
    pub fn new(name: String) -> Self {
        Self { name, value: 0 }
    }
}
"#
            .to_string(),
        }
    }

    // Python fixture: a module-level function and a class with two methods.
    fn make_python_file() -> SourceFile {
        SourceFile {
            path: PathBuf::from("app.py"),
            language: Language::Python,
            content: r#"
def standalone():
    pass

class MyClass:
    def method(self):
        pass

    def another(self, x):
        return x + 1
"#
            .to_string(),
        }
    }

    // TypeScript fixture: a function declaration, a class with one method and an arrow
    // function bound with `const`.
    fn make_typescript_file() -> SourceFile {
        SourceFile {
            path: PathBuf::from("app.ts"),
            language: Language::TypeScript,
            content: r#"
function greet(name: string): string {
    return `Hello ${name}`;
}

class Greeter {
    sayHello() {
        console.log("hello");
    }
}

const add = (a: number, b: number) => a + b;
"#
            .to_string(),
        }
    }

    // Every definition in the Rust fixture is found and classified.
    #[test]
    fn parse_rust_symbols() {
        let file = make_rust_file();
        let symbols = extract_symbols(&file).unwrap();

        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"top_level"), "missing top_level: {names:?}");
        assert!(
            names.contains(&"Config"),
            "missing Config struct: {names:?}"
        );
        assert!(names.contains(&"Color"), "missing Color enum: {names:?}");
        assert!(
            names.contains(&"Drawable"),
            "missing Drawable trait: {names:?}"
        );
        assert!(names.contains(&"new"), "missing new method: {names:?}");

        let top = symbols.iter().find(|s| s.name == "top_level").unwrap();
        // A function outside any impl block is a plain function.
        assert_eq!(top.kind, SymbolKind::Function);

        // "Config" appears both as a struct and as an impl; pick the struct entry.
        let config_struct = symbols
            .iter()
            .find(|s| s.name == "Config" && s.kind == SymbolKind::Struct)
            .unwrap();
        assert_eq!(config_struct.kind, SymbolKind::Struct);

        let new_method = symbols.iter().find(|s| s.name == "new").unwrap();
        assert_eq!(new_method.kind, SymbolKind::Method);

        assert!(top.signature.contains("fn top_level"));
        // The signature is long enough for `len / 4` to be non-zero.
        assert!(top.token_cost > 0);
    }

    // Functions inside a class body are methods; module-level ones are functions.
    #[test]
    fn parse_python_symbols() {
        let file = make_python_file();
        let symbols = extract_symbols(&file).unwrap();

        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(
            names.contains(&"standalone"),
            "missing standalone: {names:?}"
        );
        assert!(names.contains(&"MyClass"), "missing MyClass: {names:?}");
        assert!(names.contains(&"method"), "missing method: {names:?}");
        assert!(names.contains(&"another"), "missing another: {names:?}");

        let standalone = symbols.iter().find(|s| s.name == "standalone").unwrap();
        assert_eq!(standalone.kind, SymbolKind::Function);

        let method = symbols.iter().find(|s| s.name == "method").unwrap();
        assert_eq!(method.kind, SymbolKind::Method);
    }

    // Function declarations, classes, methods and `const`-bound arrow functions are
    // all reported by name.
    #[test]
    fn parse_typescript_symbols() {
        let file = make_typescript_file();
        let symbols = extract_symbols(&file).unwrap();

        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"greet"), "missing greet: {names:?}");
        assert!(names.contains(&"Greeter"), "missing Greeter: {names:?}");
        assert!(names.contains(&"sayHello"), "missing sayHello: {names:?}");
        assert!(names.contains(&"add"), "missing add arrow fn: {names:?}");
    }

    // An empty source yields no symbols and no error.
    #[test]
    fn parse_empty_file() {
        let file = SourceFile {
            path: PathBuf::from("empty.rs"),
            language: Language::Rust,
            content: String::new(),
        };
        let symbols = extract_symbols(&file).unwrap();
        assert!(symbols.is_empty());
    }

    // A syntax error later in the file must not hide definitions that precede it.
    #[test]
    fn parse_file_with_syntax_errors_gives_partial_results() {
        let file = SourceFile {
            path: PathBuf::from("broken.rs"),
            language: Language::Rust,
            content: r#"
fn valid_fn() -> bool { true }

fn broken( {

struct ValidStruct {
    x: i32,
}
"#
            .to_string(),
        };
        let symbols = extract_symbols(&file).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        // Only the definition before the error is asserted; what is recovered after the
        // error is not checked here.
        assert!(
            names.contains(&"valid_fn"),
            "should extract valid symbols despite errors: {names:?}"
        );
    }

    // Both a plain call (`helper()`) and a path expression (`Config::new()`) inside a
    // function body are reported as references.
    #[test]
    fn extract_references_finds_calls() {
        let file = SourceFile {
            path: PathBuf::from("main.rs"),
            language: Language::Rust,
            content: r#"
fn caller() {
    helper();
    let x = Config::new();
}

fn helper() {}
"#
            .to_string(),
        };
        let refs = extract_references(&file).unwrap();
        let ref_names: Vec<&str> = refs.iter().map(|r| r.to_name.as_str()).collect();
        assert!(
            ref_names.contains(&"helper"),
            "should find reference to helper: {ref_names:?}"
        );
        assert!(
            ref_names.contains(&"Config"),
            "should find reference to Config: {ref_names:?}"
        );
    }
}