1use std::path::PathBuf;
2
3use argus_core::ArgusError;
4use tree_sitter::{Node, Parser};
5
6use crate::walker::{Language, SourceFile};
7
8#[derive(Debug, Clone)]
27pub struct Symbol {
28 pub name: String,
30 pub kind: SymbolKind,
32 pub file: PathBuf,
34 pub line: u32,
36 pub signature: String,
38 pub token_cost: usize,
40}
41
/// Category assigned to a [`Symbol`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SymbolKind {
    /// A function that is not nested in an impl/class-like scope.
    Function,
    /// A function found inside an impl/class-like scope.
    Method,
    /// A struct definition (Rust, Go, C, C++, Swift).
    Struct,
    /// An enum definition.
    Enum,
    /// A trait definition (Rust traits, PHP traits).
    Trait,
    /// A Rust `impl` block.
    Impl,
    /// A class definition.
    Class,
    /// An interface definition (also used for Swift protocols).
    Interface,
    /// A module-like container (Ruby modules, PHP namespaces).
    Module,
}
64
/// A use of an identifier found in a source file by [`extract_references`].
#[derive(Debug, Clone)]
pub struct Reference {
    /// Path of the file containing the reference.
    pub from_file: PathBuf,
    /// Name of the nearest enclosing function-like definition, if any.
    pub from_symbol: Option<String>,
    /// Text of the referenced identifier.
    pub to_name: String,
    /// 1-based line on which the identifier starts.
    pub line: u32,
}
92
93pub fn extract_symbols(file: &SourceFile) -> Result<Vec<Symbol>, ArgusError> {
119 let Some(ts_language) = file.language.tree_sitter_language() else {
120 return Ok(Vec::new());
121 };
122
123 let mut parser = Parser::new();
124 parser
125 .set_language(&ts_language)
126 .map_err(|e| ArgusError::Parse(format!("failed to set language: {e}")))?;
127
128 let Some(tree) = parser.parse(&file.content, None) else {
129 return Ok(Vec::new());
130 };
131
132 let mut symbols = Vec::new();
133 let source = file.content.as_bytes();
134 collect_symbols(
135 tree.root_node(),
136 source,
137 &file.path,
138 file.language,
139 false,
140 &mut symbols,
141 );
142
143 Ok(symbols)
144}
145
146pub fn extract_references(file: &SourceFile) -> Result<Vec<Reference>, ArgusError> {
168 let Some(ts_language) = file.language.tree_sitter_language() else {
169 return Ok(Vec::new());
170 };
171
172 let mut parser = Parser::new();
173 parser
174 .set_language(&ts_language)
175 .map_err(|e| ArgusError::Parse(format!("failed to set language: {e}")))?;
176
177 let Some(tree) = parser.parse(&file.content, None) else {
178 return Ok(Vec::new());
179 };
180
181 let mut refs = Vec::new();
182 collect_references(
183 tree.root_node(),
184 file.content.as_bytes(),
185 &file.path,
186 &None,
187 &mut refs,
188 );
189
190 Ok(refs)
191}
192
193fn collect_symbols(
194 node: Node,
195 source: &[u8],
196 file: &PathBuf,
197 language: Language,
198 inside_impl: bool,
199 symbols: &mut Vec<Symbol>,
200) {
201 match language {
202 Language::Rust => collect_rust_symbols(node, source, file, inside_impl, symbols),
203 Language::Python => collect_python_symbols(node, source, file, false, symbols),
204 Language::TypeScript | Language::JavaScript => {
205 collect_js_ts_symbols(node, source, file, false, symbols);
206 }
207 Language::Go => collect_go_symbols(node, source, file, symbols),
208 Language::Java => collect_java_symbols(node, source, file, false, symbols),
209 Language::C => collect_c_symbols(node, source, file, symbols),
210 Language::Cpp => collect_cpp_symbols(node, source, file, false, symbols),
211 Language::Ruby => collect_ruby_symbols(node, source, file, false, symbols),
212 Language::Php => collect_php_symbols(node, source, file, false, symbols),
213 Language::Kotlin => collect_kotlin_symbols(node, source, file, false, symbols),
214 Language::Swift => collect_swift_symbols(node, source, file, false, symbols),
215 Language::Unknown => {}
216 }
217}
218
219fn collect_rust_symbols(
220 node: Node,
221 source: &[u8],
222 file: &PathBuf,
223 inside_impl: bool,
224 symbols: &mut Vec<Symbol>,
225) {
226 let kind_str = node.kind();
227
228 match kind_str {
229 "function_item" => {
230 if let Some(name) = find_child_text(&node, "identifier", source) {
231 let sig = extract_signature(&node, source);
232 let kind = if inside_impl {
233 SymbolKind::Method
234 } else {
235 SymbolKind::Function
236 };
237 symbols.push(Symbol {
238 name,
239 kind,
240 file: file.clone(),
241 line: node.start_position().row as u32 + 1,
242 token_cost: sig.len() / 4,
243 signature: sig,
244 });
245 }
246 }
247 "struct_item" => {
248 if let Some(name) = find_child_text(&node, "type_identifier", source) {
249 let sig = extract_signature(&node, source);
250 symbols.push(Symbol {
251 name,
252 kind: SymbolKind::Struct,
253 file: file.clone(),
254 line: node.start_position().row as u32 + 1,
255 token_cost: sig.len() / 4,
256 signature: sig,
257 });
258 }
259 }
260 "enum_item" => {
261 if let Some(name) = find_child_text(&node, "type_identifier", source) {
262 let sig = extract_signature(&node, source);
263 symbols.push(Symbol {
264 name,
265 kind: SymbolKind::Enum,
266 file: file.clone(),
267 line: node.start_position().row as u32 + 1,
268 token_cost: sig.len() / 4,
269 signature: sig,
270 });
271 }
272 }
273 "trait_item" => {
274 if let Some(name) = find_child_text(&node, "type_identifier", source) {
275 let sig = extract_signature(&node, source);
276 symbols.push(Symbol {
277 name,
278 kind: SymbolKind::Trait,
279 file: file.clone(),
280 line: node.start_position().row as u32 + 1,
281 token_cost: sig.len() / 4,
282 signature: sig,
283 });
284 }
285 }
286 "impl_item" => {
287 if let Some(name) = find_child_text(&node, "type_identifier", source) {
288 let sig = extract_signature(&node, source);
289 symbols.push(Symbol {
290 name: name.clone(),
291 kind: SymbolKind::Impl,
292 file: file.clone(),
293 line: node.start_position().row as u32 + 1,
294 token_cost: sig.len() / 4,
295 signature: sig,
296 });
297 }
298 let mut cursor = node.walk();
300 for child in node.children(&mut cursor) {
301 collect_rust_symbols(child, source, file, true, symbols);
302 }
303 return; }
305 _ => {}
306 }
307
308 let mut cursor = node.walk();
310 for child in node.children(&mut cursor) {
311 collect_rust_symbols(child, source, file, inside_impl, symbols);
312 }
313}
314
315fn collect_python_symbols(
316 node: Node,
317 source: &[u8],
318 file: &PathBuf,
319 inside_class: bool,
320 symbols: &mut Vec<Symbol>,
321) {
322 let kind_str = node.kind();
323
324 match kind_str {
325 "function_definition" => {
326 if let Some(name) = find_child_text(&node, "identifier", source) {
327 let sig = extract_signature(&node, source);
328 let kind = if inside_class {
329 SymbolKind::Method
330 } else {
331 SymbolKind::Function
332 };
333 symbols.push(Symbol {
334 name,
335 kind,
336 file: file.clone(),
337 line: node.start_position().row as u32 + 1,
338 token_cost: sig.len() / 4,
339 signature: sig,
340 });
341 }
342 }
343 "class_definition" => {
344 if let Some(name) = find_child_text(&node, "identifier", source) {
345 let sig = extract_signature(&node, source);
346 symbols.push(Symbol {
347 name,
348 kind: SymbolKind::Class,
349 file: file.clone(),
350 line: node.start_position().row as u32 + 1,
351 token_cost: sig.len() / 4,
352 signature: sig,
353 });
354 }
355 let mut cursor = node.walk();
357 for child in node.children(&mut cursor) {
358 collect_python_symbols(child, source, file, true, symbols);
359 }
360 return;
361 }
362 _ => {}
363 }
364
365 let mut cursor = node.walk();
366 for child in node.children(&mut cursor) {
367 collect_python_symbols(child, source, file, inside_class, symbols);
368 }
369}
370
371fn collect_js_ts_symbols(
372 node: Node,
373 source: &[u8],
374 file: &PathBuf,
375 inside_class: bool,
376 symbols: &mut Vec<Symbol>,
377) {
378 let kind_str = node.kind();
379
380 match kind_str {
381 "function_declaration" => {
382 if let Some(name) = find_child_text(&node, "identifier", source) {
383 let sig = extract_signature(&node, source);
384 symbols.push(Symbol {
385 name,
386 kind: SymbolKind::Function,
387 file: file.clone(),
388 line: node.start_position().row as u32 + 1,
389 token_cost: sig.len() / 4,
390 signature: sig,
391 });
392 }
393 }
394 "class_declaration" => {
395 let name = find_child_text(&node, "type_identifier", source)
396 .or_else(|| find_child_text(&node, "identifier", source));
397 if let Some(name) = name {
398 let sig = extract_signature(&node, source);
399 symbols.push(Symbol {
400 name,
401 kind: SymbolKind::Class,
402 file: file.clone(),
403 line: node.start_position().row as u32 + 1,
404 token_cost: sig.len() / 4,
405 signature: sig,
406 });
407 }
408 let mut cursor = node.walk();
409 for child in node.children(&mut cursor) {
410 collect_js_ts_symbols(child, source, file, true, symbols);
411 }
412 return;
413 }
414 "method_definition" => {
415 if let Some(name) = find_child_text(&node, "property_identifier", source) {
416 let sig = extract_signature(&node, source);
417 symbols.push(Symbol {
418 name,
419 kind: SymbolKind::Method,
420 file: file.clone(),
421 line: node.start_position().row as u32 + 1,
422 token_cost: sig.len() / 4,
423 signature: sig,
424 });
425 }
426 }
427 "lexical_declaration" => {
428 let mut cursor = node.walk();
430 for child in node.children(&mut cursor) {
431 if child.kind() == "variable_declarator" {
432 let has_arrow = child_has_kind(&child, "arrow_function");
433 if has_arrow {
434 if let Some(name) = find_child_text(&child, "identifier", source) {
435 let sig = extract_signature(&node, source);
436 symbols.push(Symbol {
437 name,
438 kind: SymbolKind::Function,
439 file: file.clone(),
440 line: node.start_position().row as u32 + 1,
441 token_cost: sig.len() / 4,
442 signature: sig,
443 });
444 }
445 }
446 }
447 }
448 }
449 _ => {}
450 }
451
452 if !inside_class || kind_str != "class_declaration" {
453 let mut cursor = node.walk();
454 for child in node.children(&mut cursor) {
455 collect_js_ts_symbols(child, source, file, inside_class, symbols);
456 }
457 }
458}
459
460fn collect_go_symbols(node: Node, source: &[u8], file: &PathBuf, symbols: &mut Vec<Symbol>) {
461 let kind_str = node.kind();
462
463 match kind_str {
464 "function_declaration" => {
465 if let Some(name) = find_child_text(&node, "identifier", source) {
466 let sig = extract_signature(&node, source);
467 symbols.push(Symbol {
468 name,
469 kind: SymbolKind::Function,
470 file: file.clone(),
471 line: node.start_position().row as u32 + 1,
472 token_cost: sig.len() / 4,
473 signature: sig,
474 });
475 }
476 }
477 "method_declaration" => {
478 if let Some(name) = find_child_text(&node, "field_identifier", source) {
479 let sig = extract_signature(&node, source);
480 symbols.push(Symbol {
481 name,
482 kind: SymbolKind::Method,
483 file: file.clone(),
484 line: node.start_position().row as u32 + 1,
485 token_cost: sig.len() / 4,
486 signature: sig,
487 });
488 }
489 }
490 "type_declaration" => {
491 let mut cursor = node.walk();
492 for child in node.children(&mut cursor) {
493 if child.kind() == "type_spec" {
494 if let Some(name) = find_child_text(&child, "type_identifier", source) {
495 let has_struct = child_has_kind(&child, "struct_type");
496 let has_interface = child_has_kind(&child, "interface_type");
497 let kind = if has_struct {
498 SymbolKind::Struct
499 } else if has_interface {
500 SymbolKind::Interface
501 } else {
502 continue;
503 };
504 let sig = extract_signature(&child, source);
505 symbols.push(Symbol {
506 name,
507 kind,
508 file: file.clone(),
509 line: child.start_position().row as u32 + 1,
510 token_cost: sig.len() / 4,
511 signature: sig,
512 });
513 }
514 }
515 }
516 }
517 _ => {}
518 }
519
520 let mut cursor = node.walk();
521 for child in node.children(&mut cursor) {
522 collect_go_symbols(child, source, file, symbols);
523 }
524}
525
526fn collect_java_symbols(
527 node: Node,
528 source: &[u8],
529 file: &PathBuf,
530 inside_class: bool,
531 symbols: &mut Vec<Symbol>,
532) {
533 let kind_str = node.kind();
534
535 match kind_str {
536 "method_declaration" | "constructor_declaration" => {
537 if let Some(name) = find_child_text(&node, "identifier", source) {
538 let sig = extract_signature(&node, source);
539 let kind = if inside_class {
540 SymbolKind::Method
541 } else {
542 SymbolKind::Function
543 };
544 symbols.push(Symbol {
545 name,
546 kind,
547 file: file.clone(),
548 line: node.start_position().row as u32 + 1,
549 token_cost: sig.len() / 4,
550 signature: sig,
551 });
552 }
553 }
554 "class_declaration" => {
555 if let Some(name) = find_child_text(&node, "identifier", source) {
556 let sig = extract_signature(&node, source);
557 symbols.push(Symbol {
558 name,
559 kind: SymbolKind::Class,
560 file: file.clone(),
561 line: node.start_position().row as u32 + 1,
562 token_cost: sig.len() / 4,
563 signature: sig,
564 });
565 }
566 let mut cursor = node.walk();
567 for child in node.children(&mut cursor) {
568 collect_java_symbols(child, source, file, true, symbols);
569 }
570 return;
571 }
572 "interface_declaration" => {
573 if let Some(name) = find_child_text(&node, "identifier", source) {
574 let sig = extract_signature(&node, source);
575 symbols.push(Symbol {
576 name,
577 kind: SymbolKind::Interface,
578 file: file.clone(),
579 line: node.start_position().row as u32 + 1,
580 token_cost: sig.len() / 4,
581 signature: sig,
582 });
583 }
584 let mut cursor = node.walk();
585 for child in node.children(&mut cursor) {
586 collect_java_symbols(child, source, file, true, symbols);
587 }
588 return;
589 }
590 "enum_declaration" => {
591 if let Some(name) = find_child_text(&node, "identifier", source) {
592 let sig = extract_signature(&node, source);
593 symbols.push(Symbol {
594 name,
595 kind: SymbolKind::Enum,
596 file: file.clone(),
597 line: node.start_position().row as u32 + 1,
598 token_cost: sig.len() / 4,
599 signature: sig,
600 });
601 }
602 }
603 _ => {}
604 }
605
606 let mut cursor = node.walk();
607 for child in node.children(&mut cursor) {
608 collect_java_symbols(child, source, file, inside_class, symbols);
609 }
610}
611
612fn collect_c_symbols(node: Node, source: &[u8], file: &PathBuf, symbols: &mut Vec<Symbol>) {
613 let kind_str = node.kind();
614
615 match kind_str {
616 "function_definition" | "declaration" => {
617 if kind_str == "declaration" {
619 let has_func = child_has_kind(&node, "function_declarator");
620 if !has_func {
621 let mut cursor = node.walk();
623 for child in node.children(&mut cursor) {
624 collect_c_symbols(child, source, file, symbols);
625 }
626 return;
627 }
628 }
629 if let Some(name) = find_nested_function_name(&node, source) {
631 let sig = extract_signature(&node, source);
632 symbols.push(Symbol {
633 name,
634 kind: SymbolKind::Function,
635 file: file.clone(),
636 line: node.start_position().row as u32 + 1,
637 token_cost: sig.len() / 4,
638 signature: sig,
639 });
640 }
641 }
642 "struct_specifier" => {
643 if let Some(name) = find_child_text(&node, "type_identifier", source) {
644 let sig = extract_signature(&node, source);
645 symbols.push(Symbol {
646 name,
647 kind: SymbolKind::Struct,
648 file: file.clone(),
649 line: node.start_position().row as u32 + 1,
650 token_cost: sig.len() / 4,
651 signature: sig,
652 });
653 }
654 }
655 "enum_specifier" => {
656 if let Some(name) = find_child_text(&node, "type_identifier", source) {
657 let sig = extract_signature(&node, source);
658 symbols.push(Symbol {
659 name,
660 kind: SymbolKind::Enum,
661 file: file.clone(),
662 line: node.start_position().row as u32 + 1,
663 token_cost: sig.len() / 4,
664 signature: sig,
665 });
666 }
667 }
668 _ => {}
669 }
670
671 let mut cursor = node.walk();
672 for child in node.children(&mut cursor) {
673 collect_c_symbols(child, source, file, symbols);
674 }
675}
676
677fn collect_cpp_symbols(
678 node: Node,
679 source: &[u8],
680 file: &PathBuf,
681 inside_class: bool,
682 symbols: &mut Vec<Symbol>,
683) {
684 let kind_str = node.kind();
685
686 match kind_str {
687 "function_definition" => {
688 if let Some(name) = find_nested_function_name(&node, source)
689 .or_else(|| find_child_text(&node, "identifier", source))
690 {
691 let sig = extract_signature(&node, source);
692 let kind = if inside_class {
693 SymbolKind::Method
694 } else {
695 SymbolKind::Function
696 };
697 symbols.push(Symbol {
698 name,
699 kind,
700 file: file.clone(),
701 line: node.start_position().row as u32 + 1,
702 token_cost: sig.len() / 4,
703 signature: sig,
704 });
705 }
706 }
707 "class_specifier" => {
708 if let Some(name) = find_child_text(&node, "type_identifier", source) {
709 let sig = extract_signature(&node, source);
710 symbols.push(Symbol {
711 name,
712 kind: SymbolKind::Class,
713 file: file.clone(),
714 line: node.start_position().row as u32 + 1,
715 token_cost: sig.len() / 4,
716 signature: sig,
717 });
718 }
719 let mut cursor = node.walk();
720 for child in node.children(&mut cursor) {
721 collect_cpp_symbols(child, source, file, true, symbols);
722 }
723 return;
724 }
725 "struct_specifier" => {
726 if let Some(name) = find_child_text(&node, "type_identifier", source) {
727 let sig = extract_signature(&node, source);
728 symbols.push(Symbol {
729 name,
730 kind: SymbolKind::Struct,
731 file: file.clone(),
732 line: node.start_position().row as u32 + 1,
733 token_cost: sig.len() / 4,
734 signature: sig,
735 });
736 }
737 }
738 "enum_specifier" => {
739 if let Some(name) = find_child_text(&node, "type_identifier", source) {
740 let sig = extract_signature(&node, source);
741 symbols.push(Symbol {
742 name,
743 kind: SymbolKind::Enum,
744 file: file.clone(),
745 line: node.start_position().row as u32 + 1,
746 token_cost: sig.len() / 4,
747 signature: sig,
748 });
749 }
750 }
751 _ => {}
752 }
753
754 let mut cursor = node.walk();
755 for child in node.children(&mut cursor) {
756 collect_cpp_symbols(child, source, file, inside_class, symbols);
757 }
758}
759
760fn collect_ruby_symbols(
761 node: Node,
762 source: &[u8],
763 file: &PathBuf,
764 inside_class: bool,
765 symbols: &mut Vec<Symbol>,
766) {
767 let kind_str = node.kind();
768
769 match kind_str {
770 "method" => {
771 if let Some(name) = find_child_text(&node, "identifier", source) {
772 let sig = extract_signature(&node, source);
773 let kind = if inside_class {
774 SymbolKind::Method
775 } else {
776 SymbolKind::Function
777 };
778 symbols.push(Symbol {
779 name,
780 kind,
781 file: file.clone(),
782 line: node.start_position().row as u32 + 1,
783 token_cost: sig.len() / 4,
784 signature: sig,
785 });
786 }
787 }
788 "class" => {
789 let name = find_child_text(&node, "constant", source)
791 .or_else(|| find_child_text(&node, "scope_resolution", source));
792 if let Some(name) = name {
793 let sig = extract_signature(&node, source);
794 symbols.push(Symbol {
795 name,
796 kind: SymbolKind::Class,
797 file: file.clone(),
798 line: node.start_position().row as u32 + 1,
799 token_cost: sig.len() / 4,
800 signature: sig,
801 });
802 }
803 let mut cursor = node.walk();
804 for child in node.children(&mut cursor) {
805 collect_ruby_symbols(child, source, file, true, symbols);
806 }
807 return;
808 }
809 "module" => {
810 if let Some(name) = find_child_text(&node, "constant", source) {
811 let sig = extract_signature(&node, source);
812 symbols.push(Symbol {
813 name,
814 kind: SymbolKind::Module,
815 file: file.clone(),
816 line: node.start_position().row as u32 + 1,
817 token_cost: sig.len() / 4,
818 signature: sig,
819 });
820 }
821 let mut cursor = node.walk();
822 for child in node.children(&mut cursor) {
823 collect_ruby_symbols(child, source, file, true, symbols);
824 }
825 return;
826 }
827 _ => {}
828 }
829
830 let mut cursor = node.walk();
831 for child in node.children(&mut cursor) {
832 collect_ruby_symbols(child, source, file, inside_class, symbols);
833 }
834}
835
836fn collect_php_symbols(
837 node: Node,
838 source: &[u8],
839 file: &PathBuf,
840 inside_class: bool,
841 symbols: &mut Vec<Symbol>,
842) {
843 let kind_str = node.kind();
844
845 match kind_str {
846 "function_definition" | "method_declaration" => {
847 if let Some(name) = find_child_text(&node, "name", source) {
848 let sig = extract_signature(&node, source);
849 let kind = if inside_class || kind_str == "method_declaration" {
850 SymbolKind::Method
851 } else {
852 SymbolKind::Function
853 };
854 symbols.push(Symbol {
855 name,
856 kind,
857 file: file.clone(),
858 line: node.start_position().row as u32 + 1,
859 token_cost: sig.len() / 4,
860 signature: sig,
861 });
862 }
863 }
864 "class_declaration" | "interface_declaration" | "trait_declaration" => {
865 if let Some(name) = find_child_text(&node, "name", source) {
866 let sig = extract_signature(&node, source);
867 let kind = match kind_str {
868 "interface_declaration" => SymbolKind::Interface,
869 "trait_declaration" => SymbolKind::Trait,
870 _ => SymbolKind::Class,
871 };
872 symbols.push(Symbol {
873 name,
874 kind,
875 file: file.clone(),
876 line: node.start_position().row as u32 + 1,
877 token_cost: sig.len() / 4,
878 signature: sig,
879 });
880 }
881 let mut cursor = node.walk();
882 for child in node.children(&mut cursor) {
883 collect_php_symbols(child, source, file, true, symbols);
884 }
885 return;
886 }
887 "namespace_definition" => {
888 if let Some(name) = find_child_text(&node, "namespace_name", source)
889 .or_else(|| find_child_text(&node, "name", source))
890 {
891 let sig = extract_signature(&node, source);
892 symbols.push(Symbol {
893 name,
894 kind: SymbolKind::Module,
895 file: file.clone(),
896 line: node.start_position().row as u32 + 1,
897 token_cost: sig.len() / 4,
898 signature: sig,
899 });
900 }
901 }
902 _ => {}
903 }
904
905 let mut cursor = node.walk();
906 for child in node.children(&mut cursor) {
907 collect_php_symbols(child, source, file, inside_class, symbols);
908 }
909}
910
911fn collect_kotlin_symbols(
912 node: Node,
913 source: &[u8],
914 file: &PathBuf,
915 inside_class: bool,
916 symbols: &mut Vec<Symbol>,
917) {
918 let kind_str = node.kind();
919
920 match kind_str {
921 "function_declaration" => {
922 if let Some(name) = find_child_text(&node, "simple_identifier", source) {
923 let sig = extract_signature(&node, source);
924 let kind = if inside_class {
925 SymbolKind::Method
926 } else {
927 SymbolKind::Function
928 };
929 symbols.push(Symbol {
930 name,
931 kind,
932 file: file.clone(),
933 line: node.start_position().row as u32 + 1,
934 token_cost: sig.len() / 4,
935 signature: sig,
936 });
937 }
938 }
939 "class_declaration" | "object_declaration" => {
940 if let Some(name) = find_child_text(&node, "type_identifier", source)
941 .or_else(|| find_child_text(&node, "simple_identifier", source))
942 {
943 let sig = extract_signature(&node, source);
944 symbols.push(Symbol {
945 name,
946 kind: SymbolKind::Class,
947 file: file.clone(),
948 line: node.start_position().row as u32 + 1,
949 token_cost: sig.len() / 4,
950 signature: sig,
951 });
952 }
953 let mut cursor = node.walk();
954 for child in node.children(&mut cursor) {
955 collect_kotlin_symbols(child, source, file, true, symbols);
956 }
957 return;
958 }
959 "interface_declaration" => {
960 if let Some(name) = find_child_text(&node, "type_identifier", source) {
961 let sig = extract_signature(&node, source);
962 symbols.push(Symbol {
963 name,
964 kind: SymbolKind::Interface,
965 file: file.clone(),
966 line: node.start_position().row as u32 + 1,
967 token_cost: sig.len() / 4,
968 signature: sig,
969 });
970 }
971 let mut cursor = node.walk();
972 for child in node.children(&mut cursor) {
973 collect_kotlin_symbols(child, source, file, true, symbols);
974 }
975 return;
976 }
977 _ => {}
978 }
979
980 let mut cursor = node.walk();
981 for child in node.children(&mut cursor) {
982 collect_kotlin_symbols(child, source, file, inside_class, symbols);
983 }
984}
985
986fn collect_swift_symbols(
987 node: Node,
988 source: &[u8],
989 file: &PathBuf,
990 inside_class: bool,
991 symbols: &mut Vec<Symbol>,
992) {
993 let kind_str = node.kind();
994
995 match kind_str {
996 "function_declaration" => {
997 if let Some(name) = find_child_text(&node, "simple_identifier", source) {
998 let sig = extract_signature(&node, source);
999 let kind = if inside_class {
1000 SymbolKind::Method
1001 } else {
1002 SymbolKind::Function
1003 };
1004 symbols.push(Symbol {
1005 name,
1006 kind,
1007 file: file.clone(),
1008 line: node.start_position().row as u32 + 1,
1009 token_cost: sig.len() / 4,
1010 signature: sig,
1011 });
1012 }
1013 }
1014 "class_declaration" | "struct_declaration" | "enum_declaration" => {
1015 if let Some(name) = find_child_text(&node, "type_identifier", source)
1016 .or_else(|| find_child_text(&node, "simple_identifier", source))
1017 {
1018 let sig = extract_signature(&node, source);
1019 let kind = match kind_str {
1020 "struct_declaration" => SymbolKind::Struct,
1021 "enum_declaration" => SymbolKind::Enum,
1022 _ => SymbolKind::Class,
1023 };
1024 symbols.push(Symbol {
1025 name,
1026 kind,
1027 file: file.clone(),
1028 line: node.start_position().row as u32 + 1,
1029 token_cost: sig.len() / 4,
1030 signature: sig,
1031 });
1032 }
1033 let mut cursor = node.walk();
1034 for child in node.children(&mut cursor) {
1035 collect_swift_symbols(child, source, file, true, symbols);
1036 }
1037 return;
1038 }
1039 "protocol_declaration" => {
1040 if let Some(name) = find_child_text(&node, "type_identifier", source)
1041 .or_else(|| find_child_text(&node, "simple_identifier", source))
1042 {
1043 let sig = extract_signature(&node, source);
1044 symbols.push(Symbol {
1045 name,
1046 kind: SymbolKind::Interface,
1047 file: file.clone(),
1048 line: node.start_position().row as u32 + 1,
1049 token_cost: sig.len() / 4,
1050 signature: sig,
1051 });
1052 }
1053 let mut cursor = node.walk();
1054 for child in node.children(&mut cursor) {
1055 collect_swift_symbols(child, source, file, true, symbols);
1056 }
1057 return;
1058 }
1059 _ => {}
1060 }
1061
1062 let mut cursor = node.walk();
1063 for child in node.children(&mut cursor) {
1064 collect_swift_symbols(child, source, file, inside_class, symbols);
1065 }
1066}
1067
1068fn find_nested_function_name(node: &Node, source: &[u8]) -> Option<String> {
1073 let mut cursor = node.walk();
1074 for child in node.children(&mut cursor) {
1075 if child.kind() == "function_declarator" {
1076 return find_child_text(&child, "identifier", source)
1077 .or_else(|| find_child_text(&child, "field_identifier", source));
1078 }
1079 }
1080 None
1081}
1082
1083fn collect_references(
1084 node: Node,
1085 source: &[u8],
1086 file: &PathBuf,
1087 enclosing: &Option<String>,
1088 refs: &mut Vec<Reference>,
1089) {
1090 let kind_str = node.kind();
1091
1092 let new_enclosing = match kind_str {
1094 "function_item" | "function_definition" | "function_declaration" | "method_declaration" => {
1095 find_child_text(&node, "identifier", source)
1096 .or_else(|| find_child_text(&node, "field_identifier", source))
1097 }
1098 _ => None,
1099 };
1100 let current_enclosing = if new_enclosing.is_some() {
1101 &new_enclosing
1102 } else {
1103 enclosing
1104 };
1105
1106 if kind_str == "identifier" || kind_str == "type_identifier" {
1108 let parent_kind = node.parent().map(|p| p.kind().to_string());
1109 let is_definition = matches!(
1110 parent_kind.as_deref(),
1111 Some(
1112 "function_item"
1113 | "function_definition"
1114 | "function_declaration"
1115 | "struct_item"
1116 | "enum_item"
1117 | "trait_item"
1118 | "class_definition"
1119 | "class_declaration"
1120 | "method_definition"
1121 | "variable_declarator"
1122 | "type_spec"
1123 )
1124 );
1125
1126 if !is_definition {
1127 let name = node_text(&node, source);
1128 if !name.is_empty() {
1129 refs.push(Reference {
1130 from_file: file.clone(),
1131 from_symbol: current_enclosing.clone(),
1132 to_name: name,
1133 line: node.start_position().row as u32 + 1,
1134 });
1135 }
1136 }
1137 }
1138
1139 let mut cursor = node.walk();
1140 for child in node.children(&mut cursor) {
1141 collect_references(child, source, file, current_enclosing, refs);
1142 }
1143}
1144
1145fn extract_signature(node: &Node, source: &[u8]) -> String {
1147 let text = node_text(node, source);
1148
1149 let sig = if let Some(pos) = text.find('{') {
1151 &text[..pos]
1152 } else if let Some(pos) = text.find(':') {
1153 &text[..pos]
1155 } else {
1156 &text
1157 };
1158
1159 let collapsed: String = sig.split_whitespace().collect::<Vec<_>>().join(" ");
1161
1162 collapsed
1163}
1164
1165fn node_text(node: &Node, source: &[u8]) -> String {
1166 let start = node.start_byte();
1167 let end = node.end_byte();
1168 if start >= source.len() || end > source.len() {
1169 return String::new();
1170 }
1171 String::from_utf8_lossy(&source[start..end]).to_string()
1172}
1173
1174fn find_child_text(node: &Node, kind: &str, source: &[u8]) -> Option<String> {
1175 let mut cursor = node.walk();
1176 for child in node.children(&mut cursor) {
1177 if child.kind() == kind {
1178 let text = node_text(&child, source);
1179 if !text.is_empty() {
1180 return Some(text);
1181 }
1182 }
1183 }
1184 None
1185}
1186
1187fn child_has_kind(node: &Node, kind: &str) -> bool {
1188 let mut cursor = node.walk();
1189 for child in node.children(&mut cursor) {
1190 if child.kind() == kind {
1191 return true;
1192 }
1193 }
1194 false
1195}
1196
1197#[cfg(test)]
1198mod tests {
1199 use super::*;
1200
1201 fn make_rust_file() -> SourceFile {
1202 SourceFile {
1203 path: PathBuf::from("src/lib.rs"),
1204 language: Language::Rust,
1205 content: r#"
1206pub fn top_level(x: i32) -> bool {
1207 x > 0
1208}
1209
1210pub struct Config {
1211 name: String,
1212 value: u32,
1213}
1214
1215pub enum Color {
1216 Red,
1217 Green,
1218 Blue,
1219}
1220
1221pub trait Drawable {
1222 fn draw(&self);
1223}
1224
1225impl Config {
1226 pub fn new(name: String) -> Self {
1227 Self { name, value: 0 }
1228 }
1229}
1230"#
1231 .to_string(),
1232 }
1233 }
1234
1235 fn make_python_file() -> SourceFile {
1236 SourceFile {
1237 path: PathBuf::from("app.py"),
1238 language: Language::Python,
1239 content: r#"
1240def standalone():
1241 pass
1242
1243class MyClass:
1244 def method(self):
1245 pass
1246
1247 def another(self, x):
1248 return x + 1
1249"#
1250 .to_string(),
1251 }
1252 }
1253
1254 fn make_typescript_file() -> SourceFile {
1255 SourceFile {
1256 path: PathBuf::from("app.ts"),
1257 language: Language::TypeScript,
1258 content: r#"
1259function greet(name: string): string {
1260 return `Hello ${name}`;
1261}
1262
1263class Greeter {
1264 sayHello() {
1265 console.log("hello");
1266 }
1267}
1268
1269const add = (a: number, b: number) => a + b;
1270"#
1271 .to_string(),
1272 }
1273 }
1274
1275 #[test]
1276 fn parse_rust_symbols() {
1277 let file = make_rust_file();
1278 let symbols = extract_symbols(&file).unwrap();
1279
1280 let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
1281 assert!(names.contains(&"top_level"), "missing top_level: {names:?}");
1282 assert!(
1283 names.contains(&"Config"),
1284 "missing Config struct: {names:?}"
1285 );
1286 assert!(names.contains(&"Color"), "missing Color enum: {names:?}");
1287 assert!(
1288 names.contains(&"Drawable"),
1289 "missing Drawable trait: {names:?}"
1290 );
1291 assert!(names.contains(&"new"), "missing new method: {names:?}");
1292
1293 let top = symbols.iter().find(|s| s.name == "top_level").unwrap();
1295 assert_eq!(top.kind, SymbolKind::Function);
1296
1297 let config_struct = symbols
1298 .iter()
1299 .find(|s| s.name == "Config" && s.kind == SymbolKind::Struct)
1300 .unwrap();
1301 assert_eq!(config_struct.kind, SymbolKind::Struct);
1302
1303 let new_method = symbols.iter().find(|s| s.name == "new").unwrap();
1304 assert_eq!(new_method.kind, SymbolKind::Method);
1305
1306 assert!(top.signature.contains("fn top_level"));
1308 assert!(top.token_cost > 0);
1309 }
1310
1311 #[test]
1312 fn parse_python_symbols() {
1313 let file = make_python_file();
1314 let symbols = extract_symbols(&file).unwrap();
1315
1316 let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
1317 assert!(
1318 names.contains(&"standalone"),
1319 "missing standalone: {names:?}"
1320 );
1321 assert!(names.contains(&"MyClass"), "missing MyClass: {names:?}");
1322 assert!(names.contains(&"method"), "missing method: {names:?}");
1323 assert!(names.contains(&"another"), "missing another: {names:?}");
1324
1325 let standalone = symbols.iter().find(|s| s.name == "standalone").unwrap();
1326 assert_eq!(standalone.kind, SymbolKind::Function);
1327
1328 let method = symbols.iter().find(|s| s.name == "method").unwrap();
1329 assert_eq!(method.kind, SymbolKind::Method);
1330 }
1331
#[test]
fn parse_typescript_symbols() {
    // The TypeScript fixture is produced by `make_typescript_file`, defined
    // elsewhere in this test module.
    let symbols = extract_symbols(&make_typescript_file()).unwrap();
    let names: Vec<&str> = symbols.iter().map(|sym| sym.name.as_str()).collect();

    // (symbol name, label used in the failure message)
    let expectations = [
        ("greet", "greet"),
        ("Greeter", "Greeter"),
        ("sayHello", "sayHello"),
        ("add", "add arrow fn"),
    ];
    for (name, label) in expectations {
        assert!(names.contains(&name), "missing {label}: {names:?}");
    }
}
1343
#[test]
fn parse_empty_file() {
    // A Rust file with no content at all must yield no symbols.
    let source = String::new();
    let file = SourceFile {
        content: source,
        language: Language::Rust,
        path: PathBuf::from("empty.rs"),
    };

    let symbols = extract_symbols(&file).unwrap();
    assert!(symbols.is_empty());
}
1354
#[test]
fn parse_file_with_syntax_errors_gives_partial_results() {
    // `broken` has an unterminated parameter list; the items around it are
    // well-formed.
    let broken_source = r#"
fn valid_fn() -> bool { true }

fn broken( {

struct ValidStruct {
 x: i32,
}
"#;
    let file = SourceFile {
        content: broken_source.to_string(),
        language: Language::Rust,
        path: PathBuf::from("broken.rs"),
    };

    let symbols = extract_symbols(&file).unwrap();
    let names: Vec<&str> = symbols.iter().map(|sym| sym.name.as_str()).collect();

    // Only the well-formed function preceding the error is required here.
    let found_valid_fn = names.iter().any(|name| *name == "valid_fn");
    assert!(
        found_valid_fn,
        "should extract valid symbols despite errors: {names:?}"
    );
}
1379
#[test]
fn extract_references_finds_calls() {
    // `caller` invokes a free function and an associated function on `Config`.
    let source = r#"
fn caller() {
 helper();
 let x = Config::new();
}

fn helper() {}
"#;
    let file = SourceFile {
        content: source.to_string(),
        language: Language::Rust,
        path: PathBuf::from("main.rs"),
    };

    let refs = extract_references(&file).unwrap();
    let ref_names: Vec<&str> = refs.iter().map(|reference| reference.to_name.as_str()).collect();

    // Both call targets must show up as reference names.
    for target in ["helper", "Config"] {
        assert!(
            ref_names.contains(&target),
            "should find reference to {target}: {ref_names:?}"
        );
    }
}
1406
#[test]
fn parse_java_file() {
    // Java fixture: a class with two methods, an interface, and an enum.
    let source = r#"
public class Main {
 public static void main(String[] args) {
 System.out.println("Hello");
 }
 public int add(int a, int b) {
 return a + b;
 }
}

interface Runnable {
 void run();
}

enum Color { RED, GREEN, BLUE }
"#;
    let file = SourceFile {
        content: source.to_string(),
        language: Language::Java,
        path: PathBuf::from("Main.java"),
    };

    let symbols = extract_symbols(&file).unwrap();
    let names: Vec<&str> = symbols.iter().map(|sym| sym.name.as_str()).collect();

    // (symbol name, description used in the failure message)
    let expectations = [
        ("Main", "class Main"),
        ("main", "method main"),
        ("add", "method add"),
        ("Runnable", "interface Runnable"),
        ("Color", "enum Color"),
    ];
    for (name, what) in expectations {
        assert!(names.contains(&name), "should find {what}: {names:?}");
    }
}
1447
#[test]
fn parse_c_file() {
    // C fixture: a struct, an enum, and two free functions.
    let source = r#"
struct Point {
 int x;
 int y;
};

enum Direction { NORTH, SOUTH, EAST, WEST };

int add(int a, int b) {
 return a + b;
}

int main() {
 return 0;
}
"#;
    let file = SourceFile {
        content: source.to_string(),
        language: Language::C,
        path: PathBuf::from("main.c"),
    };

    let symbols = extract_symbols(&file).unwrap();
    let names: Vec<&str> = symbols.iter().map(|sym| sym.name.as_str()).collect();

    // (symbol name, description used in the failure message)
    let expectations = [
        ("Point", "struct Point"),
        ("Direction", "enum Direction"),
        ("add", "function add"),
        ("main", "function main"),
    ];
    for (name, what) in expectations {
        assert!(names.contains(&name), "should find {what}: {names:?}");
    }
}
1490
#[test]
fn parse_cpp_file() {
    // C++ fixture: a class with an inline method, a struct, and `main`.
    let source = r#"
class Calculator {
public:
 int add(int a, int b) {
 return a + b;
 }
};

struct Point {
 int x, y;
};

int main() {
 return 0;
}
"#;
    let file = SourceFile {
        content: source.to_string(),
        language: Language::Cpp,
        path: PathBuf::from("main.cpp"),
    };

    let symbols = extract_symbols(&file).unwrap();
    let names: Vec<&str> = symbols.iter().map(|sym| sym.name.as_str()).collect();

    // (symbol name, description used in the failure message)
    let expectations = [
        ("Calculator", "class Calculator"),
        ("add", "method add"),
        ("Point", "struct Point"),
        ("main", "function main"),
    ];
    for (name, what) in expectations {
        assert!(names.contains(&name), "should find {what}: {names:?}");
    }
}
1530
#[test]
fn parse_ruby_file() {
    // Ruby fixture: a module wrapping a class with two methods, plus a
    // top-level `def`.
    let source = r#"
module MyApp
 class Calculator
 def add(a, b)
 a + b
 end

 def subtract(a, b)
 a - b
 end
 end
end

def standalone_function
 puts "hello"
end
"#;
    let file = SourceFile {
        content: source.to_string(),
        language: Language::Ruby,
        path: PathBuf::from("app.rb"),
    };

    let symbols = extract_symbols(&file).unwrap();
    let names: Vec<&str> = symbols.iter().map(|sym| sym.name.as_str()).collect();

    // (symbol name, description used in the failure message)
    let expectations = [
        ("MyApp", "module MyApp"),
        ("Calculator", "class Calculator"),
        ("add", "method add"),
        ("subtract", "method subtract"),
        ("standalone_function", "function standalone_function"),
    ];
    for (name, what) in expectations {
        assert!(names.contains(&name), "should find {what}: {names:?}");
    }
}
1575}