Skip to main content

argus_repomap/
parser.rs

1use std::path::PathBuf;
2
3use argus_core::ArgusError;
4use tree_sitter::{Node, Parser};
5
6use crate::walker::{Language, SourceFile};
7
8/// A symbol extracted from source code via tree-sitter.
9///
10/// # Examples
11///
12/// ```
13/// use std::path::PathBuf;
14/// use argus_repomap::parser::{Symbol, SymbolKind};
15///
16/// let sym = Symbol {
17///     name: "main".into(),
18///     kind: SymbolKind::Function,
19///     file: PathBuf::from("src/main.rs"),
20///     line: 1,
21///     signature: "fn main()".into(),
22///     token_cost: 2,
23/// };
24/// assert_eq!(sym.kind, SymbolKind::Function);
25/// ```
26#[derive(Debug, Clone)]
27pub struct Symbol {
28    /// Symbol name (e.g. function name, struct name).
29    pub name: String,
30    /// What kind of symbol this is.
31    pub kind: SymbolKind,
32    /// File path (relative to repo root).
33    pub file: PathBuf,
34    /// Line number where the symbol starts (1-indexed).
35    pub line: u32,
36    /// Human-readable signature (e.g. `fn process(input: &str) -> Result<Output>`).
37    pub signature: String,
38    /// Estimated token cost for including this symbol in context.
39    pub token_cost: usize,
40}
41
/// The category assigned to each extracted [`Symbol`].
///
/// # Examples
///
/// ```
/// use argus_repomap::parser::SymbolKind;
///
/// let kind = SymbolKind::Function;
/// assert_eq!(format!("{kind:?}"), "Function");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SymbolKind {
    /// A function that is not nested in a class or `impl` block.
    Function,
    /// A function defined inside a class, `impl` block, or similar container.
    Method,
    /// A struct definition.
    Struct,
    /// An enum definition.
    Enum,
    /// A trait definition.
    Trait,
    /// A Rust `impl` block.
    Impl,
    /// A class definition.
    Class,
    /// An interface definition.
    Interface,
    /// A module-like container, e.g. a Ruby module or a PHP namespace.
    Module,
}
64
/// A use of an identifier that points at some other symbol.
///
/// # Examples
///
/// ```
/// use std::path::PathBuf;
/// use argus_repomap::parser::Reference;
///
/// let reference = Reference {
///     from_file: PathBuf::from("src/main.rs"),
///     from_symbol: Some("main".into()),
///     to_name: "Config".into(),
///     line: 5,
/// };
/// assert_eq!(reference.to_name, "Config");
/// ```
#[derive(Debug, Clone)]
pub struct Reference {
    /// File in which the reference appears.
    pub from_file: PathBuf,
    /// Name of the symbol enclosing the reference, when there is one.
    pub from_symbol: Option<String>,
    /// Name of the identifier being referred to.
    pub to_name: String,
    /// Line on which the reference occurs.
    pub line: u32,
}
92
93/// Extract all symbols from a source file using tree-sitter.
94///
95/// Returns an empty vec for unparseable files. Tree-sitter is error-tolerant,
96/// so partial results are returned even for files with syntax errors.
97///
98/// # Errors
99///
100/// Returns [`ArgusError::Parse`] if the language grammar cannot be loaded.
101///
102/// # Examples
103///
104/// ```
105/// use std::path::PathBuf;
106/// use argus_repomap::walker::{Language, SourceFile};
107/// use argus_repomap::parser::extract_symbols;
108///
109/// let file = SourceFile {
110///     path: PathBuf::from("example.rs"),
111///     language: Language::Rust,
112///     content: "fn hello() {}".to_string(),
113/// };
114/// let symbols = extract_symbols(&file).unwrap();
115/// assert_eq!(symbols.len(), 1);
116/// assert_eq!(symbols[0].name, "hello");
117/// ```
118pub fn extract_symbols(file: &SourceFile) -> Result<Vec<Symbol>, ArgusError> {
119    let Some(ts_language) = file.language.tree_sitter_language() else {
120        return Ok(Vec::new());
121    };
122
123    let mut parser = Parser::new();
124    parser
125        .set_language(&ts_language)
126        .map_err(|e| ArgusError::Parse(format!("failed to set language: {e}")))?;
127
128    let Some(tree) = parser.parse(&file.content, None) else {
129        return Ok(Vec::new());
130    };
131
132    let mut symbols = Vec::new();
133    let source = file.content.as_bytes();
134    collect_symbols(
135        tree.root_node(),
136        source,
137        &file.path,
138        file.language,
139        false,
140        &mut symbols,
141    );
142
143    Ok(symbols)
144}
145
146/// Extract references (identifiers referring to other symbols) from a source file.
147///
148/// # Errors
149///
150/// Returns [`ArgusError::Parse`] if the language grammar cannot be loaded.
151///
152/// # Examples
153///
154/// ```
155/// use std::path::PathBuf;
156/// use argus_repomap::walker::{Language, SourceFile};
157/// use argus_repomap::parser::extract_references;
158///
159/// let file = SourceFile {
160///     path: PathBuf::from("example.rs"),
161///     language: Language::Rust,
162///     content: "fn main() { hello(); }".to_string(),
163/// };
164/// let refs = extract_references(&file).unwrap();
165/// assert!(refs.iter().any(|r| r.to_name == "hello"));
166/// ```
167pub fn extract_references(file: &SourceFile) -> Result<Vec<Reference>, ArgusError> {
168    let Some(ts_language) = file.language.tree_sitter_language() else {
169        return Ok(Vec::new());
170    };
171
172    let mut parser = Parser::new();
173    parser
174        .set_language(&ts_language)
175        .map_err(|e| ArgusError::Parse(format!("failed to set language: {e}")))?;
176
177    let Some(tree) = parser.parse(&file.content, None) else {
178        return Ok(Vec::new());
179    };
180
181    let mut refs = Vec::new();
182    collect_references(
183        tree.root_node(),
184        file.content.as_bytes(),
185        &file.path,
186        &None,
187        &mut refs,
188    );
189
190    Ok(refs)
191}
192
193fn collect_symbols(
194    node: Node,
195    source: &[u8],
196    file: &PathBuf,
197    language: Language,
198    inside_impl: bool,
199    symbols: &mut Vec<Symbol>,
200) {
201    match language {
202        Language::Rust => collect_rust_symbols(node, source, file, inside_impl, symbols),
203        Language::Python => collect_python_symbols(node, source, file, false, symbols),
204        Language::TypeScript | Language::JavaScript => {
205            collect_js_ts_symbols(node, source, file, false, symbols);
206        }
207        Language::Go => collect_go_symbols(node, source, file, symbols),
208        Language::Java => collect_java_symbols(node, source, file, false, symbols),
209        Language::C => collect_c_symbols(node, source, file, symbols),
210        Language::Cpp => collect_cpp_symbols(node, source, file, false, symbols),
211        Language::Ruby => collect_ruby_symbols(node, source, file, false, symbols),
212        Language::Php => collect_php_symbols(node, source, file, false, symbols),
213        Language::Kotlin => collect_kotlin_symbols(node, source, file, false, symbols),
214        Language::Swift => collect_swift_symbols(node, source, file, false, symbols),
215        Language::Unknown => {}
216    }
217}
218
219fn collect_rust_symbols(
220    node: Node,
221    source: &[u8],
222    file: &PathBuf,
223    inside_impl: bool,
224    symbols: &mut Vec<Symbol>,
225) {
226    let kind_str = node.kind();
227
228    match kind_str {
229        "function_item" => {
230            if let Some(name) = find_child_text(&node, "identifier", source) {
231                let sig = extract_signature(&node, source);
232                let kind = if inside_impl {
233                    SymbolKind::Method
234                } else {
235                    SymbolKind::Function
236                };
237                symbols.push(Symbol {
238                    name,
239                    kind,
240                    file: file.clone(),
241                    line: node.start_position().row as u32 + 1,
242                    token_cost: sig.len() / 4,
243                    signature: sig,
244                });
245            }
246        }
247        "struct_item" => {
248            if let Some(name) = find_child_text(&node, "type_identifier", source) {
249                let sig = extract_signature(&node, source);
250                symbols.push(Symbol {
251                    name,
252                    kind: SymbolKind::Struct,
253                    file: file.clone(),
254                    line: node.start_position().row as u32 + 1,
255                    token_cost: sig.len() / 4,
256                    signature: sig,
257                });
258            }
259        }
260        "enum_item" => {
261            if let Some(name) = find_child_text(&node, "type_identifier", source) {
262                let sig = extract_signature(&node, source);
263                symbols.push(Symbol {
264                    name,
265                    kind: SymbolKind::Enum,
266                    file: file.clone(),
267                    line: node.start_position().row as u32 + 1,
268                    token_cost: sig.len() / 4,
269                    signature: sig,
270                });
271            }
272        }
273        "trait_item" => {
274            if let Some(name) = find_child_text(&node, "type_identifier", source) {
275                let sig = extract_signature(&node, source);
276                symbols.push(Symbol {
277                    name,
278                    kind: SymbolKind::Trait,
279                    file: file.clone(),
280                    line: node.start_position().row as u32 + 1,
281                    token_cost: sig.len() / 4,
282                    signature: sig,
283                });
284            }
285        }
286        "impl_item" => {
287            if let Some(name) = find_child_text(&node, "type_identifier", source) {
288                let sig = extract_signature(&node, source);
289                symbols.push(Symbol {
290                    name: name.clone(),
291                    kind: SymbolKind::Impl,
292                    file: file.clone(),
293                    line: node.start_position().row as u32 + 1,
294                    token_cost: sig.len() / 4,
295                    signature: sig,
296                });
297            }
298            // Recurse into impl body to find methods
299            let mut cursor = node.walk();
300            for child in node.children(&mut cursor) {
301                collect_rust_symbols(child, source, file, true, symbols);
302            }
303            return; // Don't recurse again below
304        }
305        _ => {}
306    }
307
308    // Recurse into children (except for impl which we already handled)
309    let mut cursor = node.walk();
310    for child in node.children(&mut cursor) {
311        collect_rust_symbols(child, source, file, inside_impl, symbols);
312    }
313}
314
315fn collect_python_symbols(
316    node: Node,
317    source: &[u8],
318    file: &PathBuf,
319    inside_class: bool,
320    symbols: &mut Vec<Symbol>,
321) {
322    let kind_str = node.kind();
323
324    match kind_str {
325        "function_definition" => {
326            if let Some(name) = find_child_text(&node, "identifier", source) {
327                let sig = extract_signature(&node, source);
328                let kind = if inside_class {
329                    SymbolKind::Method
330                } else {
331                    SymbolKind::Function
332                };
333                symbols.push(Symbol {
334                    name,
335                    kind,
336                    file: file.clone(),
337                    line: node.start_position().row as u32 + 1,
338                    token_cost: sig.len() / 4,
339                    signature: sig,
340                });
341            }
342        }
343        "class_definition" => {
344            if let Some(name) = find_child_text(&node, "identifier", source) {
345                let sig = extract_signature(&node, source);
346                symbols.push(Symbol {
347                    name,
348                    kind: SymbolKind::Class,
349                    file: file.clone(),
350                    line: node.start_position().row as u32 + 1,
351                    token_cost: sig.len() / 4,
352                    signature: sig,
353                });
354            }
355            // Recurse into class body to find methods
356            let mut cursor = node.walk();
357            for child in node.children(&mut cursor) {
358                collect_python_symbols(child, source, file, true, symbols);
359            }
360            return;
361        }
362        _ => {}
363    }
364
365    let mut cursor = node.walk();
366    for child in node.children(&mut cursor) {
367        collect_python_symbols(child, source, file, inside_class, symbols);
368    }
369}
370
371fn collect_js_ts_symbols(
372    node: Node,
373    source: &[u8],
374    file: &PathBuf,
375    inside_class: bool,
376    symbols: &mut Vec<Symbol>,
377) {
378    let kind_str = node.kind();
379
380    match kind_str {
381        "function_declaration" => {
382            if let Some(name) = find_child_text(&node, "identifier", source) {
383                let sig = extract_signature(&node, source);
384                symbols.push(Symbol {
385                    name,
386                    kind: SymbolKind::Function,
387                    file: file.clone(),
388                    line: node.start_position().row as u32 + 1,
389                    token_cost: sig.len() / 4,
390                    signature: sig,
391                });
392            }
393        }
394        "class_declaration" => {
395            let name = find_child_text(&node, "type_identifier", source)
396                .or_else(|| find_child_text(&node, "identifier", source));
397            if let Some(name) = name {
398                let sig = extract_signature(&node, source);
399                symbols.push(Symbol {
400                    name,
401                    kind: SymbolKind::Class,
402                    file: file.clone(),
403                    line: node.start_position().row as u32 + 1,
404                    token_cost: sig.len() / 4,
405                    signature: sig,
406                });
407            }
408            let mut cursor = node.walk();
409            for child in node.children(&mut cursor) {
410                collect_js_ts_symbols(child, source, file, true, symbols);
411            }
412            return;
413        }
414        "method_definition" => {
415            if let Some(name) = find_child_text(&node, "property_identifier", source) {
416                let sig = extract_signature(&node, source);
417                symbols.push(Symbol {
418                    name,
419                    kind: SymbolKind::Method,
420                    file: file.clone(),
421                    line: node.start_position().row as u32 + 1,
422                    token_cost: sig.len() / 4,
423                    signature: sig,
424                });
425            }
426        }
427        "lexical_declaration" => {
428            // Arrow functions assigned to const: const foo = () => {}
429            let mut cursor = node.walk();
430            for child in node.children(&mut cursor) {
431                if child.kind() == "variable_declarator" {
432                    let has_arrow = child_has_kind(&child, "arrow_function");
433                    if has_arrow {
434                        if let Some(name) = find_child_text(&child, "identifier", source) {
435                            let sig = extract_signature(&node, source);
436                            symbols.push(Symbol {
437                                name,
438                                kind: SymbolKind::Function,
439                                file: file.clone(),
440                                line: node.start_position().row as u32 + 1,
441                                token_cost: sig.len() / 4,
442                                signature: sig,
443                            });
444                        }
445                    }
446                }
447            }
448        }
449        _ => {}
450    }
451
452    if !inside_class || kind_str != "class_declaration" {
453        let mut cursor = node.walk();
454        for child in node.children(&mut cursor) {
455            collect_js_ts_symbols(child, source, file, inside_class, symbols);
456        }
457    }
458}
459
460fn collect_go_symbols(node: Node, source: &[u8], file: &PathBuf, symbols: &mut Vec<Symbol>) {
461    let kind_str = node.kind();
462
463    match kind_str {
464        "function_declaration" => {
465            if let Some(name) = find_child_text(&node, "identifier", source) {
466                let sig = extract_signature(&node, source);
467                symbols.push(Symbol {
468                    name,
469                    kind: SymbolKind::Function,
470                    file: file.clone(),
471                    line: node.start_position().row as u32 + 1,
472                    token_cost: sig.len() / 4,
473                    signature: sig,
474                });
475            }
476        }
477        "method_declaration" => {
478            if let Some(name) = find_child_text(&node, "field_identifier", source) {
479                let sig = extract_signature(&node, source);
480                symbols.push(Symbol {
481                    name,
482                    kind: SymbolKind::Method,
483                    file: file.clone(),
484                    line: node.start_position().row as u32 + 1,
485                    token_cost: sig.len() / 4,
486                    signature: sig,
487                });
488            }
489        }
490        "type_declaration" => {
491            let mut cursor = node.walk();
492            for child in node.children(&mut cursor) {
493                if child.kind() == "type_spec" {
494                    if let Some(name) = find_child_text(&child, "type_identifier", source) {
495                        let has_struct = child_has_kind(&child, "struct_type");
496                        let has_interface = child_has_kind(&child, "interface_type");
497                        let kind = if has_struct {
498                            SymbolKind::Struct
499                        } else if has_interface {
500                            SymbolKind::Interface
501                        } else {
502                            continue;
503                        };
504                        let sig = extract_signature(&child, source);
505                        symbols.push(Symbol {
506                            name,
507                            kind,
508                            file: file.clone(),
509                            line: child.start_position().row as u32 + 1,
510                            token_cost: sig.len() / 4,
511                            signature: sig,
512                        });
513                    }
514                }
515            }
516        }
517        _ => {}
518    }
519
520    let mut cursor = node.walk();
521    for child in node.children(&mut cursor) {
522        collect_go_symbols(child, source, file, symbols);
523    }
524}
525
526fn collect_java_symbols(
527    node: Node,
528    source: &[u8],
529    file: &PathBuf,
530    inside_class: bool,
531    symbols: &mut Vec<Symbol>,
532) {
533    let kind_str = node.kind();
534
535    match kind_str {
536        "method_declaration" | "constructor_declaration" => {
537            if let Some(name) = find_child_text(&node, "identifier", source) {
538                let sig = extract_signature(&node, source);
539                let kind = if inside_class {
540                    SymbolKind::Method
541                } else {
542                    SymbolKind::Function
543                };
544                symbols.push(Symbol {
545                    name,
546                    kind,
547                    file: file.clone(),
548                    line: node.start_position().row as u32 + 1,
549                    token_cost: sig.len() / 4,
550                    signature: sig,
551                });
552            }
553        }
554        "class_declaration" => {
555            if let Some(name) = find_child_text(&node, "identifier", source) {
556                let sig = extract_signature(&node, source);
557                symbols.push(Symbol {
558                    name,
559                    kind: SymbolKind::Class,
560                    file: file.clone(),
561                    line: node.start_position().row as u32 + 1,
562                    token_cost: sig.len() / 4,
563                    signature: sig,
564                });
565            }
566            let mut cursor = node.walk();
567            for child in node.children(&mut cursor) {
568                collect_java_symbols(child, source, file, true, symbols);
569            }
570            return;
571        }
572        "interface_declaration" => {
573            if let Some(name) = find_child_text(&node, "identifier", source) {
574                let sig = extract_signature(&node, source);
575                symbols.push(Symbol {
576                    name,
577                    kind: SymbolKind::Interface,
578                    file: file.clone(),
579                    line: node.start_position().row as u32 + 1,
580                    token_cost: sig.len() / 4,
581                    signature: sig,
582                });
583            }
584            let mut cursor = node.walk();
585            for child in node.children(&mut cursor) {
586                collect_java_symbols(child, source, file, true, symbols);
587            }
588            return;
589        }
590        "enum_declaration" => {
591            if let Some(name) = find_child_text(&node, "identifier", source) {
592                let sig = extract_signature(&node, source);
593                symbols.push(Symbol {
594                    name,
595                    kind: SymbolKind::Enum,
596                    file: file.clone(),
597                    line: node.start_position().row as u32 + 1,
598                    token_cost: sig.len() / 4,
599                    signature: sig,
600                });
601            }
602        }
603        _ => {}
604    }
605
606    let mut cursor = node.walk();
607    for child in node.children(&mut cursor) {
608        collect_java_symbols(child, source, file, inside_class, symbols);
609    }
610}
611
612fn collect_c_symbols(node: Node, source: &[u8], file: &PathBuf, symbols: &mut Vec<Symbol>) {
613    let kind_str = node.kind();
614
615    match kind_str {
616        "function_definition" | "declaration" => {
617            // For declarations, only match function declarations (with function_declarator)
618            if kind_str == "declaration" {
619                let has_func = child_has_kind(&node, "function_declarator");
620                if !has_func {
621                    // Skip non-function declarations, but recurse
622                    let mut cursor = node.walk();
623                    for child in node.children(&mut cursor) {
624                        collect_c_symbols(child, source, file, symbols);
625                    }
626                    return;
627                }
628            }
629            // Find the function name via function_declarator -> identifier
630            if let Some(name) = find_nested_function_name(&node, source) {
631                let sig = extract_signature(&node, source);
632                symbols.push(Symbol {
633                    name,
634                    kind: SymbolKind::Function,
635                    file: file.clone(),
636                    line: node.start_position().row as u32 + 1,
637                    token_cost: sig.len() / 4,
638                    signature: sig,
639                });
640            }
641        }
642        "struct_specifier" => {
643            if let Some(name) = find_child_text(&node, "type_identifier", source) {
644                let sig = extract_signature(&node, source);
645                symbols.push(Symbol {
646                    name,
647                    kind: SymbolKind::Struct,
648                    file: file.clone(),
649                    line: node.start_position().row as u32 + 1,
650                    token_cost: sig.len() / 4,
651                    signature: sig,
652                });
653            }
654        }
655        "enum_specifier" => {
656            if let Some(name) = find_child_text(&node, "type_identifier", source) {
657                let sig = extract_signature(&node, source);
658                symbols.push(Symbol {
659                    name,
660                    kind: SymbolKind::Enum,
661                    file: file.clone(),
662                    line: node.start_position().row as u32 + 1,
663                    token_cost: sig.len() / 4,
664                    signature: sig,
665                });
666            }
667        }
668        _ => {}
669    }
670
671    let mut cursor = node.walk();
672    for child in node.children(&mut cursor) {
673        collect_c_symbols(child, source, file, symbols);
674    }
675}
676
677fn collect_cpp_symbols(
678    node: Node,
679    source: &[u8],
680    file: &PathBuf,
681    inside_class: bool,
682    symbols: &mut Vec<Symbol>,
683) {
684    let kind_str = node.kind();
685
686    match kind_str {
687        "function_definition" => {
688            if let Some(name) = find_nested_function_name(&node, source)
689                .or_else(|| find_child_text(&node, "identifier", source))
690            {
691                let sig = extract_signature(&node, source);
692                let kind = if inside_class {
693                    SymbolKind::Method
694                } else {
695                    SymbolKind::Function
696                };
697                symbols.push(Symbol {
698                    name,
699                    kind,
700                    file: file.clone(),
701                    line: node.start_position().row as u32 + 1,
702                    token_cost: sig.len() / 4,
703                    signature: sig,
704                });
705            }
706        }
707        "class_specifier" => {
708            if let Some(name) = find_child_text(&node, "type_identifier", source) {
709                let sig = extract_signature(&node, source);
710                symbols.push(Symbol {
711                    name,
712                    kind: SymbolKind::Class,
713                    file: file.clone(),
714                    line: node.start_position().row as u32 + 1,
715                    token_cost: sig.len() / 4,
716                    signature: sig,
717                });
718            }
719            let mut cursor = node.walk();
720            for child in node.children(&mut cursor) {
721                collect_cpp_symbols(child, source, file, true, symbols);
722            }
723            return;
724        }
725        "struct_specifier" => {
726            if let Some(name) = find_child_text(&node, "type_identifier", source) {
727                let sig = extract_signature(&node, source);
728                symbols.push(Symbol {
729                    name,
730                    kind: SymbolKind::Struct,
731                    file: file.clone(),
732                    line: node.start_position().row as u32 + 1,
733                    token_cost: sig.len() / 4,
734                    signature: sig,
735                });
736            }
737        }
738        "enum_specifier" => {
739            if let Some(name) = find_child_text(&node, "type_identifier", source) {
740                let sig = extract_signature(&node, source);
741                symbols.push(Symbol {
742                    name,
743                    kind: SymbolKind::Enum,
744                    file: file.clone(),
745                    line: node.start_position().row as u32 + 1,
746                    token_cost: sig.len() / 4,
747                    signature: sig,
748                });
749            }
750        }
751        _ => {}
752    }
753
754    let mut cursor = node.walk();
755    for child in node.children(&mut cursor) {
756        collect_cpp_symbols(child, source, file, inside_class, symbols);
757    }
758}
759
760fn collect_ruby_symbols(
761    node: Node,
762    source: &[u8],
763    file: &PathBuf,
764    inside_class: bool,
765    symbols: &mut Vec<Symbol>,
766) {
767    let kind_str = node.kind();
768
769    match kind_str {
770        "method" => {
771            if let Some(name) = find_child_text(&node, "identifier", source) {
772                let sig = extract_signature(&node, source);
773                let kind = if inside_class {
774                    SymbolKind::Method
775                } else {
776                    SymbolKind::Function
777                };
778                symbols.push(Symbol {
779                    name,
780                    kind,
781                    file: file.clone(),
782                    line: node.start_position().row as u32 + 1,
783                    token_cost: sig.len() / 4,
784                    signature: sig,
785                });
786            }
787        }
788        "class" => {
789            // Ruby class names can be constant or scope_resolution
790            let name = find_child_text(&node, "constant", source)
791                .or_else(|| find_child_text(&node, "scope_resolution", source));
792            if let Some(name) = name {
793                let sig = extract_signature(&node, source);
794                symbols.push(Symbol {
795                    name,
796                    kind: SymbolKind::Class,
797                    file: file.clone(),
798                    line: node.start_position().row as u32 + 1,
799                    token_cost: sig.len() / 4,
800                    signature: sig,
801                });
802            }
803            let mut cursor = node.walk();
804            for child in node.children(&mut cursor) {
805                collect_ruby_symbols(child, source, file, true, symbols);
806            }
807            return;
808        }
809        "module" => {
810            if let Some(name) = find_child_text(&node, "constant", source) {
811                let sig = extract_signature(&node, source);
812                symbols.push(Symbol {
813                    name,
814                    kind: SymbolKind::Module,
815                    file: file.clone(),
816                    line: node.start_position().row as u32 + 1,
817                    token_cost: sig.len() / 4,
818                    signature: sig,
819                });
820            }
821            let mut cursor = node.walk();
822            for child in node.children(&mut cursor) {
823                collect_ruby_symbols(child, source, file, true, symbols);
824            }
825            return;
826        }
827        _ => {}
828    }
829
830    let mut cursor = node.walk();
831    for child in node.children(&mut cursor) {
832        collect_ruby_symbols(child, source, file, inside_class, symbols);
833    }
834}
835
836fn collect_php_symbols(
837    node: Node,
838    source: &[u8],
839    file: &PathBuf,
840    inside_class: bool,
841    symbols: &mut Vec<Symbol>,
842) {
843    let kind_str = node.kind();
844
845    match kind_str {
846        "function_definition" | "method_declaration" => {
847            if let Some(name) = find_child_text(&node, "name", source) {
848                let sig = extract_signature(&node, source);
849                let kind = if inside_class || kind_str == "method_declaration" {
850                    SymbolKind::Method
851                } else {
852                    SymbolKind::Function
853                };
854                symbols.push(Symbol {
855                    name,
856                    kind,
857                    file: file.clone(),
858                    line: node.start_position().row as u32 + 1,
859                    token_cost: sig.len() / 4,
860                    signature: sig,
861                });
862            }
863        }
864        "class_declaration" | "interface_declaration" | "trait_declaration" => {
865            if let Some(name) = find_child_text(&node, "name", source) {
866                let sig = extract_signature(&node, source);
867                let kind = match kind_str {
868                    "interface_declaration" => SymbolKind::Interface,
869                    "trait_declaration" => SymbolKind::Trait,
870                    _ => SymbolKind::Class,
871                };
872                symbols.push(Symbol {
873                    name,
874                    kind,
875                    file: file.clone(),
876                    line: node.start_position().row as u32 + 1,
877                    token_cost: sig.len() / 4,
878                    signature: sig,
879                });
880            }
881            let mut cursor = node.walk();
882            for child in node.children(&mut cursor) {
883                collect_php_symbols(child, source, file, true, symbols);
884            }
885            return;
886        }
887        "namespace_definition" => {
888            if let Some(name) = find_child_text(&node, "namespace_name", source)
889                .or_else(|| find_child_text(&node, "name", source))
890            {
891                let sig = extract_signature(&node, source);
892                symbols.push(Symbol {
893                    name,
894                    kind: SymbolKind::Module,
895                    file: file.clone(),
896                    line: node.start_position().row as u32 + 1,
897                    token_cost: sig.len() / 4,
898                    signature: sig,
899                });
900            }
901        }
902        _ => {}
903    }
904
905    let mut cursor = node.walk();
906    for child in node.children(&mut cursor) {
907        collect_php_symbols(child, source, file, inside_class, symbols);
908    }
909}
910
911fn collect_kotlin_symbols(
912    node: Node,
913    source: &[u8],
914    file: &PathBuf,
915    inside_class: bool,
916    symbols: &mut Vec<Symbol>,
917) {
918    let kind_str = node.kind();
919
920    match kind_str {
921        "function_declaration" => {
922            if let Some(name) = find_child_text(&node, "simple_identifier", source) {
923                let sig = extract_signature(&node, source);
924                let kind = if inside_class {
925                    SymbolKind::Method
926                } else {
927                    SymbolKind::Function
928                };
929                symbols.push(Symbol {
930                    name,
931                    kind,
932                    file: file.clone(),
933                    line: node.start_position().row as u32 + 1,
934                    token_cost: sig.len() / 4,
935                    signature: sig,
936                });
937            }
938        }
939        "class_declaration" | "object_declaration" => {
940            if let Some(name) = find_child_text(&node, "type_identifier", source)
941                .or_else(|| find_child_text(&node, "simple_identifier", source))
942            {
943                let sig = extract_signature(&node, source);
944                symbols.push(Symbol {
945                    name,
946                    kind: SymbolKind::Class,
947                    file: file.clone(),
948                    line: node.start_position().row as u32 + 1,
949                    token_cost: sig.len() / 4,
950                    signature: sig,
951                });
952            }
953            let mut cursor = node.walk();
954            for child in node.children(&mut cursor) {
955                collect_kotlin_symbols(child, source, file, true, symbols);
956            }
957            return;
958        }
959        "interface_declaration" => {
960            if let Some(name) = find_child_text(&node, "type_identifier", source) {
961                let sig = extract_signature(&node, source);
962                symbols.push(Symbol {
963                    name,
964                    kind: SymbolKind::Interface,
965                    file: file.clone(),
966                    line: node.start_position().row as u32 + 1,
967                    token_cost: sig.len() / 4,
968                    signature: sig,
969                });
970            }
971            let mut cursor = node.walk();
972            for child in node.children(&mut cursor) {
973                collect_kotlin_symbols(child, source, file, true, symbols);
974            }
975            return;
976        }
977        _ => {}
978    }
979
980    let mut cursor = node.walk();
981    for child in node.children(&mut cursor) {
982        collect_kotlin_symbols(child, source, file, inside_class, symbols);
983    }
984}
985
986fn collect_swift_symbols(
987    node: Node,
988    source: &[u8],
989    file: &PathBuf,
990    inside_class: bool,
991    symbols: &mut Vec<Symbol>,
992) {
993    let kind_str = node.kind();
994
995    match kind_str {
996        "function_declaration" => {
997            if let Some(name) = find_child_text(&node, "simple_identifier", source) {
998                let sig = extract_signature(&node, source);
999                let kind = if inside_class {
1000                    SymbolKind::Method
1001                } else {
1002                    SymbolKind::Function
1003                };
1004                symbols.push(Symbol {
1005                    name,
1006                    kind,
1007                    file: file.clone(),
1008                    line: node.start_position().row as u32 + 1,
1009                    token_cost: sig.len() / 4,
1010                    signature: sig,
1011                });
1012            }
1013        }
1014        "class_declaration" | "struct_declaration" | "enum_declaration" => {
1015            if let Some(name) = find_child_text(&node, "type_identifier", source)
1016                .or_else(|| find_child_text(&node, "simple_identifier", source))
1017            {
1018                let sig = extract_signature(&node, source);
1019                let kind = match kind_str {
1020                    "struct_declaration" => SymbolKind::Struct,
1021                    "enum_declaration" => SymbolKind::Enum,
1022                    _ => SymbolKind::Class,
1023                };
1024                symbols.push(Symbol {
1025                    name,
1026                    kind,
1027                    file: file.clone(),
1028                    line: node.start_position().row as u32 + 1,
1029                    token_cost: sig.len() / 4,
1030                    signature: sig,
1031                });
1032            }
1033            let mut cursor = node.walk();
1034            for child in node.children(&mut cursor) {
1035                collect_swift_symbols(child, source, file, true, symbols);
1036            }
1037            return;
1038        }
1039        "protocol_declaration" => {
1040            if let Some(name) = find_child_text(&node, "type_identifier", source)
1041                .or_else(|| find_child_text(&node, "simple_identifier", source))
1042            {
1043                let sig = extract_signature(&node, source);
1044                symbols.push(Symbol {
1045                    name,
1046                    kind: SymbolKind::Interface,
1047                    file: file.clone(),
1048                    line: node.start_position().row as u32 + 1,
1049                    token_cost: sig.len() / 4,
1050                    signature: sig,
1051                });
1052            }
1053            let mut cursor = node.walk();
1054            for child in node.children(&mut cursor) {
1055                collect_swift_symbols(child, source, file, true, symbols);
1056            }
1057            return;
1058        }
1059        _ => {}
1060    }
1061
1062    let mut cursor = node.walk();
1063    for child in node.children(&mut cursor) {
1064        collect_swift_symbols(child, source, file, inside_class, symbols);
1065    }
1066}
1067
1068/// Find the function name from a function_declarator child node.
1069///
1070/// In C/C++, function definitions have: type function_declarator(params) body
1071/// The declarator contains the identifier.
1072fn find_nested_function_name(node: &Node, source: &[u8]) -> Option<String> {
1073    let mut cursor = node.walk();
1074    for child in node.children(&mut cursor) {
1075        if child.kind() == "function_declarator" {
1076            return find_child_text(&child, "identifier", source)
1077                .or_else(|| find_child_text(&child, "field_identifier", source));
1078        }
1079    }
1080    None
1081}
1082
1083fn collect_references(
1084    node: Node,
1085    source: &[u8],
1086    file: &PathBuf,
1087    enclosing: &Option<String>,
1088    refs: &mut Vec<Reference>,
1089) {
1090    let kind_str = node.kind();
1091
1092    // Track enclosing symbol for context
1093    let new_enclosing = match kind_str {
1094        "function_item" | "function_definition" | "function_declaration" | "method_declaration" => {
1095            find_child_text(&node, "identifier", source)
1096                .or_else(|| find_child_text(&node, "field_identifier", source))
1097        }
1098        _ => None,
1099    };
1100    let current_enclosing = if new_enclosing.is_some() {
1101        &new_enclosing
1102    } else {
1103        enclosing
1104    };
1105
1106    // Collect identifier references (excluding definition sites)
1107    if kind_str == "identifier" || kind_str == "type_identifier" {
1108        let parent_kind = node.parent().map(|p| p.kind().to_string());
1109        let is_definition = matches!(
1110            parent_kind.as_deref(),
1111            Some(
1112                "function_item"
1113                    | "function_definition"
1114                    | "function_declaration"
1115                    | "struct_item"
1116                    | "enum_item"
1117                    | "trait_item"
1118                    | "class_definition"
1119                    | "class_declaration"
1120                    | "method_definition"
1121                    | "variable_declarator"
1122                    | "type_spec"
1123            )
1124        );
1125
1126        if !is_definition {
1127            let name = node_text(&node, source);
1128            if !name.is_empty() {
1129                refs.push(Reference {
1130                    from_file: file.clone(),
1131                    from_symbol: current_enclosing.clone(),
1132                    to_name: name,
1133                    line: node.start_position().row as u32 + 1,
1134                });
1135            }
1136        }
1137    }
1138
1139    let mut cursor = node.walk();
1140    for child in node.children(&mut cursor) {
1141        collect_references(child, source, file, current_enclosing, refs);
1142    }
1143}
1144
1145/// Extract the signature of a node: text from start to opening `{` or `:`.
1146fn extract_signature(node: &Node, source: &[u8]) -> String {
1147    let text = node_text(node, source);
1148
1149    // Find the opening brace or colon (for Python)
1150    let sig = if let Some(pos) = text.find('{') {
1151        &text[..pos]
1152    } else if let Some(pos) = text.find(':') {
1153        // Python uses : instead of {
1154        &text[..pos]
1155    } else {
1156        &text
1157    };
1158
1159    // Collapse whitespace and trim
1160    let collapsed: String = sig.split_whitespace().collect::<Vec<_>>().join(" ");
1161
1162    collapsed
1163}
1164
1165fn node_text(node: &Node, source: &[u8]) -> String {
1166    let start = node.start_byte();
1167    let end = node.end_byte();
1168    if start >= source.len() || end > source.len() {
1169        return String::new();
1170    }
1171    String::from_utf8_lossy(&source[start..end]).to_string()
1172}
1173
1174fn find_child_text(node: &Node, kind: &str, source: &[u8]) -> Option<String> {
1175    let mut cursor = node.walk();
1176    for child in node.children(&mut cursor) {
1177        if child.kind() == kind {
1178            let text = node_text(&child, source);
1179            if !text.is_empty() {
1180                return Some(text);
1181            }
1182        }
1183    }
1184    None
1185}
1186
1187fn child_has_kind(node: &Node, kind: &str) -> bool {
1188    let mut cursor = node.walk();
1189    for child in node.children(&mut cursor) {
1190        if child.kind() == kind {
1191            return true;
1192        }
1193    }
1194    false
1195}
1196
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an in-memory [`SourceFile`] from a path, a language and its text.
    fn fixture(path: &str, language: Language, content: &str) -> SourceFile {
        SourceFile {
            path: PathBuf::from(path),
            language,
            content: content.to_string(),
        }
    }

    /// Names of all extracted symbols, in extraction order.
    fn symbol_names(symbols: &[Symbol]) -> Vec<&str> {
        symbols.iter().map(|sym| sym.name.as_str()).collect()
    }

    /// First symbol called `name`; panics when there is none.
    fn first_named<'a>(symbols: &'a [Symbol], name: &str) -> &'a Symbol {
        symbols.iter().find(|sym| sym.name == name).unwrap()
    }

    /// Rust fixture: a free function, a struct, an enum, a trait and an inherent impl.
    fn make_rust_file() -> SourceFile {
        fixture(
            "src/lib.rs",
            Language::Rust,
            r#"
pub fn top_level(x: i32) -> bool {
    x > 0
}

pub struct Config {
    name: String,
    value: u32,
}

pub enum Color {
    Red,
    Green,
    Blue,
}

pub trait Drawable {
    fn draw(&self);
}

impl Config {
    pub fn new(name: String) -> Self {
        Self { name, value: 0 }
    }
}
"#,
        )
    }

    /// Python fixture: one free function and a class with two methods.
    fn make_python_file() -> SourceFile {
        fixture(
            "app.py",
            Language::Python,
            r#"
def standalone():
    pass

class MyClass:
    def method(self):
        pass

    def another(self, x):
        return x + 1
"#,
        )
    }

    /// TypeScript fixture: a function, a class with a method, and an arrow function.
    fn make_typescript_file() -> SourceFile {
        fixture(
            "app.ts",
            Language::TypeScript,
            r#"
function greet(name: string): string {
    return `Hello ${name}`;
}

class Greeter {
    sayHello() {
        console.log("hello");
    }
}

const add = (a: number, b: number) => a + b;
"#,
        )
    }

    #[test]
    fn parse_rust_symbols() {
        let input = make_rust_file();
        let symbols = extract_symbols(&input).unwrap();

        let names = symbol_names(&symbols);
        assert!(names.contains(&"top_level"), "missing top_level: {names:?}");
        assert!(
            names.contains(&"Config"),
            "missing Config struct: {names:?}"
        );
        assert!(names.contains(&"Color"), "missing Color enum: {names:?}");
        assert!(
            names.contains(&"Drawable"),
            "missing Drawable trait: {names:?}"
        );
        assert!(names.contains(&"new"), "missing new method: {names:?}");

        // Kinds: free function, struct, and a method inside the impl block.
        let top = first_named(&symbols, "top_level");
        assert_eq!(top.kind, SymbolKind::Function);

        let config_struct = symbols
            .iter()
            .filter(|sym| sym.kind == SymbolKind::Struct)
            .find(|sym| sym.name == "Config")
            .unwrap();
        assert_eq!(config_struct.kind, SymbolKind::Struct);

        let new_method = first_named(&symbols, "new");
        assert_eq!(new_method.kind, SymbolKind::Method);

        // The signature carries real text and a non-zero cost estimate.
        assert!(top.signature.contains("fn top_level"));
        assert!(top.token_cost > 0);
    }

    #[test]
    fn parse_python_symbols() {
        let input = make_python_file();
        let symbols = extract_symbols(&input).unwrap();

        let names = symbol_names(&symbols);
        assert!(
            names.contains(&"standalone"),
            "missing standalone: {names:?}"
        );
        assert!(names.contains(&"MyClass"), "missing MyClass: {names:?}");
        assert!(names.contains(&"method"), "missing method: {names:?}");
        assert!(names.contains(&"another"), "missing another: {names:?}");

        let standalone = first_named(&symbols, "standalone");
        assert_eq!(standalone.kind, SymbolKind::Function);

        let method = first_named(&symbols, "method");
        assert_eq!(method.kind, SymbolKind::Method);
    }

    #[test]
    fn parse_typescript_symbols() {
        let input = make_typescript_file();
        let symbols = extract_symbols(&input).unwrap();

        let names = symbol_names(&symbols);
        assert!(names.contains(&"greet"), "missing greet: {names:?}");
        assert!(names.contains(&"Greeter"), "missing Greeter: {names:?}");
        assert!(names.contains(&"sayHello"), "missing sayHello: {names:?}");
        assert!(names.contains(&"add"), "missing add arrow fn: {names:?}");
    }

    #[test]
    fn parse_empty_file() {
        let input = fixture("empty.rs", Language::Rust, "");
        let symbols = extract_symbols(&input).unwrap();
        assert!(symbols.is_empty());
    }

    #[test]
    fn parse_file_with_syntax_errors_gives_partial_results() {
        let input = fixture(
            "broken.rs",
            Language::Rust,
            r#"
fn valid_fn() -> bool { true }

fn broken( {

struct ValidStruct {
    x: i32,
}
"#,
        );
        let symbols = extract_symbols(&input).unwrap();
        // tree-sitter is error-tolerant: the function preceding the broken one must
        // still be extracted (only `valid_fn` is asserted here).
        let names = symbol_names(&symbols);
        assert!(
            names.contains(&"valid_fn"),
            "should extract valid symbols despite errors: {names:?}"
        );
    }

    #[test]
    fn extract_references_finds_calls() {
        let input = fixture(
            "main.rs",
            Language::Rust,
            r#"
fn caller() {
    helper();
    let x = Config::new();
}

fn helper() {}
"#,
        );
        let refs = extract_references(&input).unwrap();
        let ref_names: Vec<&str> = refs
            .iter()
            .map(|reference| reference.to_name.as_str())
            .collect();
        assert!(
            ref_names.contains(&"helper"),
            "should find reference to helper: {ref_names:?}"
        );
        assert!(
            ref_names.contains(&"Config"),
            "should find reference to Config: {ref_names:?}"
        );
    }

    #[test]
    fn parse_java_file() {
        let input = fixture(
            "Main.java",
            Language::Java,
            r#"
public class Main {
    public static void main(String[] args) {
        System.out.println("Hello");
    }
    public int add(int a, int b) {
        return a + b;
    }
}

interface Runnable {
    void run();
}

enum Color { RED, GREEN, BLUE }
"#,
        );
        let symbols = extract_symbols(&input).unwrap();
        let names = symbol_names(&symbols);
        assert!(names.contains(&"Main"), "should find class Main: {names:?}");
        assert!(
            names.contains(&"main"),
            "should find method main: {names:?}"
        );
        assert!(names.contains(&"add"), "should find method add: {names:?}");
        assert!(
            names.contains(&"Runnable"),
            "should find interface Runnable: {names:?}"
        );
        assert!(
            names.contains(&"Color"),
            "should find enum Color: {names:?}"
        );
    }

    #[test]
    fn parse_c_file() {
        let input = fixture(
            "main.c",
            Language::C,
            r#"
struct Point {
    int x;
    int y;
};

enum Direction { NORTH, SOUTH, EAST, WEST };

int add(int a, int b) {
    return a + b;
}

int main() {
    return 0;
}
"#,
        );
        let symbols = extract_symbols(&input).unwrap();
        let names = symbol_names(&symbols);
        assert!(
            names.contains(&"Point"),
            "should find struct Point: {names:?}"
        );
        assert!(
            names.contains(&"Direction"),
            "should find enum Direction: {names:?}"
        );
        assert!(
            names.contains(&"add"),
            "should find function add: {names:?}"
        );
        assert!(
            names.contains(&"main"),
            "should find function main: {names:?}"
        );
    }

    #[test]
    fn parse_cpp_file() {
        let input = fixture(
            "main.cpp",
            Language::Cpp,
            r#"
class Calculator {
public:
    int add(int a, int b) {
        return a + b;
    }
};

struct Point {
    int x, y;
};

int main() {
    return 0;
}
"#,
        );
        let symbols = extract_symbols(&input).unwrap();
        let names = symbol_names(&symbols);
        assert!(
            names.contains(&"Calculator"),
            "should find class Calculator: {names:?}"
        );
        assert!(names.contains(&"add"), "should find method add: {names:?}");
        assert!(
            names.contains(&"Point"),
            "should find struct Point: {names:?}"
        );
        assert!(
            names.contains(&"main"),
            "should find function main: {names:?}"
        );
    }

    #[test]
    fn parse_ruby_file() {
        let input = fixture(
            "app.rb",
            Language::Ruby,
            r#"
module MyApp
  class Calculator
    def add(a, b)
      a + b
    end

    def subtract(a, b)
      a - b
    end
  end
end

def standalone_function
  puts "hello"
end
"#,
        );
        let symbols = extract_symbols(&input).unwrap();
        let names = symbol_names(&symbols);
        assert!(
            names.contains(&"MyApp"),
            "should find module MyApp: {names:?}"
        );
        assert!(
            names.contains(&"Calculator"),
            "should find class Calculator: {names:?}"
        );
        assert!(names.contains(&"add"), "should find method add: {names:?}");
        assert!(
            names.contains(&"subtract"),
            "should find method subtract: {names:?}"
        );
        assert!(
            names.contains(&"standalone_function"),
            "should find function standalone_function: {names:?}"
        );
    }
}