infiniloom_engine/parser/
extraction.rs

1//! Symbol extraction utilities for parsing
2//!
3//! This module contains standalone functions for extracting metadata from AST nodes:
4//! - Signatures
5//! - Docstrings
6//! - Visibility modifiers
7//! - Function calls
8//! - Inheritance relationships
9
10use super::language::Language;
11use crate::types::{SymbolKind, Visibility};
12use std::collections::HashSet;
13use tree_sitter::Node;
14
/// Clamp `index` to the closest UTF-8 character boundary at or below it.
///
/// Indices at or beyond the end of `s` map to `s.len()`, so the returned
/// offset can always be used as a slice bound without panicking.
fn safe_char_boundary(s: &str, index: usize) -> usize {
    if index >= s.len() {
        return s.len();
    }
    // Offset 0 is always a boundary, so the search below cannot come up empty.
    (0..=index)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
27
28/// Extract function/method signature
29pub fn extract_signature(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
30    let sig_node = match language {
31        Language::Python => {
32            if node.kind() == "function_definition" {
33                let start = node.start_byte();
34                let mut end = start;
35                for byte in &source_code.as_bytes()[start..] {
36                    end += 1;
37                    if *byte == b':' || *byte == b'\n' {
38                        break;
39                    }
40                }
41                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
42            }
43            None
44        },
45        Language::JavaScript | Language::TypeScript => {
46            if node.kind().contains("function") || node.kind().contains("method") {
47                let start = node.start_byte();
48                let mut end = start;
49                let mut brace_count = 0;
50                for byte in &source_code.as_bytes()[start..] {
51                    if *byte == b'{' {
52                        brace_count += 1;
53                        if brace_count == 1 {
54                            break;
55                        }
56                    }
57                    end += 1;
58                }
59                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
60            }
61            None
62        },
63        Language::Rust => {
64            if node.kind() == "function_item" {
65                for child in node.children(&mut node.walk()) {
66                    if child.kind() == "block" {
67                        let start = node.start_byte();
68                        let end = child.start_byte();
69                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
70                    }
71                }
72            }
73            None
74        },
75        Language::Go => {
76            if node.kind() == "function_declaration" || node.kind() == "method_declaration" {
77                for child in node.children(&mut node.walk()) {
78                    if child.kind() == "block" {
79                        let start = node.start_byte();
80                        let end = child.start_byte();
81                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
82                    }
83                }
84            }
85            None
86        },
87        Language::Java => {
88            if node.kind() == "method_declaration" {
89                for child in node.children(&mut node.walk()) {
90                    if child.kind() == "block" {
91                        let start = node.start_byte();
92                        let end = child.start_byte();
93                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
94                    }
95                }
96            }
97            None
98        },
99        Language::C
100        | Language::Cpp
101        | Language::CSharp
102        | Language::Php
103        | Language::Kotlin
104        | Language::Swift
105        | Language::Scala => {
106            for child in node.children(&mut node.walk()) {
107                if child.kind() == "block"
108                    || child.kind() == "compound_statement"
109                    || child.kind() == "function_body"
110                {
111                    let start = node.start_byte();
112                    let end = child.start_byte();
113                    return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
114                }
115            }
116            None
117        },
118        Language::Ruby | Language::Lua => {
119            let start = node.start_byte();
120            let mut end = start;
121            for byte in &source_code.as_bytes()[start..] {
122                end += 1;
123                if *byte == b'\n' {
124                    break;
125                }
126            }
127            Some(source_code[start..end].trim().to_owned())
128        },
129        Language::Bash => {
130            let start = node.start_byte();
131            let mut end = start;
132            for byte in &source_code.as_bytes()[start..] {
133                if *byte == b'{' {
134                    break;
135                }
136                end += 1;
137            }
138            Some(source_code[start..end].trim().to_owned())
139        },
140        Language::Haskell
141        | Language::OCaml
142        | Language::FSharp
143        | Language::Elixir
144        | Language::Clojure
145        | Language::R => {
146            let start = node.start_byte();
147            let mut end = start;
148            for byte in &source_code.as_bytes()[start..] {
149                end += 1;
150                if *byte == b'\n' || *byte == b'=' {
151                    break;
152                }
153            }
154            Some(source_code[start..end].trim().to_owned())
155        },
156    };
157
158    sig_node.or_else(|| {
159        let start = node.start_byte();
160        let end = std::cmp::min(start + 200, source_code.len());
161        // Ensure we slice at valid UTF-8 character boundaries
162        let safe_start = safe_char_boundary(source_code, start);
163        let safe_end = safe_char_boundary(source_code, end);
164        if safe_start >= safe_end {
165            return None;
166        }
167        let text = &source_code[safe_start..safe_end];
168        text.lines().next().map(|s| s.trim().to_owned())
169    })
170}
171
172/// Extract docstring/documentation comment
173pub fn extract_docstring(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
174    match language {
175        Language::Python => {
176            let mut cursor = node.walk();
177            for child in node.children(&mut cursor) {
178                if child.kind() == "block" {
179                    for stmt in child.children(&mut child.walk()) {
180                        if stmt.kind() == "expression_statement" {
181                            for expr in stmt.children(&mut stmt.walk()) {
182                                if expr.kind() == "string" {
183                                    if let Ok(text) = expr.utf8_text(source_code.as_bytes()) {
184                                        return Some(
185                                            text.trim_matches(|c| c == '"' || c == '\'')
186                                                .trim()
187                                                .to_owned(),
188                                        );
189                                    }
190                                }
191                            }
192                        }
193                    }
194                }
195            }
196            None
197        },
198        Language::JavaScript | Language::TypeScript => {
199            if let Some(prev_sibling) = node.prev_sibling() {
200                if prev_sibling.kind() == "comment" {
201                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
202                        if text.starts_with("/**") {
203                            return Some(clean_jsdoc(text));
204                        }
205                    }
206                }
207            }
208            None
209        },
210        Language::Rust => {
211            let start_byte = node.start_byte();
212            let lines_before: Vec<_> = source_code[..start_byte]
213                .lines()
214                .rev()
215                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
216                .collect();
217
218            if !lines_before.is_empty() {
219                let doc: Vec<String> = lines_before
220                    .into_iter()
221                    .rev()
222                    .filter_map(|line| {
223                        let trimmed = line.trim();
224                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
225                    })
226                    .collect();
227
228                if !doc.is_empty() {
229                    return Some(doc.join(" "));
230                }
231            }
232            None
233        },
234        Language::Go => {
235            if let Some(prev_sibling) = node.prev_sibling() {
236                if prev_sibling.kind() == "comment" {
237                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
238                        return Some(text.trim_start_matches("//").trim().to_owned());
239                    }
240                }
241            }
242            None
243        },
244        Language::Java => {
245            if let Some(prev_sibling) = node.prev_sibling() {
246                if prev_sibling.kind() == "block_comment" {
247                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
248                        if text.starts_with("/**") {
249                            return Some(clean_javadoc(text));
250                        }
251                    }
252                }
253            }
254            None
255        },
256        Language::C | Language::Cpp => {
257            if let Some(prev_sibling) = node.prev_sibling() {
258                if prev_sibling.kind() == "comment" {
259                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
260                        if text.starts_with("/**") || text.starts_with("/*") {
261                            return Some(clean_jsdoc(text));
262                        }
263                        return Some(text.trim_start_matches("//").trim().to_owned());
264                    }
265                }
266            }
267            None
268        },
269        Language::CSharp => {
270            let start_byte = node.start_byte();
271            let lines_before: Vec<_> = source_code[..start_byte]
272                .lines()
273                .rev()
274                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
275                .collect();
276
277            if !lines_before.is_empty() {
278                let doc: Vec<String> = lines_before
279                    .into_iter()
280                    .rev()
281                    .filter_map(|line| {
282                        let trimmed = line.trim();
283                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
284                    })
285                    .collect();
286
287                if !doc.is_empty() {
288                    return Some(doc.join(" "));
289                }
290            }
291            None
292        },
293        Language::Ruby => {
294            if let Some(prev_sibling) = node.prev_sibling() {
295                if prev_sibling.kind() == "comment" {
296                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
297                        return Some(text.trim_start_matches('#').trim().to_owned());
298                    }
299                }
300            }
301            None
302        },
303        Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
304            if let Some(prev_sibling) = node.prev_sibling() {
305                let kind = prev_sibling.kind();
306                if kind == "comment" || kind == "multiline_comment" || kind == "block_comment" {
307                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
308                        if text.starts_with("/**") {
309                            return Some(clean_jsdoc(text));
310                        }
311                    }
312                }
313            }
314            None
315        },
316        Language::Bash => {
317            if let Some(prev_sibling) = node.prev_sibling() {
318                if prev_sibling.kind() == "comment" {
319                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
320                        return Some(text.trim_start_matches('#').trim().to_owned());
321                    }
322                }
323            }
324            None
325        },
326        Language::Haskell => {
327            if let Some(prev_sibling) = node.prev_sibling() {
328                if prev_sibling.kind() == "comment" {
329                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
330                        let cleaned = text
331                            .trim_start_matches("{-")
332                            .trim_end_matches("-}")
333                            .trim_start_matches("--")
334                            .trim();
335                        return Some(cleaned.to_owned());
336                    }
337                }
338            }
339            None
340        },
341        Language::Elixir => {
342            if let Some(prev_sibling) = node.prev_sibling() {
343                if prev_sibling.kind() == "comment" {
344                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
345                        return Some(text.trim_start_matches('#').trim().to_owned());
346                    }
347                }
348            }
349            None
350        },
351        Language::Clojure => None,
352        Language::OCaml | Language::FSharp => {
353            if let Some(prev_sibling) = node.prev_sibling() {
354                if prev_sibling.kind() == "comment" {
355                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
356                        let cleaned = text
357                            .trim_start_matches("(**")
358                            .trim_start_matches("(*")
359                            .trim_end_matches("*)")
360                            .trim();
361                        return Some(cleaned.to_owned());
362                    }
363                }
364            }
365            None
366        },
367        Language::Lua => {
368            if let Some(prev_sibling) = node.prev_sibling() {
369                if prev_sibling.kind() == "comment" {
370                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
371                        let cleaned = text
372                            .trim_start_matches("--[[")
373                            .trim_end_matches("]]")
374                            .trim_start_matches("--")
375                            .trim();
376                        return Some(cleaned.to_owned());
377                    }
378                }
379            }
380            None
381        },
382        Language::R => {
383            if let Some(prev_sibling) = node.prev_sibling() {
384                if prev_sibling.kind() == "comment" {
385                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
386                        return Some(text.trim_start_matches('#').trim().to_owned());
387                    }
388                }
389            }
390            None
391        },
392    }
393}
394
395/// Extract parent class/struct name for methods
396pub fn extract_parent(node: Node<'_>, source_code: &str) -> Option<String> {
397    let mut current = node.parent()?;
398
399    while let Some(parent) = current.parent() {
400        if ["class_definition", "class_declaration", "struct_item", "impl_item"]
401            .contains(&parent.kind())
402        {
403            for child in parent.children(&mut parent.walk()) {
404                if child.kind() == "identifier" || child.kind() == "type_identifier" {
405                    if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
406                        return Some(name.to_owned());
407                    }
408                }
409            }
410        }
411        current = parent;
412    }
413
414    None
415}
416
417/// Extract visibility modifier from a node
418pub fn extract_visibility(node: Node<'_>, source_code: &str, language: Language) -> Visibility {
419    match language {
420        Language::Python => {
421            if let Some(name_node) = node.child_by_field_name("name") {
422                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
423                    if name.starts_with("__") && !name.ends_with("__") {
424                        return Visibility::Private;
425                    } else if name.starts_with('_') {
426                        return Visibility::Protected;
427                    }
428                }
429            }
430            Visibility::Public
431        },
432        Language::Rust => {
433            for child in node.children(&mut node.walk()) {
434                if child.kind() == "visibility_modifier" {
435                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
436                        if text.contains("pub(crate)") || text.contains("pub(super)") {
437                            return Visibility::Internal;
438                        } else if text.starts_with("pub") {
439                            return Visibility::Public;
440                        }
441                    }
442                }
443            }
444            Visibility::Private
445        },
446        Language::JavaScript | Language::TypeScript => {
447            for child in node.children(&mut node.walk()) {
448                let kind = child.kind();
449                if kind == "private" || kind == "accessibility_modifier" {
450                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
451                        return match text {
452                            "private" => Visibility::Private,
453                            "protected" => Visibility::Protected,
454                            _ => Visibility::Public,
455                        };
456                    }
457                }
458            }
459            if let Some(name_node) = node.child_by_field_name("name") {
460                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
461                    if name.starts_with('#') {
462                        return Visibility::Private;
463                    }
464                }
465            }
466            Visibility::Public
467        },
468        Language::Go => {
469            if let Some(name_node) = node.child_by_field_name("name") {
470                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
471                    if let Some(first_char) = name.chars().next() {
472                        if first_char.is_lowercase() {
473                            return Visibility::Private;
474                        }
475                    }
476                }
477            }
478            Visibility::Public
479        },
480        Language::Java => {
481            for child in node.children(&mut node.walk()) {
482                if child.kind() == "modifiers" {
483                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
484                        if text.contains("private") {
485                            return Visibility::Private;
486                        } else if text.contains("protected") {
487                            return Visibility::Protected;
488                        } else if text.contains("public") {
489                            return Visibility::Public;
490                        }
491                    }
492                }
493            }
494            Visibility::Internal
495        },
496        Language::C | Language::Cpp => {
497            for child in node.children(&mut node.walk()) {
498                if child.kind() == "storage_class_specifier" {
499                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
500                        if text == "static" {
501                            return Visibility::Private;
502                        }
503                    }
504                }
505            }
506            Visibility::Public
507        },
508        Language::CSharp | Language::Kotlin | Language::Swift | Language::Scala => {
509            for child in node.children(&mut node.walk()) {
510                let kind = child.kind();
511                if kind == "modifier" || kind == "modifiers" || kind == "visibility_modifier" {
512                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
513                        if text.contains("private") {
514                            return Visibility::Private;
515                        } else if text.contains("protected") {
516                            return Visibility::Protected;
517                        } else if text.contains("internal") {
518                            return Visibility::Internal;
519                        } else if text.contains("public") {
520                            return Visibility::Public;
521                        }
522                    }
523                }
524            }
525            Visibility::Internal
526        },
527        Language::Ruby => {
528            if let Some(name_node) = node.child_by_field_name("name") {
529                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
530                    if name.starts_with('_') {
531                        return Visibility::Private;
532                    }
533                }
534            }
535            Visibility::Public
536        },
537        Language::Php => {
538            for child in node.children(&mut node.walk()) {
539                if child.kind() == "visibility_modifier" {
540                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
541                        return match text {
542                            "private" => Visibility::Private,
543                            "protected" => Visibility::Protected,
544                            "public" => Visibility::Public,
545                            _ => Visibility::Public,
546                        };
547                    }
548                }
549            }
550            Visibility::Public
551        },
552        Language::Bash => Visibility::Public,
553        Language::Haskell
554        | Language::Elixir
555        | Language::Clojure
556        | Language::OCaml
557        | Language::FSharp
558        | Language::Lua
559        | Language::R => Visibility::Public,
560    }
561}
562
563/// Extract function calls from a function/method body
564pub fn extract_calls(node: Node<'_>, source_code: &str, language: Language) -> Vec<String> {
565    let mut calls = HashSet::new();
566
567    let body_node = find_body_node(node, language);
568    if let Some(body) = body_node {
569        collect_calls_recursive(body, source_code, language, &mut calls);
570    }
571
572    if calls.is_empty() {
573        collect_calls_recursive(node, source_code, language, &mut calls);
574    }
575
576    calls.into_iter().collect()
577}
578
579/// Find the body node of a function/method
580pub fn find_body_node(node: Node<'_>, language: Language) -> Option<Node<'_>> {
581    match language {
582        Language::Python => {
583            for child in node.children(&mut node.walk()) {
584                if child.kind() == "block" {
585                    return Some(child);
586                }
587            }
588        },
589        Language::Rust => {
590            for child in node.children(&mut node.walk()) {
591                if child.kind() == "block" {
592                    return Some(child);
593                }
594            }
595        },
596        Language::JavaScript | Language::TypeScript => {
597            for child in node.children(&mut node.walk()) {
598                let kind = child.kind();
599                if kind == "statement_block" {
600                    return Some(child);
601                }
602                if kind == "arrow_function" {
603                    if let Some(body) = find_body_node(child, language) {
604                        return Some(body);
605                    }
606                    return Some(child);
607                }
608            }
609            if node.kind() == "arrow_function" {
610                for child in node.children(&mut node.walk()) {
611                    let kind = child.kind();
612                    if kind != "formal_parameters"
613                        && kind != "identifier"
614                        && kind != "=>"
615                        && kind != "("
616                        && kind != ")"
617                        && kind != ","
618                    {
619                        return Some(child);
620                    }
621                }
622                return Some(node);
623            }
624        },
625        Language::Go => {
626            for child in node.children(&mut node.walk()) {
627                if child.kind() == "block" {
628                    return Some(child);
629                }
630            }
631        },
632        Language::Java => {
633            for child in node.children(&mut node.walk()) {
634                if child.kind() == "block" {
635                    return Some(child);
636                }
637            }
638        },
639        Language::C | Language::Cpp => {
640            for child in node.children(&mut node.walk()) {
641                if child.kind() == "compound_statement" {
642                    return Some(child);
643                }
644            }
645        },
646        Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
647            for child in node.children(&mut node.walk()) {
648                let kind = child.kind();
649                if kind == "block" || kind == "compound_statement" || kind == "function_body" {
650                    return Some(child);
651                }
652            }
653        },
654        Language::Ruby => {
655            for child in node.children(&mut node.walk()) {
656                if child.kind() == "body_statement" || child.kind() == "do_block" {
657                    return Some(child);
658                }
659            }
660        },
661        Language::Bash => {
662            for child in node.children(&mut node.walk()) {
663                if child.kind() == "compound_statement" {
664                    return Some(child);
665                }
666            }
667        },
668        Language::Haskell
669        | Language::Elixir
670        | Language::Clojure
671        | Language::OCaml
672        | Language::FSharp
673        | Language::R => {
674            return Some(node);
675        },
676        Language::Lua => {
677            for child in node.children(&mut node.walk()) {
678                if child.kind() == "block" {
679                    return Some(child);
680                }
681            }
682        },
683    }
684    None
685}
686
687/// Recursively collect function calls from a node
688pub fn collect_calls_recursive(
689    node: Node<'_>,
690    source_code: &str,
691    language: Language,
692    calls: &mut HashSet<String>,
693) {
694    let kind = node.kind();
695
696    let call_name = match language {
697        Language::Python => {
698            if kind == "call" {
699                node.child_by_field_name("function").and_then(|f| {
700                    if f.kind() == "identifier" {
701                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
702                    } else if f.kind() == "attribute" {
703                        f.child_by_field_name("attribute")
704                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
705                            .map(String::from)
706                    } else {
707                        None
708                    }
709                })
710            } else {
711                None
712            }
713        },
714        Language::Rust => {
715            if kind == "call_expression" {
716                node.child_by_field_name("function").and_then(|f| {
717                    if f.kind() == "identifier" {
718                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
719                    } else if f.kind() == "field_expression" {
720                        f.child_by_field_name("field")
721                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
722                            .map(String::from)
723                    } else if f.kind() == "scoped_identifier" {
724                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
725                    } else {
726                        None
727                    }
728                })
729            } else if kind == "macro_invocation" {
730                node.child_by_field_name("macro")
731                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
732                    .map(|s| format!("{}!", s))
733            } else {
734                None
735            }
736        },
737        Language::JavaScript | Language::TypeScript => {
738            if kind == "call_expression" {
739                node.child_by_field_name("function").and_then(|f| {
740                    if f.kind() == "identifier" {
741                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
742                    } else if f.kind() == "member_expression" {
743                        f.child_by_field_name("property")
744                            .and_then(|p| p.utf8_text(source_code.as_bytes()).ok())
745                            .map(String::from)
746                    } else {
747                        None
748                    }
749                })
750            } else {
751                None
752            }
753        },
754        Language::Go => {
755            if kind == "call_expression" {
756                node.child_by_field_name("function").and_then(|f| {
757                    if f.kind() == "identifier" {
758                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
759                    } else if f.kind() == "selector_expression" {
760                        f.child_by_field_name("field")
761                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
762                            .map(String::from)
763                    } else {
764                        None
765                    }
766                })
767            } else {
768                None
769            }
770        },
771        Language::Java => {
772            if kind == "method_invocation" {
773                node.child_by_field_name("name")
774                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
775                    .map(String::from)
776            } else {
777                None
778            }
779        },
780        Language::C | Language::Cpp => {
781            if kind == "call_expression" {
782                node.child_by_field_name("function").and_then(|f| {
783                    if f.kind() == "identifier" {
784                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
785                    } else if f.kind() == "field_expression" {
786                        f.child_by_field_name("field")
787                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
788                            .map(String::from)
789                    } else {
790                        None
791                    }
792                })
793            } else {
794                None
795            }
796        },
797        Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
798            if kind == "invocation_expression" || kind == "call_expression" {
799                node.children(&mut node.walk())
800                    .find(|child| child.kind() == "identifier" || child.kind() == "simple_name")
801                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
802                    .map(|s| s.to_owned())
803            } else {
804                None
805            }
806        },
807        Language::Ruby => {
808            if kind == "call" || kind == "method_call" {
809                node.child_by_field_name("method")
810                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
811                    .map(String::from)
812            } else {
813                None
814            }
815        },
816        Language::Bash => {
817            if kind == "command" {
818                node.child_by_field_name("name")
819                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
820                    .map(String::from)
821            } else {
822                None
823            }
824        },
825        Language::Haskell
826        | Language::Elixir
827        | Language::Clojure
828        | Language::OCaml
829        | Language::FSharp
830        | Language::Lua
831        | Language::R => {
832            if kind == "function_call" || kind == "call" || kind == "application" {
833                node.children(&mut node.walk())
834                    .find(|child| child.kind() == "identifier" || child.kind() == "variable")
835                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
836                    .map(|s| s.to_owned())
837            } else {
838                None
839            }
840        },
841    };
842
843    if let Some(name) = call_name {
844        if !is_builtin(&name, language) {
845            calls.insert(name);
846        }
847    }
848
849    for child in node.children(&mut node.walk()) {
850        collect_calls_recursive(child, source_code, language, calls);
851    }
852}
853
854/// Check if a function name is a common built-in
855pub fn is_builtin(name: &str, language: Language) -> bool {
856    match language {
857        Language::Python => {
858            matches!(
859                name,
860                "print"
861                    | "len"
862                    | "range"
863                    | "str"
864                    | "int"
865                    | "float"
866                    | "list"
867                    | "dict"
868                    | "set"
869                    | "tuple"
870                    | "bool"
871                    | "type"
872                    | "isinstance"
873                    | "hasattr"
874                    | "getattr"
875                    | "setattr"
876                    | "super"
877                    | "iter"
878                    | "next"
879                    | "open"
880                    | "input"
881                    | "format"
882                    | "enumerate"
883                    | "zip"
884                    | "map"
885                    | "filter"
886                    | "sorted"
887                    | "reversed"
888                    | "sum"
889                    | "min"
890                    | "max"
891                    | "abs"
892                    | "round"
893                    | "ord"
894                    | "chr"
895                    | "hex"
896                    | "bin"
897                    | "oct"
898            )
899        },
900        Language::JavaScript | Language::TypeScript => {
901            matches!(
902                name,
903                "console"
904                    | "log"
905                    | "error"
906                    | "warn"
907                    | "parseInt"
908                    | "parseFloat"
909                    | "setTimeout"
910                    | "setInterval"
911                    | "clearTimeout"
912                    | "clearInterval"
913                    | "JSON"
914                    | "stringify"
915                    | "parse"
916                    | "toString"
917                    | "valueOf"
918                    | "push"
919                    | "pop"
920                    | "shift"
921                    | "unshift"
922                    | "slice"
923                    | "splice"
924                    | "map"
925                    | "filter"
926                    | "reduce"
927                    | "forEach"
928                    | "find"
929                    | "findIndex"
930                    | "includes"
931                    | "indexOf"
932                    | "join"
933                    | "split"
934                    | "replace"
935            )
936        },
937        Language::Rust => {
938            matches!(
939                name,
940                "println!"
941                    | "print!"
942                    | "eprintln!"
943                    | "eprint!"
944                    | "format!"
945                    | "vec!"
946                    | "panic!"
947                    | "assert!"
948                    | "assert_eq!"
949                    | "assert_ne!"
950                    | "debug!"
951                    | "info!"
952                    | "warn!"
953                    | "error!"
954                    | "trace!"
955                    | "unwrap"
956                    | "expect"
957                    | "ok"
958                    | "err"
959                    | "some"
960                    | "none"
961                    | "clone"
962                    | "to_string"
963                    | "into"
964                    | "from"
965                    | "default"
966                    | "iter"
967                    | "into_iter"
968                    | "collect"
969                    | "map"
970                    | "filter"
971            )
972        },
973        Language::Go => {
974            matches!(
975                name,
976                "fmt"
977                    | "Println"
978                    | "Printf"
979                    | "Sprintf"
980                    | "Errorf"
981                    | "make"
982                    | "new"
983                    | "len"
984                    | "cap"
985                    | "append"
986                    | "copy"
987                    | "delete"
988                    | "close"
989                    | "panic"
990                    | "recover"
991                    | "print"
992            )
993        },
994        Language::Java => {
995            matches!(
996                name,
997                "println"
998                    | "print"
999                    | "printf"
1000                    | "toString"
1001                    | "equals"
1002                    | "hashCode"
1003                    | "getClass"
1004                    | "clone"
1005                    | "notify"
1006                    | "wait"
1007                    | "get"
1008                    | "set"
1009                    | "add"
1010                    | "remove"
1011                    | "size"
1012                    | "isEmpty"
1013                    | "contains"
1014                    | "iterator"
1015                    | "valueOf"
1016                    | "parseInt"
1017            )
1018        },
1019        Language::C | Language::Cpp => {
1020            matches!(
1021                name,
1022                "printf"
1023                    | "scanf"
1024                    | "malloc"
1025                    | "free"
1026                    | "memcpy"
1027                    | "memset"
1028                    | "strlen"
1029                    | "strcpy"
1030                    | "strcmp"
1031                    | "strcat"
1032                    | "sizeof"
1033                    | "cout"
1034                    | "cin"
1035                    | "endl"
1036                    | "cerr"
1037                    | "clog"
1038            )
1039        },
1040        Language::CSharp => {
1041            matches!(
1042                name,
1043                "WriteLine"
1044                    | "Write"
1045                    | "ReadLine"
1046                    | "ToString"
1047                    | "Equals"
1048                    | "GetHashCode"
1049                    | "GetType"
1050                    | "Add"
1051                    | "Remove"
1052                    | "Contains"
1053                    | "Count"
1054                    | "Clear"
1055                    | "ToList"
1056                    | "ToArray"
1057            )
1058        },
1059        Language::Ruby => {
1060            matches!(
1061                name,
1062                "puts"
1063                    | "print"
1064                    | "p"
1065                    | "gets"
1066                    | "each"
1067                    | "map"
1068                    | "select"
1069                    | "reject"
1070                    | "reduce"
1071                    | "inject"
1072                    | "find"
1073                    | "any?"
1074                    | "all?"
1075                    | "include?"
1076                    | "empty?"
1077                    | "nil?"
1078                    | "length"
1079                    | "size"
1080            )
1081        },
1082        Language::Php => {
1083            matches!(
1084                name,
1085                "echo"
1086                    | "print"
1087                    | "var_dump"
1088                    | "print_r"
1089                    | "isset"
1090                    | "empty"
1091                    | "array"
1092                    | "count"
1093                    | "strlen"
1094                    | "strpos"
1095                    | "substr"
1096                    | "explode"
1097                    | "implode"
1098                    | "json_encode"
1099                    | "json_decode"
1100            )
1101        },
1102        Language::Kotlin => {
1103            matches!(
1104                name,
1105                "println"
1106                    | "print"
1107                    | "readLine"
1108                    | "toString"
1109                    | "equals"
1110                    | "hashCode"
1111                    | "map"
1112                    | "filter"
1113                    | "forEach"
1114                    | "let"
1115                    | "also"
1116                    | "apply"
1117                    | "run"
1118                    | "with"
1119                    | "listOf"
1120                    | "mapOf"
1121                    | "setOf"
1122            )
1123        },
1124        Language::Swift => {
1125            matches!(
1126                name,
1127                "print"
1128                    | "debugPrint"
1129                    | "dump"
1130                    | "map"
1131                    | "filter"
1132                    | "reduce"
1133                    | "forEach"
1134                    | "contains"
1135                    | "count"
1136                    | "isEmpty"
1137                    | "append"
1138            )
1139        },
1140        Language::Scala => {
1141            matches!(
1142                name,
1143                "println"
1144                    | "print"
1145                    | "map"
1146                    | "filter"
1147                    | "flatMap"
1148                    | "foreach"
1149                    | "reduce"
1150                    | "fold"
1151                    | "foldLeft"
1152                    | "foldRight"
1153                    | "collect"
1154            )
1155        },
1156        Language::Bash
1157        | Language::Haskell
1158        | Language::Elixir
1159        | Language::Clojure
1160        | Language::OCaml
1161        | Language::FSharp
1162        | Language::Lua
1163        | Language::R => false,
1164    }
1165}
1166
/// Clean a JSDoc-style block comment into a single line of text.
///
/// Each line of `text` is trimmed, then stripped of the comment terminator
/// (`*/`), the comment opener (`/**` or `/*`) and any leading `*` gutter.
/// Lines that end up empty are dropped and the rest are joined with single
/// spaces.
///
/// The terminator is removed *before* the leading asterisks. Doing it the
/// other way round turns a closing ` */` line into a stray `/` (the `*` is
/// eaten first, so `*/` no longer matches), which then leaks into the result.
pub fn clean_jsdoc(text: &str) -> String {
    text.lines()
        .map(|line| {
            line.trim()
                // Must run first so a bare `*/` (or `**/`) line collapses to nothing.
                .trim_end_matches("*/")
                .trim_start_matches("/**")
                .trim_start_matches("/*")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1182
/// Clean JavaDoc comment
///
/// Delegates directly to [`clean_jsdoc`], so the result is exactly what that
/// function produces for the same `text`: comment markers are stripped from
/// each line and the remaining non-empty lines are joined with single spaces.
pub fn clean_javadoc(text: &str) -> String {
    clean_jsdoc(text)
}
1187
1188/// Extract class inheritance (extends) and interface implementations (implements)
1189pub fn extract_inheritance(
1190    node: Node<'_>,
1191    source_code: &str,
1192    language: Language,
1193) -> (Option<String>, Vec<String>) {
1194    let mut extends = None;
1195    let mut implements = Vec::new();
1196
1197    match language {
1198        Language::Python => {
1199            // Python: class Foo(Bar, Baz): - all are considered base classes
1200            if node.kind() == "class_definition" {
1201                if let Some(args) = node.child_by_field_name("superclasses") {
1202                    for child in args.children(&mut args.walk()) {
1203                        if child.kind() == "identifier" || child.kind() == "attribute" {
1204                            if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1205                                if extends.is_none() {
1206                                    extends = Some(name.to_owned());
1207                                } else {
1208                                    implements.push(name.to_owned());
1209                                }
1210                            }
1211                        }
1212                    }
1213                }
1214            }
1215        },
1216        Language::JavaScript | Language::TypeScript => {
1217            // JS/TS: class Foo extends Bar implements Baz
1218            if node.kind() == "class_declaration" || node.kind() == "class" {
1219                for child in node.children(&mut node.walk()) {
1220                    if child.kind() == "class_heritage" {
1221                        for heritage in child.children(&mut child.walk()) {
1222                            if heritage.kind() == "extends_clause" {
1223                                for type_node in heritage.children(&mut heritage.walk()) {
1224                                    if type_node.kind() == "identifier"
1225                                        || type_node.kind() == "type_identifier"
1226                                    {
1227                                        if let Ok(name) =
1228                                            type_node.utf8_text(source_code.as_bytes())
1229                                        {
1230                                            extends = Some(name.to_owned());
1231                                        }
1232                                    }
1233                                }
1234                            } else if heritage.kind() == "implements_clause" {
1235                                for type_node in heritage.children(&mut heritage.walk()) {
1236                                    if type_node.kind() == "identifier"
1237                                        || type_node.kind() == "type_identifier"
1238                                    {
1239                                        if let Ok(name) =
1240                                            type_node.utf8_text(source_code.as_bytes())
1241                                        {
1242                                            implements.push(name.to_owned());
1243                                        }
1244                                    }
1245                                }
1246                            }
1247                        }
1248                    }
1249                }
1250            }
1251        },
1252        Language::Rust => {
1253            // Rust doesn't have class inheritance, but has trait implementations
1254            // impl Trait for Struct
1255            if node.kind() == "impl_item" {
1256                let mut has_for = false;
1257                for child in node.children(&mut node.walk()) {
1258                    if child.kind() == "for" {
1259                        has_for = true;
1260                    }
1261                    if child.kind() == "type_identifier" || child.kind() == "generic_type" {
1262                        if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1263                            if has_for {
1264                                // This is the struct being implemented
1265                            } else {
1266                                // This is the trait being implemented
1267                                implements.push(name.to_owned());
1268                            }
1269                        }
1270                    }
1271                }
1272            }
1273        },
1274        Language::Go => {
1275            // Go uses embedding for "inheritance"
1276            if node.kind() == "type_declaration" {
1277                for child in node.children(&mut node.walk()) {
1278                    if child.kind() == "type_spec" {
1279                        for spec_child in child.children(&mut child.walk()) {
1280                            if spec_child.kind() == "struct_type" {
1281                                for field in spec_child.children(&mut spec_child.walk()) {
1282                                    if field.kind() == "field_declaration" {
1283                                        // Embedded field (no name, just type)
1284                                        let has_name = field.child_by_field_name("name").is_some();
1285                                        if !has_name {
1286                                            if let Some(type_node) =
1287                                                field.child_by_field_name("type")
1288                                            {
1289                                                if let Ok(name) =
1290                                                    type_node.utf8_text(source_code.as_bytes())
1291                                                {
1292                                                    implements.push(name.to_owned());
1293                                                }
1294                                            }
1295                                        }
1296                                    }
1297                                }
1298                            }
1299                        }
1300                    }
1301                }
1302            }
1303        },
1304        Language::Java => {
1305            // Java: class Foo extends Bar implements Baz, Qux
1306            if node.kind() == "class_declaration" {
1307                for child in node.children(&mut node.walk()) {
1308                    if child.kind() == "superclass" {
1309                        for type_node in child.children(&mut child.walk()) {
1310                            if type_node.kind() == "type_identifier" {
1311                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1312                                    extends = Some(name.to_owned());
1313                                }
1314                            }
1315                        }
1316                    } else if child.kind() == "super_interfaces" {
1317                        for type_list in child.children(&mut child.walk()) {
1318                            if type_list.kind() == "type_list" {
1319                                for type_node in type_list.children(&mut type_list.walk()) {
1320                                    if type_node.kind() == "type_identifier" {
1321                                        if let Ok(name) =
1322                                            type_node.utf8_text(source_code.as_bytes())
1323                                        {
1324                                            implements.push(name.to_owned());
1325                                        }
1326                                    }
1327                                }
1328                            }
1329                        }
1330                    }
1331                }
1332            }
1333        },
1334        Language::C | Language::Cpp => {
1335            // C++: class Foo : public Bar, public Baz
1336            if node.kind() == "class_specifier" || node.kind() == "struct_specifier" {
1337                for child in node.children(&mut node.walk()) {
1338                    if child.kind() == "base_class_clause" {
1339                        for base in child.children(&mut child.walk()) {
1340                            if base.kind() == "type_identifier" {
1341                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1342                                    if extends.is_none() {
1343                                        extends = Some(name.to_owned());
1344                                    } else {
1345                                        implements.push(name.to_owned());
1346                                    }
1347                                }
1348                            }
1349                        }
1350                    }
1351                }
1352            }
1353        },
1354        Language::CSharp => {
1355            // C#: class Foo : Bar, IBaz
1356            if node.kind() == "class_declaration" {
1357                for child in node.children(&mut node.walk()) {
1358                    if child.kind() == "base_list" {
1359                        for base in child.children(&mut child.walk()) {
1360                            if base.kind() == "identifier" || base.kind() == "generic_name" {
1361                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1362                                    if name.starts_with('I') && name.len() > 1 {
1363                                        // Convention: interfaces start with I
1364                                        implements.push(name.to_owned());
1365                                    } else if extends.is_none() {
1366                                        extends = Some(name.to_owned());
1367                                    } else {
1368                                        implements.push(name.to_owned());
1369                                    }
1370                                }
1371                            }
1372                        }
1373                    }
1374                }
1375            }
1376        },
1377        Language::Ruby => {
1378            // Ruby: class Foo < Bar; include Baz
1379            if node.kind() == "class" {
1380                for child in node.children(&mut node.walk()) {
1381                    if child.kind() == "superclass" {
1382                        for type_node in child.children(&mut child.walk()) {
1383                            if type_node.kind() == "constant" {
1384                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1385                                    extends = Some(name.to_owned());
1386                                }
1387                            }
1388                        }
1389                    }
1390                }
1391            }
1392        },
1393        Language::Php => {
1394            // PHP: class Foo extends Bar implements Baz
1395            if node.kind() == "class_declaration" {
1396                for child in node.children(&mut node.walk()) {
1397                    if child.kind() == "base_clause" {
1398                        for type_node in child.children(&mut child.walk()) {
1399                            if type_node.kind() == "name" {
1400                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1401                                    extends = Some(name.to_owned());
1402                                }
1403                            }
1404                        }
1405                    } else if child.kind() == "class_interface_clause" {
1406                        for type_node in child.children(&mut child.walk()) {
1407                            if type_node.kind() == "name" {
1408                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1409                                    implements.push(name.to_owned());
1410                                }
1411                            }
1412                        }
1413                    }
1414                }
1415            }
1416        },
1417        Language::Kotlin => {
1418            // Kotlin: class Foo : Bar(), Baz
1419            if node.kind() == "class_declaration" {
1420                for child in node.children(&mut node.walk()) {
1421                    if child.kind() == "delegation_specifiers" {
1422                        for spec in child.children(&mut child.walk()) {
1423                            if spec.kind() == "delegation_specifier" {
1424                                for type_node in spec.children(&mut spec.walk()) {
1425                                    if type_node.kind() == "user_type" {
1426                                        if let Ok(name) =
1427                                            type_node.utf8_text(source_code.as_bytes())
1428                                        {
1429                                            if extends.is_none() {
1430                                                extends = Some(name.to_owned());
1431                                            } else {
1432                                                implements.push(name.to_owned());
1433                                            }
1434                                        }
1435                                    }
1436                                }
1437                            }
1438                        }
1439                    }
1440                }
1441            }
1442        },
1443        Language::Swift => {
1444            // Swift: class Foo: Bar, Protocol
1445            if node.kind() == "class_declaration" {
1446                for child in node.children(&mut node.walk()) {
1447                    if child.kind() == "type_inheritance_clause" {
1448                        for type_node in child.children(&mut child.walk()) {
1449                            if type_node.kind() == "type_identifier" {
1450                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1451                                    if extends.is_none() {
1452                                        extends = Some(name.to_owned());
1453                                    } else {
1454                                        implements.push(name.to_owned());
1455                                    }
1456                                }
1457                            }
1458                        }
1459                    }
1460                }
1461            }
1462        },
1463        Language::Scala => {
1464            // Scala: class Foo extends Bar with Baz
1465            if node.kind() == "class_definition" {
1466                for child in node.children(&mut node.walk()) {
1467                    if child.kind() == "extends_clause" {
1468                        for type_node in child.children(&mut child.walk()) {
1469                            if type_node.kind() == "type_identifier" {
1470                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1471                                    if extends.is_none() {
1472                                        extends = Some(name.to_owned());
1473                                    } else {
1474                                        implements.push(name.to_owned());
1475                                    }
1476                                }
1477                            }
1478                        }
1479                    }
1480                }
1481            }
1482        },
1483        Language::Bash
1484        | Language::Haskell
1485        | Language::Elixir
1486        | Language::Clojure
1487        | Language::OCaml
1488        | Language::FSharp
1489        | Language::Lua
1490        | Language::R => {},
1491    }
1492
1493    (extends, implements)
1494}
1495
1496/// Map capture name to SymbolKind
1497pub fn map_symbol_kind(capture_name: &str) -> SymbolKind {
1498    match capture_name {
1499        "function" => SymbolKind::Function,
1500        "class" => SymbolKind::Class,
1501        "method" => SymbolKind::Method,
1502        "struct" => SymbolKind::Struct,
1503        "enum" => SymbolKind::Enum,
1504        "interface" => SymbolKind::Interface,
1505        "trait" => SymbolKind::Trait,
1506        _ => SymbolKind::Function,
1507    }
1508}