infiniloom_engine/parser/
extraction.rs

1//! Symbol extraction utilities for parsing
2//!
3//! This module contains standalone functions for extracting metadata from AST nodes:
4//! - Signatures
5//! - Docstrings
6//! - Visibility modifiers
7//! - Function calls
8//! - Inheritance relationships
9
10use super::language::Language;
11use crate::types::{SymbolKind, Visibility};
12use std::collections::HashSet;
13use tree_sitter::Node;
14
15/// Extract function/method signature
16pub fn extract_signature(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
17    let sig_node = match language {
18        Language::Python => {
19            if node.kind() == "function_definition" {
20                let start = node.start_byte();
21                let mut end = start;
22                for byte in &source_code.as_bytes()[start..] {
23                    end += 1;
24                    if *byte == b':' || *byte == b'\n' {
25                        break;
26                    }
27                }
28                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
29            }
30            None
31        },
32        Language::JavaScript | Language::TypeScript => {
33            if node.kind().contains("function") || node.kind().contains("method") {
34                let start = node.start_byte();
35                let mut end = start;
36                let mut brace_count = 0;
37                for byte in &source_code.as_bytes()[start..] {
38                    if *byte == b'{' {
39                        brace_count += 1;
40                        if brace_count == 1 {
41                            break;
42                        }
43                    }
44                    end += 1;
45                }
46                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
47            }
48            None
49        },
50        Language::Rust => {
51            if node.kind() == "function_item" {
52                for child in node.children(&mut node.walk()) {
53                    if child.kind() == "block" {
54                        let start = node.start_byte();
55                        let end = child.start_byte();
56                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
57                    }
58                }
59            }
60            None
61        },
62        Language::Go => {
63            if node.kind() == "function_declaration" || node.kind() == "method_declaration" {
64                for child in node.children(&mut node.walk()) {
65                    if child.kind() == "block" {
66                        let start = node.start_byte();
67                        let end = child.start_byte();
68                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
69                    }
70                }
71            }
72            None
73        },
74        Language::Java => {
75            if node.kind() == "method_declaration" {
76                for child in node.children(&mut node.walk()) {
77                    if child.kind() == "block" {
78                        let start = node.start_byte();
79                        let end = child.start_byte();
80                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
81                    }
82                }
83            }
84            None
85        },
86        Language::C
87        | Language::Cpp
88        | Language::CSharp
89        | Language::Php
90        | Language::Kotlin
91        | Language::Swift
92        | Language::Scala => {
93            for child in node.children(&mut node.walk()) {
94                if child.kind() == "block"
95                    || child.kind() == "compound_statement"
96                    || child.kind() == "function_body"
97                {
98                    let start = node.start_byte();
99                    let end = child.start_byte();
100                    return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
101                }
102            }
103            None
104        },
105        Language::Ruby | Language::Lua => {
106            let start = node.start_byte();
107            let mut end = start;
108            for byte in &source_code.as_bytes()[start..] {
109                end += 1;
110                if *byte == b'\n' {
111                    break;
112                }
113            }
114            Some(source_code[start..end].trim().to_owned())
115        },
116        Language::Bash => {
117            let start = node.start_byte();
118            let mut end = start;
119            for byte in &source_code.as_bytes()[start..] {
120                if *byte == b'{' {
121                    break;
122                }
123                end += 1;
124            }
125            Some(source_code[start..end].trim().to_owned())
126        },
127        Language::Haskell
128        | Language::OCaml
129        | Language::FSharp
130        | Language::Elixir
131        | Language::Clojure
132        | Language::R => {
133            let start = node.start_byte();
134            let mut end = start;
135            for byte in &source_code.as_bytes()[start..] {
136                end += 1;
137                if *byte == b'\n' || *byte == b'=' {
138                    break;
139                }
140            }
141            Some(source_code[start..end].trim().to_owned())
142        },
143    };
144
145    sig_node.or_else(|| {
146        let start = node.start_byte();
147        let end = std::cmp::min(start + 200, source_code.len());
148        let text = &source_code[start..end];
149        text.lines().next().map(|s| s.trim().to_owned())
150    })
151}
152
153/// Extract docstring/documentation comment
154pub fn extract_docstring(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
155    match language {
156        Language::Python => {
157            let mut cursor = node.walk();
158            for child in node.children(&mut cursor) {
159                if child.kind() == "block" {
160                    for stmt in child.children(&mut child.walk()) {
161                        if stmt.kind() == "expression_statement" {
162                            for expr in stmt.children(&mut stmt.walk()) {
163                                if expr.kind() == "string" {
164                                    if let Ok(text) = expr.utf8_text(source_code.as_bytes()) {
165                                        return Some(
166                                            text.trim_matches(|c| c == '"' || c == '\'')
167                                                .trim()
168                                                .to_owned(),
169                                        );
170                                    }
171                                }
172                            }
173                        }
174                    }
175                }
176            }
177            None
178        },
179        Language::JavaScript | Language::TypeScript => {
180            if let Some(prev_sibling) = node.prev_sibling() {
181                if prev_sibling.kind() == "comment" {
182                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
183                        if text.starts_with("/**") {
184                            return Some(clean_jsdoc(text));
185                        }
186                    }
187                }
188            }
189            None
190        },
191        Language::Rust => {
192            let start_byte = node.start_byte();
193            let lines_before: Vec<_> = source_code[..start_byte]
194                .lines()
195                .rev()
196                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
197                .collect();
198
199            if !lines_before.is_empty() {
200                let doc: Vec<String> = lines_before
201                    .into_iter()
202                    .rev()
203                    .filter_map(|line| {
204                        let trimmed = line.trim();
205                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
206                    })
207                    .collect();
208
209                if !doc.is_empty() {
210                    return Some(doc.join(" "));
211                }
212            }
213            None
214        },
215        Language::Go => {
216            if let Some(prev_sibling) = node.prev_sibling() {
217                if prev_sibling.kind() == "comment" {
218                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
219                        return Some(text.trim_start_matches("//").trim().to_owned());
220                    }
221                }
222            }
223            None
224        },
225        Language::Java => {
226            if let Some(prev_sibling) = node.prev_sibling() {
227                if prev_sibling.kind() == "block_comment" {
228                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
229                        if text.starts_with("/**") {
230                            return Some(clean_javadoc(text));
231                        }
232                    }
233                }
234            }
235            None
236        },
237        Language::C | Language::Cpp => {
238            if let Some(prev_sibling) = node.prev_sibling() {
239                if prev_sibling.kind() == "comment" {
240                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
241                        if text.starts_with("/**") || text.starts_with("/*") {
242                            return Some(clean_jsdoc(text));
243                        }
244                        return Some(text.trim_start_matches("//").trim().to_owned());
245                    }
246                }
247            }
248            None
249        },
250        Language::CSharp => {
251            let start_byte = node.start_byte();
252            let lines_before: Vec<_> = source_code[..start_byte]
253                .lines()
254                .rev()
255                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
256                .collect();
257
258            if !lines_before.is_empty() {
259                let doc: Vec<String> = lines_before
260                    .into_iter()
261                    .rev()
262                    .filter_map(|line| {
263                        let trimmed = line.trim();
264                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
265                    })
266                    .collect();
267
268                if !doc.is_empty() {
269                    return Some(doc.join(" "));
270                }
271            }
272            None
273        },
274        Language::Ruby => {
275            if let Some(prev_sibling) = node.prev_sibling() {
276                if prev_sibling.kind() == "comment" {
277                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
278                        return Some(text.trim_start_matches('#').trim().to_owned());
279                    }
280                }
281            }
282            None
283        },
284        Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
285            if let Some(prev_sibling) = node.prev_sibling() {
286                let kind = prev_sibling.kind();
287                if kind == "comment" || kind == "multiline_comment" || kind == "block_comment" {
288                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
289                        if text.starts_with("/**") {
290                            return Some(clean_jsdoc(text));
291                        }
292                    }
293                }
294            }
295            None
296        },
297        Language::Bash => {
298            if let Some(prev_sibling) = node.prev_sibling() {
299                if prev_sibling.kind() == "comment" {
300                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
301                        return Some(text.trim_start_matches('#').trim().to_owned());
302                    }
303                }
304            }
305            None
306        },
307        Language::Haskell => {
308            if let Some(prev_sibling) = node.prev_sibling() {
309                if prev_sibling.kind() == "comment" {
310                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
311                        let cleaned = text
312                            .trim_start_matches("{-")
313                            .trim_end_matches("-}")
314                            .trim_start_matches("--")
315                            .trim();
316                        return Some(cleaned.to_owned());
317                    }
318                }
319            }
320            None
321        },
322        Language::Elixir => {
323            if let Some(prev_sibling) = node.prev_sibling() {
324                if prev_sibling.kind() == "comment" {
325                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
326                        return Some(text.trim_start_matches('#').trim().to_owned());
327                    }
328                }
329            }
330            None
331        },
332        Language::Clojure => None,
333        Language::OCaml | Language::FSharp => {
334            if let Some(prev_sibling) = node.prev_sibling() {
335                if prev_sibling.kind() == "comment" {
336                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
337                        let cleaned = text
338                            .trim_start_matches("(**")
339                            .trim_start_matches("(*")
340                            .trim_end_matches("*)")
341                            .trim();
342                        return Some(cleaned.to_owned());
343                    }
344                }
345            }
346            None
347        },
348        Language::Lua => {
349            if let Some(prev_sibling) = node.prev_sibling() {
350                if prev_sibling.kind() == "comment" {
351                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
352                        let cleaned = text
353                            .trim_start_matches("--[[")
354                            .trim_end_matches("]]")
355                            .trim_start_matches("--")
356                            .trim();
357                        return Some(cleaned.to_owned());
358                    }
359                }
360            }
361            None
362        },
363        Language::R => {
364            if let Some(prev_sibling) = node.prev_sibling() {
365                if prev_sibling.kind() == "comment" {
366                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
367                        return Some(text.trim_start_matches('#').trim().to_owned());
368                    }
369                }
370            }
371            None
372        },
373    }
374}
375
376/// Extract parent class/struct name for methods
377pub fn extract_parent(node: Node<'_>, source_code: &str) -> Option<String> {
378    let mut current = node.parent()?;
379
380    while let Some(parent) = current.parent() {
381        if ["class_definition", "class_declaration", "struct_item", "impl_item"]
382            .contains(&parent.kind())
383        {
384            for child in parent.children(&mut parent.walk()) {
385                if child.kind() == "identifier" || child.kind() == "type_identifier" {
386                    if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
387                        return Some(name.to_owned());
388                    }
389                }
390            }
391        }
392        current = parent;
393    }
394
395    None
396}
397
398/// Extract visibility modifier from a node
399pub fn extract_visibility(node: Node<'_>, source_code: &str, language: Language) -> Visibility {
400    match language {
401        Language::Python => {
402            if let Some(name_node) = node.child_by_field_name("name") {
403                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
404                    if name.starts_with("__") && !name.ends_with("__") {
405                        return Visibility::Private;
406                    } else if name.starts_with('_') {
407                        return Visibility::Protected;
408                    }
409                }
410            }
411            Visibility::Public
412        },
413        Language::Rust => {
414            for child in node.children(&mut node.walk()) {
415                if child.kind() == "visibility_modifier" {
416                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
417                        if text.contains("pub(crate)") || text.contains("pub(super)") {
418                            return Visibility::Internal;
419                        } else if text.starts_with("pub") {
420                            return Visibility::Public;
421                        }
422                    }
423                }
424            }
425            Visibility::Private
426        },
427        Language::JavaScript | Language::TypeScript => {
428            for child in node.children(&mut node.walk()) {
429                let kind = child.kind();
430                if kind == "private" || kind == "accessibility_modifier" {
431                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
432                        return match text {
433                            "private" => Visibility::Private,
434                            "protected" => Visibility::Protected,
435                            _ => Visibility::Public,
436                        };
437                    }
438                }
439            }
440            if let Some(name_node) = node.child_by_field_name("name") {
441                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
442                    if name.starts_with('#') {
443                        return Visibility::Private;
444                    }
445                }
446            }
447            Visibility::Public
448        },
449        Language::Go => {
450            if let Some(name_node) = node.child_by_field_name("name") {
451                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
452                    if let Some(first_char) = name.chars().next() {
453                        if first_char.is_lowercase() {
454                            return Visibility::Private;
455                        }
456                    }
457                }
458            }
459            Visibility::Public
460        },
461        Language::Java => {
462            for child in node.children(&mut node.walk()) {
463                if child.kind() == "modifiers" {
464                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
465                        if text.contains("private") {
466                            return Visibility::Private;
467                        } else if text.contains("protected") {
468                            return Visibility::Protected;
469                        } else if text.contains("public") {
470                            return Visibility::Public;
471                        }
472                    }
473                }
474            }
475            Visibility::Internal
476        },
477        Language::C | Language::Cpp => {
478            for child in node.children(&mut node.walk()) {
479                if child.kind() == "storage_class_specifier" {
480                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
481                        if text == "static" {
482                            return Visibility::Private;
483                        }
484                    }
485                }
486            }
487            Visibility::Public
488        },
489        Language::CSharp | Language::Kotlin | Language::Swift | Language::Scala => {
490            for child in node.children(&mut node.walk()) {
491                let kind = child.kind();
492                if kind == "modifier" || kind == "modifiers" || kind == "visibility_modifier" {
493                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
494                        if text.contains("private") {
495                            return Visibility::Private;
496                        } else if text.contains("protected") {
497                            return Visibility::Protected;
498                        } else if text.contains("internal") {
499                            return Visibility::Internal;
500                        } else if text.contains("public") {
501                            return Visibility::Public;
502                        }
503                    }
504                }
505            }
506            Visibility::Internal
507        },
508        Language::Ruby => {
509            if let Some(name_node) = node.child_by_field_name("name") {
510                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
511                    if name.starts_with('_') {
512                        return Visibility::Private;
513                    }
514                }
515            }
516            Visibility::Public
517        },
518        Language::Php => {
519            for child in node.children(&mut node.walk()) {
520                if child.kind() == "visibility_modifier" {
521                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
522                        return match text {
523                            "private" => Visibility::Private,
524                            "protected" => Visibility::Protected,
525                            "public" => Visibility::Public,
526                            _ => Visibility::Public,
527                        };
528                    }
529                }
530            }
531            Visibility::Public
532        },
533        Language::Bash => Visibility::Public,
534        Language::Haskell
535        | Language::Elixir
536        | Language::Clojure
537        | Language::OCaml
538        | Language::FSharp
539        | Language::Lua
540        | Language::R => Visibility::Public,
541    }
542}
543
544/// Extract function calls from a function/method body
545pub fn extract_calls(node: Node<'_>, source_code: &str, language: Language) -> Vec<String> {
546    let mut calls = HashSet::new();
547
548    let body_node = find_body_node(node, language);
549    if let Some(body) = body_node {
550        collect_calls_recursive(body, source_code, language, &mut calls);
551    }
552
553    if calls.is_empty() {
554        collect_calls_recursive(node, source_code, language, &mut calls);
555    }
556
557    calls.into_iter().collect()
558}
559
560/// Find the body node of a function/method
561pub fn find_body_node(node: Node<'_>, language: Language) -> Option<Node<'_>> {
562    match language {
563        Language::Python => {
564            for child in node.children(&mut node.walk()) {
565                if child.kind() == "block" {
566                    return Some(child);
567                }
568            }
569        },
570        Language::Rust => {
571            for child in node.children(&mut node.walk()) {
572                if child.kind() == "block" {
573                    return Some(child);
574                }
575            }
576        },
577        Language::JavaScript | Language::TypeScript => {
578            for child in node.children(&mut node.walk()) {
579                let kind = child.kind();
580                if kind == "statement_block" {
581                    return Some(child);
582                }
583                if kind == "arrow_function" {
584                    if let Some(body) = find_body_node(child, language) {
585                        return Some(body);
586                    }
587                    return Some(child);
588                }
589            }
590            if node.kind() == "arrow_function" {
591                for child in node.children(&mut node.walk()) {
592                    let kind = child.kind();
593                    if kind != "formal_parameters"
594                        && kind != "identifier"
595                        && kind != "=>"
596                        && kind != "("
597                        && kind != ")"
598                        && kind != ","
599                    {
600                        return Some(child);
601                    }
602                }
603                return Some(node);
604            }
605        },
606        Language::Go => {
607            for child in node.children(&mut node.walk()) {
608                if child.kind() == "block" {
609                    return Some(child);
610                }
611            }
612        },
613        Language::Java => {
614            for child in node.children(&mut node.walk()) {
615                if child.kind() == "block" {
616                    return Some(child);
617                }
618            }
619        },
620        Language::C | Language::Cpp => {
621            for child in node.children(&mut node.walk()) {
622                if child.kind() == "compound_statement" {
623                    return Some(child);
624                }
625            }
626        },
627        Language::CSharp
628        | Language::Php
629        | Language::Kotlin
630        | Language::Swift
631        | Language::Scala => {
632            for child in node.children(&mut node.walk()) {
633                let kind = child.kind();
634                if kind == "block" || kind == "compound_statement" || kind == "function_body" {
635                    return Some(child);
636                }
637            }
638        },
639        Language::Ruby => {
640            for child in node.children(&mut node.walk()) {
641                if child.kind() == "body_statement" || child.kind() == "do_block" {
642                    return Some(child);
643                }
644            }
645        },
646        Language::Bash => {
647            for child in node.children(&mut node.walk()) {
648                if child.kind() == "compound_statement" {
649                    return Some(child);
650                }
651            }
652        },
653        Language::Haskell
654        | Language::Elixir
655        | Language::Clojure
656        | Language::OCaml
657        | Language::FSharp
658        | Language::R => {
659            return Some(node);
660        },
661        Language::Lua => {
662            for child in node.children(&mut node.walk()) {
663                if child.kind() == "block" {
664                    return Some(child);
665                }
666            }
667        },
668    }
669    None
670}
671
672/// Recursively collect function calls from a node
673pub fn collect_calls_recursive(
674    node: Node<'_>,
675    source_code: &str,
676    language: Language,
677    calls: &mut HashSet<String>,
678) {
679    let kind = node.kind();
680
681    let call_name = match language {
682        Language::Python => {
683            if kind == "call" {
684                node.child_by_field_name("function").and_then(|f| {
685                    if f.kind() == "identifier" {
686                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
687                    } else if f.kind() == "attribute" {
688                        f.child_by_field_name("attribute")
689                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
690                            .map(String::from)
691                    } else {
692                        None
693                    }
694                })
695            } else {
696                None
697            }
698        },
699        Language::Rust => {
700            if kind == "call_expression" {
701                node.child_by_field_name("function").and_then(|f| {
702                    if f.kind() == "identifier" {
703                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
704                    } else if f.kind() == "field_expression" {
705                        f.child_by_field_name("field")
706                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
707                            .map(String::from)
708                    } else if f.kind() == "scoped_identifier" {
709                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
710                    } else {
711                        None
712                    }
713                })
714            } else if kind == "macro_invocation" {
715                node.child_by_field_name("macro")
716                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
717                    .map(|s| format!("{}!", s))
718            } else {
719                None
720            }
721        },
722        Language::JavaScript | Language::TypeScript => {
723            if kind == "call_expression" {
724                node.child_by_field_name("function").and_then(|f| {
725                    if f.kind() == "identifier" {
726                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
727                    } else if f.kind() == "member_expression" {
728                        f.child_by_field_name("property")
729                            .and_then(|p| p.utf8_text(source_code.as_bytes()).ok())
730                            .map(String::from)
731                    } else {
732                        None
733                    }
734                })
735            } else {
736                None
737            }
738        },
739        Language::Go => {
740            if kind == "call_expression" {
741                node.child_by_field_name("function").and_then(|f| {
742                    if f.kind() == "identifier" {
743                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
744                    } else if f.kind() == "selector_expression" {
745                        f.child_by_field_name("field")
746                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
747                            .map(String::from)
748                    } else {
749                        None
750                    }
751                })
752            } else {
753                None
754            }
755        },
756        Language::Java => {
757            if kind == "method_invocation" {
758                node.child_by_field_name("name")
759                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
760                    .map(String::from)
761            } else {
762                None
763            }
764        },
765        Language::C | Language::Cpp => {
766            if kind == "call_expression" {
767                node.child_by_field_name("function").and_then(|f| {
768                    if f.kind() == "identifier" {
769                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
770                    } else if f.kind() == "field_expression" {
771                        f.child_by_field_name("field")
772                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
773                            .map(String::from)
774                    } else {
775                        None
776                    }
777                })
778            } else {
779                None
780            }
781        },
782        Language::CSharp
783        | Language::Php
784        | Language::Kotlin
785        | Language::Swift
786        | Language::Scala => {
787            if kind == "invocation_expression" || kind == "call_expression" {
788                node.children(&mut node.walk())
789                    .find(|child| {
790                        child.kind() == "identifier" || child.kind() == "simple_name"
791                    })
792                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
793                    .map(|s| s.to_owned())
794            } else {
795                None
796            }
797        },
798        Language::Ruby => {
799            if kind == "call" || kind == "method_call" {
800                node.child_by_field_name("method")
801                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
802                    .map(String::from)
803            } else {
804                None
805            }
806        },
807        Language::Bash => {
808            if kind == "command" {
809                node.child_by_field_name("name")
810                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
811                    .map(String::from)
812            } else {
813                None
814            }
815        },
816        Language::Haskell
817        | Language::Elixir
818        | Language::Clojure
819        | Language::OCaml
820        | Language::FSharp
821        | Language::Lua
822        | Language::R => {
823            if kind == "function_call" || kind == "call" || kind == "application" {
824                node.children(&mut node.walk())
825                    .find(|child| {
826                        child.kind() == "identifier" || child.kind() == "variable"
827                    })
828                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
829                    .map(|s| s.to_owned())
830            } else {
831                None
832            }
833        },
834    };
835
836    if let Some(name) = call_name {
837        if !is_builtin(&name, language) {
838            calls.insert(name);
839        }
840    }
841
842    for child in node.children(&mut node.walk()) {
843        collect_calls_recursive(child, source_code, language, calls);
844    }
845}
846
847/// Check if a function name is a common built-in
848pub fn is_builtin(name: &str, language: Language) -> bool {
849    match language {
850        Language::Python => {
851            matches!(
852                name,
853                "print"
854                    | "len"
855                    | "range"
856                    | "str"
857                    | "int"
858                    | "float"
859                    | "list"
860                    | "dict"
861                    | "set"
862                    | "tuple"
863                    | "bool"
864                    | "type"
865                    | "isinstance"
866                    | "hasattr"
867                    | "getattr"
868                    | "setattr"
869                    | "super"
870                    | "iter"
871                    | "next"
872                    | "open"
873                    | "input"
874                    | "format"
875                    | "enumerate"
876                    | "zip"
877                    | "map"
878                    | "filter"
879                    | "sorted"
880                    | "reversed"
881                    | "sum"
882                    | "min"
883                    | "max"
884                    | "abs"
885                    | "round"
886                    | "ord"
887                    | "chr"
888                    | "hex"
889                    | "bin"
890                    | "oct"
891            )
892        },
893        Language::JavaScript | Language::TypeScript => {
894            matches!(
895                name,
896                "console"
897                    | "log"
898                    | "error"
899                    | "warn"
900                    | "parseInt"
901                    | "parseFloat"
902                    | "setTimeout"
903                    | "setInterval"
904                    | "clearTimeout"
905                    | "clearInterval"
906                    | "JSON"
907                    | "stringify"
908                    | "parse"
909                    | "toString"
910                    | "valueOf"
911                    | "push"
912                    | "pop"
913                    | "shift"
914                    | "unshift"
915                    | "slice"
916                    | "splice"
917                    | "map"
918                    | "filter"
919                    | "reduce"
920                    | "forEach"
921                    | "find"
922                    | "findIndex"
923                    | "includes"
924                    | "indexOf"
925                    | "join"
926                    | "split"
927                    | "replace"
928            )
929        },
930        Language::Rust => {
931            matches!(
932                name,
933                "println!"
934                    | "print!"
935                    | "eprintln!"
936                    | "eprint!"
937                    | "format!"
938                    | "vec!"
939                    | "panic!"
940                    | "assert!"
941                    | "assert_eq!"
942                    | "assert_ne!"
943                    | "debug!"
944                    | "info!"
945                    | "warn!"
946                    | "error!"
947                    | "trace!"
948                    | "unwrap"
949                    | "expect"
950                    | "ok"
951                    | "err"
952                    | "some"
953                    | "none"
954                    | "clone"
955                    | "to_string"
956                    | "into"
957                    | "from"
958                    | "default"
959                    | "iter"
960                    | "into_iter"
961                    | "collect"
962                    | "map"
963                    | "filter"
964            )
965        },
966        Language::Go => {
967            matches!(
968                name,
969                "fmt"
970                    | "Println"
971                    | "Printf"
972                    | "Sprintf"
973                    | "Errorf"
974                    | "make"
975                    | "new"
976                    | "len"
977                    | "cap"
978                    | "append"
979                    | "copy"
980                    | "delete"
981                    | "close"
982                    | "panic"
983                    | "recover"
984                    | "print"
985            )
986        },
987        Language::Java => {
988            matches!(
989                name,
990                "println"
991                    | "print"
992                    | "printf"
993                    | "toString"
994                    | "equals"
995                    | "hashCode"
996                    | "getClass"
997                    | "clone"
998                    | "notify"
999                    | "wait"
1000                    | "get"
1001                    | "set"
1002                    | "add"
1003                    | "remove"
1004                    | "size"
1005                    | "isEmpty"
1006                    | "contains"
1007                    | "iterator"
1008                    | "valueOf"
1009                    | "parseInt"
1010            )
1011        },
1012        Language::C | Language::Cpp => {
1013            matches!(
1014                name,
1015                "printf"
1016                    | "scanf"
1017                    | "malloc"
1018                    | "free"
1019                    | "memcpy"
1020                    | "memset"
1021                    | "strlen"
1022                    | "strcpy"
1023                    | "strcmp"
1024                    | "strcat"
1025                    | "sizeof"
1026                    | "cout"
1027                    | "cin"
1028                    | "endl"
1029                    | "cerr"
1030                    | "clog"
1031            )
1032        },
1033        Language::CSharp => {
1034            matches!(
1035                name,
1036                "WriteLine"
1037                    | "Write"
1038                    | "ReadLine"
1039                    | "ToString"
1040                    | "Equals"
1041                    | "GetHashCode"
1042                    | "GetType"
1043                    | "Add"
1044                    | "Remove"
1045                    | "Contains"
1046                    | "Count"
1047                    | "Clear"
1048                    | "ToList"
1049                    | "ToArray"
1050            )
1051        },
1052        Language::Ruby => {
1053            matches!(
1054                name,
1055                "puts"
1056                    | "print"
1057                    | "p"
1058                    | "gets"
1059                    | "each"
1060                    | "map"
1061                    | "select"
1062                    | "reject"
1063                    | "reduce"
1064                    | "inject"
1065                    | "find"
1066                    | "any?"
1067                    | "all?"
1068                    | "include?"
1069                    | "empty?"
1070                    | "nil?"
1071                    | "length"
1072                    | "size"
1073            )
1074        },
1075        Language::Php => {
1076            matches!(
1077                name,
1078                "echo"
1079                    | "print"
1080                    | "var_dump"
1081                    | "print_r"
1082                    | "isset"
1083                    | "empty"
1084                    | "array"
1085                    | "count"
1086                    | "strlen"
1087                    | "strpos"
1088                    | "substr"
1089                    | "explode"
1090                    | "implode"
1091                    | "json_encode"
1092                    | "json_decode"
1093            )
1094        },
1095        Language::Kotlin => {
1096            matches!(
1097                name,
1098                "println"
1099                    | "print"
1100                    | "readLine"
1101                    | "toString"
1102                    | "equals"
1103                    | "hashCode"
1104                    | "map"
1105                    | "filter"
1106                    | "forEach"
1107                    | "let"
1108                    | "also"
1109                    | "apply"
1110                    | "run"
1111                    | "with"
1112                    | "listOf"
1113                    | "mapOf"
1114                    | "setOf"
1115            )
1116        },
1117        Language::Swift => {
1118            matches!(
1119                name,
1120                "print"
1121                    | "debugPrint"
1122                    | "dump"
1123                    | "map"
1124                    | "filter"
1125                    | "reduce"
1126                    | "forEach"
1127                    | "contains"
1128                    | "count"
1129                    | "isEmpty"
1130                    | "append"
1131            )
1132        },
1133        Language::Scala => {
1134            matches!(
1135                name,
1136                "println"
1137                    | "print"
1138                    | "map"
1139                    | "filter"
1140                    | "flatMap"
1141                    | "foreach"
1142                    | "reduce"
1143                    | "fold"
1144                    | "foldLeft"
1145                    | "foldRight"
1146                    | "collect"
1147            )
1148        },
1149        Language::Bash
1150        | Language::Haskell
1151        | Language::Elixir
1152        | Language::Clojure
1153        | Language::OCaml
1154        | Language::FSharp
1155        | Language::Lua
1156        | Language::R => false,
1157    }
1158}
1159
/// Clean JSDoc comment
///
/// Strips the block-comment delimiters (`/**`, `/*`, `*/`) and the leading `*` gutter from
/// every line of `text`, drops lines that end up empty, and joins the remaining lines with
/// a single space.
///
/// Returns the flattened comment text; an input made only of delimiters yields `""`.
pub fn clean_jsdoc(text: &str) -> String {
    text.lines()
        .map(|line| {
            line.trim()
                // Strip the closer before the gutter: on a bare ` */` line the gutter pass
                // would otherwise eat the `*` and leave a stray `/` in the output.
                .trim_end_matches("*/")
                .trim_start_matches("/**")
                .trim_start_matches("/*")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1175
1176/// Clean JavaDoc comment
1177pub fn clean_javadoc(text: &str) -> String {
1178    clean_jsdoc(text)
1179}
1180
1181/// Extract class inheritance (extends) and interface implementations (implements)
1182pub fn extract_inheritance(
1183    node: Node<'_>,
1184    source_code: &str,
1185    language: Language,
1186) -> (Option<String>, Vec<String>) {
1187    let mut extends = None;
1188    let mut implements = Vec::new();
1189
1190    match language {
1191        Language::Python => {
1192            // Python: class Foo(Bar, Baz): - all are considered base classes
1193            if node.kind() == "class_definition" {
1194                if let Some(args) = node.child_by_field_name("superclasses") {
1195                    for child in args.children(&mut args.walk()) {
1196                        if child.kind() == "identifier" || child.kind() == "attribute" {
1197                            if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1198                                if extends.is_none() {
1199                                    extends = Some(name.to_owned());
1200                                } else {
1201                                    implements.push(name.to_owned());
1202                                }
1203                            }
1204                        }
1205                    }
1206                }
1207            }
1208        },
1209        Language::JavaScript | Language::TypeScript => {
1210            // JS/TS: class Foo extends Bar implements Baz
1211            if node.kind() == "class_declaration" || node.kind() == "class" {
1212                for child in node.children(&mut node.walk()) {
1213                    if child.kind() == "class_heritage" {
1214                        for heritage in child.children(&mut child.walk()) {
1215                            if heritage.kind() == "extends_clause" {
1216                                for type_node in heritage.children(&mut heritage.walk()) {
1217                                    if type_node.kind() == "identifier"
1218                                        || type_node.kind() == "type_identifier"
1219                                    {
1220                                        if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1221                                        {
1222                                            extends = Some(name.to_owned());
1223                                        }
1224                                    }
1225                                }
1226                            } else if heritage.kind() == "implements_clause" {
1227                                for type_node in heritage.children(&mut heritage.walk()) {
1228                                    if type_node.kind() == "identifier"
1229                                        || type_node.kind() == "type_identifier"
1230                                    {
1231                                        if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1232                                        {
1233                                            implements.push(name.to_owned());
1234                                        }
1235                                    }
1236                                }
1237                            }
1238                        }
1239                    }
1240                }
1241            }
1242        },
1243        Language::Rust => {
1244            // Rust doesn't have class inheritance, but has trait implementations
1245            // impl Trait for Struct
1246            if node.kind() == "impl_item" {
1247                let mut has_for = false;
1248                for child in node.children(&mut node.walk()) {
1249                    if child.kind() == "for" {
1250                        has_for = true;
1251                    }
1252                    if child.kind() == "type_identifier" || child.kind() == "generic_type" {
1253                        if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1254                            if has_for {
1255                                // This is the struct being implemented
1256                            } else {
1257                                // This is the trait being implemented
1258                                implements.push(name.to_owned());
1259                            }
1260                        }
1261                    }
1262                }
1263            }
1264        },
1265        Language::Go => {
1266            // Go uses embedding for "inheritance"
1267            if node.kind() == "type_declaration" {
1268                for child in node.children(&mut node.walk()) {
1269                    if child.kind() == "type_spec" {
1270                        for spec_child in child.children(&mut child.walk()) {
1271                            if spec_child.kind() == "struct_type" {
1272                                for field in spec_child.children(&mut spec_child.walk()) {
1273                                    if field.kind() == "field_declaration" {
1274                                        // Embedded field (no name, just type)
1275                                        let has_name = field.child_by_field_name("name").is_some();
1276                                        if !has_name {
1277                                            if let Some(type_node) = field.child_by_field_name("type")
1278                                            {
1279                                                if let Ok(name) =
1280                                                    type_node.utf8_text(source_code.as_bytes())
1281                                                {
1282                                                    implements.push(name.to_owned());
1283                                                }
1284                                            }
1285                                        }
1286                                    }
1287                                }
1288                            }
1289                        }
1290                    }
1291                }
1292            }
1293        },
1294        Language::Java => {
1295            // Java: class Foo extends Bar implements Baz, Qux
1296            if node.kind() == "class_declaration" {
1297                for child in node.children(&mut node.walk()) {
1298                    if child.kind() == "superclass" {
1299                        for type_node in child.children(&mut child.walk()) {
1300                            if type_node.kind() == "type_identifier" {
1301                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1302                                    extends = Some(name.to_owned());
1303                                }
1304                            }
1305                        }
1306                    } else if child.kind() == "super_interfaces" {
1307                        for type_list in child.children(&mut child.walk()) {
1308                            if type_list.kind() == "type_list" {
1309                                for type_node in type_list.children(&mut type_list.walk()) {
1310                                    if type_node.kind() == "type_identifier" {
1311                                        if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1312                                        {
1313                                            implements.push(name.to_owned());
1314                                        }
1315                                    }
1316                                }
1317                            }
1318                        }
1319                    }
1320                }
1321            }
1322        },
1323        Language::C | Language::Cpp => {
1324            // C++: class Foo : public Bar, public Baz
1325            if node.kind() == "class_specifier" || node.kind() == "struct_specifier" {
1326                for child in node.children(&mut node.walk()) {
1327                    if child.kind() == "base_class_clause" {
1328                        for base in child.children(&mut child.walk()) {
1329                            if base.kind() == "type_identifier" {
1330                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1331                                    if extends.is_none() {
1332                                        extends = Some(name.to_owned());
1333                                    } else {
1334                                        implements.push(name.to_owned());
1335                                    }
1336                                }
1337                            }
1338                        }
1339                    }
1340                }
1341            }
1342        },
1343        Language::CSharp => {
1344            // C#: class Foo : Bar, IBaz
1345            if node.kind() == "class_declaration" {
1346                for child in node.children(&mut node.walk()) {
1347                    if child.kind() == "base_list" {
1348                        for base in child.children(&mut child.walk()) {
1349                            if base.kind() == "identifier" || base.kind() == "generic_name" {
1350                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1351                                    if name.starts_with('I') && name.len() > 1 {
1352                                        // Convention: interfaces start with I
1353                                        implements.push(name.to_owned());
1354                                    } else if extends.is_none() {
1355                                        extends = Some(name.to_owned());
1356                                    } else {
1357                                        implements.push(name.to_owned());
1358                                    }
1359                                }
1360                            }
1361                        }
1362                    }
1363                }
1364            }
1365        },
1366        Language::Ruby => {
1367            // Ruby: class Foo < Bar; include Baz
1368            if node.kind() == "class" {
1369                for child in node.children(&mut node.walk()) {
1370                    if child.kind() == "superclass" {
1371                        for type_node in child.children(&mut child.walk()) {
1372                            if type_node.kind() == "constant" {
1373                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1374                                    extends = Some(name.to_owned());
1375                                }
1376                            }
1377                        }
1378                    }
1379                }
1380            }
1381        },
1382        Language::Php => {
1383            // PHP: class Foo extends Bar implements Baz
1384            if node.kind() == "class_declaration" {
1385                for child in node.children(&mut node.walk()) {
1386                    if child.kind() == "base_clause" {
1387                        for type_node in child.children(&mut child.walk()) {
1388                            if type_node.kind() == "name" {
1389                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1390                                    extends = Some(name.to_owned());
1391                                }
1392                            }
1393                        }
1394                    } else if child.kind() == "class_interface_clause" {
1395                        for type_node in child.children(&mut child.walk()) {
1396                            if type_node.kind() == "name" {
1397                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1398                                    implements.push(name.to_owned());
1399                                }
1400                            }
1401                        }
1402                    }
1403                }
1404            }
1405        },
1406        Language::Kotlin => {
1407            // Kotlin: class Foo : Bar(), Baz
1408            if node.kind() == "class_declaration" {
1409                for child in node.children(&mut node.walk()) {
1410                    if child.kind() == "delegation_specifiers" {
1411                        for spec in child.children(&mut child.walk()) {
1412                            if spec.kind() == "delegation_specifier" {
1413                                for type_node in spec.children(&mut spec.walk()) {
1414                                    if type_node.kind() == "user_type" {
1415                                        if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1416                                        {
1417                                            if extends.is_none() {
1418                                                extends = Some(name.to_owned());
1419                                            } else {
1420                                                implements.push(name.to_owned());
1421                                            }
1422                                        }
1423                                    }
1424                                }
1425                            }
1426                        }
1427                    }
1428                }
1429            }
1430        },
1431        Language::Swift => {
1432            // Swift: class Foo: Bar, Protocol
1433            if node.kind() == "class_declaration" {
1434                for child in node.children(&mut node.walk()) {
1435                    if child.kind() == "type_inheritance_clause" {
1436                        for type_node in child.children(&mut child.walk()) {
1437                            if type_node.kind() == "type_identifier" {
1438                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1439                                    if extends.is_none() {
1440                                        extends = Some(name.to_owned());
1441                                    } else {
1442                                        implements.push(name.to_owned());
1443                                    }
1444                                }
1445                            }
1446                        }
1447                    }
1448                }
1449            }
1450        },
1451        Language::Scala => {
1452            // Scala: class Foo extends Bar with Baz
1453            if node.kind() == "class_definition" {
1454                for child in node.children(&mut node.walk()) {
1455                    if child.kind() == "extends_clause" {
1456                        for type_node in child.children(&mut child.walk()) {
1457                            if type_node.kind() == "type_identifier" {
1458                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1459                                    if extends.is_none() {
1460                                        extends = Some(name.to_owned());
1461                                    } else {
1462                                        implements.push(name.to_owned());
1463                                    }
1464                                }
1465                            }
1466                        }
1467                    }
1468                }
1469            }
1470        },
1471        Language::Bash
1472        | Language::Haskell
1473        | Language::Elixir
1474        | Language::Clojure
1475        | Language::OCaml
1476        | Language::FSharp
1477        | Language::Lua
1478        | Language::R => {},
1479    }
1480
1481    (extends, implements)
1482}
1483
1484/// Map capture name to SymbolKind
1485pub fn map_symbol_kind(capture_name: &str) -> SymbolKind {
1486    match capture_name {
1487        "function" => SymbolKind::Function,
1488        "class" => SymbolKind::Class,
1489        "method" => SymbolKind::Method,
1490        "struct" => SymbolKind::Struct,
1491        "enum" => SymbolKind::Enum,
1492        "interface" => SymbolKind::Interface,
1493        "trait" => SymbolKind::Trait,
1494        _ => SymbolKind::Function,
1495    }
1496}