infiniloom_engine/parser/
extraction.rs

1//! Symbol extraction utilities for parsing
2//!
3//! This module contains standalone functions for extracting metadata from AST nodes:
4//! - Signatures
5//! - Docstrings
6//! - Visibility modifiers
7//! - Function calls
8//! - Inheritance relationships
9
10use super::language::Language;
11use crate::types::{SymbolKind, Visibility};
12use std::collections::HashSet;
13use tree_sitter::Node;
14
/// Clamp `index` to the closest UTF-8 character boundary of `s` that does not
/// lie after it.
///
/// Indices at or beyond the end of `s` map to `s.len()`. Any other index is
/// moved backwards until it no longer points into the middle of a multi-byte
/// character, so the result is always usable as a slice bound.
fn safe_char_boundary(s: &str, index: usize) -> usize {
    if index >= s.len() {
        return s.len();
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=index)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
27
28/// Extract function/method signature
29pub fn extract_signature(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
30    let sig_node = match language {
31        Language::Python => {
32            if node.kind() == "function_definition" {
33                let start = node.start_byte();
34                let mut end = start;
35                for byte in &source_code.as_bytes()[start..] {
36                    end += 1;
37                    if *byte == b':' || *byte == b'\n' {
38                        break;
39                    }
40                }
41                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
42            }
43            None
44        },
45        Language::JavaScript | Language::TypeScript => {
46            if node.kind().contains("function") || node.kind().contains("method") {
47                let start = node.start_byte();
48                let mut end = start;
49                let mut brace_count = 0;
50                for byte in &source_code.as_bytes()[start..] {
51                    if *byte == b'{' {
52                        brace_count += 1;
53                        if brace_count == 1 {
54                            break;
55                        }
56                    }
57                    end += 1;
58                }
59                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
60            }
61            None
62        },
63        Language::Rust => {
64            if node.kind() == "function_item" {
65                for child in node.children(&mut node.walk()) {
66                    if child.kind() == "block" {
67                        let start = node.start_byte();
68                        let end = child.start_byte();
69                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
70                    }
71                }
72            }
73            None
74        },
75        Language::Go => {
76            if node.kind() == "function_declaration" || node.kind() == "method_declaration" {
77                for child in node.children(&mut node.walk()) {
78                    if child.kind() == "block" {
79                        let start = node.start_byte();
80                        let end = child.start_byte();
81                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
82                    }
83                }
84            }
85            None
86        },
87        Language::Java => {
88            if node.kind() == "method_declaration" {
89                for child in node.children(&mut node.walk()) {
90                    if child.kind() == "block" {
91                        let start = node.start_byte();
92                        let end = child.start_byte();
93                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
94                    }
95                }
96            }
97            None
98        },
99        Language::C
100        | Language::Cpp
101        | Language::CSharp
102        | Language::Php
103        | Language::Kotlin
104        | Language::Swift
105        | Language::Scala => {
106            for child in node.children(&mut node.walk()) {
107                if child.kind() == "block"
108                    || child.kind() == "compound_statement"
109                    || child.kind() == "function_body"
110                {
111                    let start = node.start_byte();
112                    let end = child.start_byte();
113                    return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
114                }
115            }
116            None
117        },
118        Language::Ruby | Language::Lua => {
119            let start = node.start_byte();
120            let mut end = start;
121            for byte in &source_code.as_bytes()[start..] {
122                end += 1;
123                if *byte == b'\n' {
124                    break;
125                }
126            }
127            Some(source_code[start..end].trim().to_owned())
128        },
129        Language::Bash => {
130            let start = node.start_byte();
131            let mut end = start;
132            for byte in &source_code.as_bytes()[start..] {
133                if *byte == b'{' {
134                    break;
135                }
136                end += 1;
137            }
138            Some(source_code[start..end].trim().to_owned())
139        },
140        Language::Haskell
141        | Language::OCaml
142        | Language::FSharp
143        | Language::Elixir
144        | Language::Clojure
145        | Language::R => {
146            let start = node.start_byte();
147            let mut end = start;
148            for byte in &source_code.as_bytes()[start..] {
149                end += 1;
150                if *byte == b'\n' || *byte == b'=' {
151                    break;
152                }
153            }
154            Some(source_code[start..end].trim().to_owned())
155        },
156    };
157
158    sig_node.or_else(|| {
159        let start = node.start_byte();
160        let end = std::cmp::min(start + 200, source_code.len());
161        // Ensure we slice at valid UTF-8 character boundaries
162        let safe_start = safe_char_boundary(source_code, start);
163        let safe_end = safe_char_boundary(source_code, end);
164        if safe_start >= safe_end {
165            return None;
166        }
167        let text = &source_code[safe_start..safe_end];
168        text.lines().next().map(|s| s.trim().to_owned())
169    })
170}
171
172/// Extract docstring/documentation comment
173pub fn extract_docstring(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
174    match language {
175        Language::Python => {
176            let mut cursor = node.walk();
177            for child in node.children(&mut cursor) {
178                if child.kind() == "block" {
179                    for stmt in child.children(&mut child.walk()) {
180                        if stmt.kind() == "expression_statement" {
181                            for expr in stmt.children(&mut stmt.walk()) {
182                                if expr.kind() == "string" {
183                                    if let Ok(text) = expr.utf8_text(source_code.as_bytes()) {
184                                        return Some(
185                                            text.trim_matches(|c| c == '"' || c == '\'')
186                                                .trim()
187                                                .to_owned(),
188                                        );
189                                    }
190                                }
191                            }
192                        }
193                    }
194                }
195            }
196            None
197        },
198        Language::JavaScript | Language::TypeScript => {
199            if let Some(prev_sibling) = node.prev_sibling() {
200                if prev_sibling.kind() == "comment" {
201                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
202                        if text.starts_with("/**") {
203                            return Some(clean_jsdoc(text));
204                        }
205                    }
206                }
207            }
208            None
209        },
210        Language::Rust => {
211            let start_byte = node.start_byte();
212            // SAFETY: Use floor_char_boundary to avoid panics on multi-byte UTF-8 characters
213            let safe_boundary = source_code.floor_char_boundary(start_byte);
214            let lines_before: Vec<_> = source_code[..safe_boundary]
215                .lines()
216                .rev()
217                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
218                .collect();
219
220            if !lines_before.is_empty() {
221                let doc: Vec<String> = lines_before
222                    .into_iter()
223                    .rev()
224                    .filter_map(|line| {
225                        let trimmed = line.trim();
226                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
227                    })
228                    .collect();
229
230                if !doc.is_empty() {
231                    return Some(doc.join(" "));
232                }
233            }
234            None
235        },
236        Language::Go => {
237            if let Some(prev_sibling) = node.prev_sibling() {
238                if prev_sibling.kind() == "comment" {
239                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
240                        return Some(text.trim_start_matches("//").trim().to_owned());
241                    }
242                }
243            }
244            None
245        },
246        Language::Java => {
247            if let Some(prev_sibling) = node.prev_sibling() {
248                if prev_sibling.kind() == "block_comment" {
249                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
250                        if text.starts_with("/**") {
251                            return Some(clean_javadoc(text));
252                        }
253                    }
254                }
255            }
256            None
257        },
258        Language::C | Language::Cpp => {
259            if let Some(prev_sibling) = node.prev_sibling() {
260                if prev_sibling.kind() == "comment" {
261                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
262                        if text.starts_with("/**") || text.starts_with("/*") {
263                            return Some(clean_jsdoc(text));
264                        }
265                        return Some(text.trim_start_matches("//").trim().to_owned());
266                    }
267                }
268            }
269            None
270        },
271        Language::CSharp => {
272            let start_byte = node.start_byte();
273            // SAFETY: Use floor_char_boundary to avoid panics on multi-byte UTF-8 characters
274            let safe_boundary = source_code.floor_char_boundary(start_byte);
275            let lines_before: Vec<_> = source_code[..safe_boundary]
276                .lines()
277                .rev()
278                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
279                .collect();
280
281            if !lines_before.is_empty() {
282                let doc: Vec<String> = lines_before
283                    .into_iter()
284                    .rev()
285                    .filter_map(|line| {
286                        let trimmed = line.trim();
287                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
288                    })
289                    .collect();
290
291                if !doc.is_empty() {
292                    return Some(doc.join(" "));
293                }
294            }
295            None
296        },
297        Language::Ruby => {
298            if let Some(prev_sibling) = node.prev_sibling() {
299                if prev_sibling.kind() == "comment" {
300                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
301                        return Some(text.trim_start_matches('#').trim().to_owned());
302                    }
303                }
304            }
305            None
306        },
307        Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
308            if let Some(prev_sibling) = node.prev_sibling() {
309                let kind = prev_sibling.kind();
310                if kind == "comment" || kind == "multiline_comment" || kind == "block_comment" {
311                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
312                        if text.starts_with("/**") {
313                            return Some(clean_jsdoc(text));
314                        }
315                    }
316                }
317            }
318            None
319        },
320        Language::Bash => {
321            if let Some(prev_sibling) = node.prev_sibling() {
322                if prev_sibling.kind() == "comment" {
323                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
324                        return Some(text.trim_start_matches('#').trim().to_owned());
325                    }
326                }
327            }
328            None
329        },
330        Language::Haskell => {
331            if let Some(prev_sibling) = node.prev_sibling() {
332                if prev_sibling.kind() == "comment" {
333                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
334                        let cleaned = text
335                            .trim_start_matches("{-")
336                            .trim_end_matches("-}")
337                            .trim_start_matches("--")
338                            .trim();
339                        return Some(cleaned.to_owned());
340                    }
341                }
342            }
343            None
344        },
345        Language::Elixir => {
346            if let Some(prev_sibling) = node.prev_sibling() {
347                if prev_sibling.kind() == "comment" {
348                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
349                        return Some(text.trim_start_matches('#').trim().to_owned());
350                    }
351                }
352            }
353            None
354        },
355        Language::Clojure => None,
356        Language::OCaml | Language::FSharp => {
357            if let Some(prev_sibling) = node.prev_sibling() {
358                if prev_sibling.kind() == "comment" {
359                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
360                        let cleaned = text
361                            .trim_start_matches("(**")
362                            .trim_start_matches("(*")
363                            .trim_end_matches("*)")
364                            .trim();
365                        return Some(cleaned.to_owned());
366                    }
367                }
368            }
369            None
370        },
371        Language::Lua => {
372            if let Some(prev_sibling) = node.prev_sibling() {
373                if prev_sibling.kind() == "comment" {
374                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
375                        let cleaned = text
376                            .trim_start_matches("--[[")
377                            .trim_end_matches("]]")
378                            .trim_start_matches("--")
379                            .trim();
380                        return Some(cleaned.to_owned());
381                    }
382                }
383            }
384            None
385        },
386        Language::R => {
387            if let Some(prev_sibling) = node.prev_sibling() {
388                if prev_sibling.kind() == "comment" {
389                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
390                        return Some(text.trim_start_matches('#').trim().to_owned());
391                    }
392                }
393            }
394            None
395        },
396    }
397}
398
399/// Extract parent class/struct name for methods
400pub fn extract_parent(node: Node<'_>, source_code: &str) -> Option<String> {
401    let mut current = node.parent()?;
402
403    while let Some(parent) = current.parent() {
404        if ["class_definition", "class_declaration", "struct_item", "impl_item"]
405            .contains(&parent.kind())
406        {
407            for child in parent.children(&mut parent.walk()) {
408                if child.kind() == "identifier" || child.kind() == "type_identifier" {
409                    if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
410                        return Some(name.to_owned());
411                    }
412                }
413            }
414        }
415        current = parent;
416    }
417
418    None
419}
420
421/// Extract visibility modifier from a node
422pub fn extract_visibility(node: Node<'_>, source_code: &str, language: Language) -> Visibility {
423    match language {
424        Language::Python => {
425            if let Some(name_node) = node.child_by_field_name("name") {
426                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
427                    if name.starts_with("__") && !name.ends_with("__") {
428                        return Visibility::Private;
429                    } else if name.starts_with('_') {
430                        return Visibility::Protected;
431                    }
432                }
433            }
434            Visibility::Public
435        },
436        Language::Rust => {
437            for child in node.children(&mut node.walk()) {
438                if child.kind() == "visibility_modifier" {
439                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
440                        if text.contains("pub(crate)") || text.contains("pub(super)") {
441                            return Visibility::Internal;
442                        } else if text.starts_with("pub") {
443                            return Visibility::Public;
444                        }
445                    }
446                }
447            }
448            Visibility::Private
449        },
450        Language::JavaScript | Language::TypeScript => {
451            for child in node.children(&mut node.walk()) {
452                let kind = child.kind();
453                if kind == "private" || kind == "accessibility_modifier" {
454                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
455                        return match text {
456                            "private" => Visibility::Private,
457                            "protected" => Visibility::Protected,
458                            _ => Visibility::Public,
459                        };
460                    }
461                }
462            }
463            if let Some(name_node) = node.child_by_field_name("name") {
464                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
465                    if name.starts_with('#') {
466                        return Visibility::Private;
467                    }
468                }
469            }
470            Visibility::Public
471        },
472        Language::Go => {
473            if let Some(name_node) = node.child_by_field_name("name") {
474                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
475                    if let Some(first_char) = name.chars().next() {
476                        if first_char.is_lowercase() {
477                            return Visibility::Private;
478                        }
479                    }
480                }
481            }
482            Visibility::Public
483        },
484        Language::Java => {
485            for child in node.children(&mut node.walk()) {
486                if child.kind() == "modifiers" {
487                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
488                        if text.contains("private") {
489                            return Visibility::Private;
490                        } else if text.contains("protected") {
491                            return Visibility::Protected;
492                        } else if text.contains("public") {
493                            return Visibility::Public;
494                        }
495                    }
496                }
497            }
498            Visibility::Internal
499        },
500        Language::C | Language::Cpp => {
501            for child in node.children(&mut node.walk()) {
502                if child.kind() == "storage_class_specifier" {
503                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
504                        if text == "static" {
505                            return Visibility::Private;
506                        }
507                    }
508                }
509            }
510            Visibility::Public
511        },
512        Language::CSharp | Language::Kotlin | Language::Swift | Language::Scala => {
513            for child in node.children(&mut node.walk()) {
514                let kind = child.kind();
515                if kind == "modifier" || kind == "modifiers" || kind == "visibility_modifier" {
516                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
517                        if text.contains("private") {
518                            return Visibility::Private;
519                        } else if text.contains("protected") {
520                            return Visibility::Protected;
521                        } else if text.contains("internal") {
522                            return Visibility::Internal;
523                        } else if text.contains("public") {
524                            return Visibility::Public;
525                        }
526                    }
527                }
528            }
529            Visibility::Internal
530        },
531        Language::Ruby => {
532            if let Some(name_node) = node.child_by_field_name("name") {
533                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
534                    if name.starts_with('_') {
535                        return Visibility::Private;
536                    }
537                }
538            }
539            Visibility::Public
540        },
541        Language::Php => {
542            for child in node.children(&mut node.walk()) {
543                if child.kind() == "visibility_modifier" {
544                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
545                        return match text {
546                            "private" => Visibility::Private,
547                            "protected" => Visibility::Protected,
548                            "public" => Visibility::Public,
549                            _ => Visibility::Public,
550                        };
551                    }
552                }
553            }
554            Visibility::Public
555        },
556        Language::Bash => Visibility::Public,
557        Language::Haskell
558        | Language::Elixir
559        | Language::Clojure
560        | Language::OCaml
561        | Language::FSharp
562        | Language::Lua
563        | Language::R => Visibility::Public,
564    }
565}
566
567/// Extract function calls from a function/method body
568pub fn extract_calls(node: Node<'_>, source_code: &str, language: Language) -> Vec<String> {
569    let mut calls = HashSet::new();
570
571    let body_node = find_body_node(node, language);
572    if let Some(body) = body_node {
573        collect_calls_recursive(body, source_code, language, &mut calls);
574    }
575
576    if calls.is_empty() {
577        collect_calls_recursive(node, source_code, language, &mut calls);
578    }
579
580    calls.into_iter().collect()
581}
582
583/// Find the body node of a function/method
584pub fn find_body_node(node: Node<'_>, language: Language) -> Option<Node<'_>> {
585    match language {
586        Language::Python => {
587            for child in node.children(&mut node.walk()) {
588                if child.kind() == "block" {
589                    return Some(child);
590                }
591            }
592        },
593        Language::Rust => {
594            for child in node.children(&mut node.walk()) {
595                if child.kind() == "block" {
596                    return Some(child);
597                }
598            }
599        },
600        Language::JavaScript | Language::TypeScript => {
601            for child in node.children(&mut node.walk()) {
602                let kind = child.kind();
603                if kind == "statement_block" {
604                    return Some(child);
605                }
606                if kind == "arrow_function" {
607                    if let Some(body) = find_body_node(child, language) {
608                        return Some(body);
609                    }
610                    return Some(child);
611                }
612            }
613            if node.kind() == "arrow_function" {
614                for child in node.children(&mut node.walk()) {
615                    let kind = child.kind();
616                    if kind != "formal_parameters"
617                        && kind != "identifier"
618                        && kind != "=>"
619                        && kind != "("
620                        && kind != ")"
621                        && kind != ","
622                    {
623                        return Some(child);
624                    }
625                }
626                return Some(node);
627            }
628        },
629        Language::Go => {
630            for child in node.children(&mut node.walk()) {
631                if child.kind() == "block" {
632                    return Some(child);
633                }
634            }
635        },
636        Language::Java => {
637            for child in node.children(&mut node.walk()) {
638                if child.kind() == "block" {
639                    return Some(child);
640                }
641            }
642        },
643        Language::C | Language::Cpp => {
644            for child in node.children(&mut node.walk()) {
645                if child.kind() == "compound_statement" {
646                    return Some(child);
647                }
648            }
649        },
650        Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
651            for child in node.children(&mut node.walk()) {
652                let kind = child.kind();
653                if kind == "block" || kind == "compound_statement" || kind == "function_body" {
654                    return Some(child);
655                }
656            }
657        },
658        Language::Ruby => {
659            for child in node.children(&mut node.walk()) {
660                if child.kind() == "body_statement" || child.kind() == "do_block" {
661                    return Some(child);
662                }
663            }
664        },
665        Language::Bash => {
666            for child in node.children(&mut node.walk()) {
667                if child.kind() == "compound_statement" {
668                    return Some(child);
669                }
670            }
671        },
672        Language::Haskell
673        | Language::Elixir
674        | Language::Clojure
675        | Language::OCaml
676        | Language::FSharp
677        | Language::R => {
678            return Some(node);
679        },
680        Language::Lua => {
681            for child in node.children(&mut node.walk()) {
682                if child.kind() == "block" {
683                    return Some(child);
684                }
685            }
686        },
687    }
688    None
689}
690
691/// Recursively collect function calls from a node
692pub fn collect_calls_recursive(
693    node: Node<'_>,
694    source_code: &str,
695    language: Language,
696    calls: &mut HashSet<String>,
697) {
698    let kind = node.kind();
699
700    let call_name = match language {
701        Language::Python => {
702            if kind == "call" {
703                node.child_by_field_name("function").and_then(|f| {
704                    if f.kind() == "identifier" {
705                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
706                    } else if f.kind() == "attribute" {
707                        f.child_by_field_name("attribute")
708                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
709                            .map(String::from)
710                    } else {
711                        None
712                    }
713                })
714            } else {
715                None
716            }
717        },
718        Language::Rust => {
719            if kind == "call_expression" {
720                node.child_by_field_name("function").and_then(|f| {
721                    if f.kind() == "identifier" {
722                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
723                    } else if f.kind() == "field_expression" {
724                        f.child_by_field_name("field")
725                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
726                            .map(String::from)
727                    } else if f.kind() == "scoped_identifier" {
728                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
729                    } else {
730                        None
731                    }
732                })
733            } else if kind == "macro_invocation" {
734                node.child_by_field_name("macro")
735                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
736                    .map(|s| format!("{}!", s))
737            } else {
738                None
739            }
740        },
741        Language::JavaScript | Language::TypeScript => {
742            if kind == "call_expression" {
743                node.child_by_field_name("function").and_then(|f| {
744                    if f.kind() == "identifier" {
745                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
746                    } else if f.kind() == "member_expression" {
747                        f.child_by_field_name("property")
748                            .and_then(|p| p.utf8_text(source_code.as_bytes()).ok())
749                            .map(String::from)
750                    } else {
751                        None
752                    }
753                })
754            } else {
755                None
756            }
757        },
758        Language::Go => {
759            if kind == "call_expression" {
760                node.child_by_field_name("function").and_then(|f| {
761                    if f.kind() == "identifier" {
762                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
763                    } else if f.kind() == "selector_expression" {
764                        f.child_by_field_name("field")
765                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
766                            .map(String::from)
767                    } else {
768                        None
769                    }
770                })
771            } else {
772                None
773            }
774        },
775        Language::Java => {
776            if kind == "method_invocation" {
777                node.child_by_field_name("name")
778                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
779                    .map(String::from)
780            } else {
781                None
782            }
783        },
784        Language::C | Language::Cpp => {
785            if kind == "call_expression" {
786                node.child_by_field_name("function").and_then(|f| {
787                    if f.kind() == "identifier" {
788                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
789                    } else if f.kind() == "field_expression" {
790                        f.child_by_field_name("field")
791                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
792                            .map(String::from)
793                    } else {
794                        None
795                    }
796                })
797            } else {
798                None
799            }
800        },
801        Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
802            if kind == "invocation_expression" || kind == "call_expression" {
803                node.children(&mut node.walk())
804                    .find(|child| child.kind() == "identifier" || child.kind() == "simple_name")
805                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
806                    .map(|s| s.to_owned())
807            } else {
808                None
809            }
810        },
811        Language::Ruby => {
812            if kind == "call" || kind == "method_call" {
813                node.child_by_field_name("method")
814                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
815                    .map(String::from)
816            } else {
817                None
818            }
819        },
820        Language::Bash => {
821            if kind == "command" {
822                node.child_by_field_name("name")
823                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
824                    .map(String::from)
825            } else {
826                None
827            }
828        },
829        Language::Haskell
830        | Language::Elixir
831        | Language::Clojure
832        | Language::OCaml
833        | Language::FSharp
834        | Language::Lua
835        | Language::R => {
836            if kind == "function_call" || kind == "call" || kind == "application" {
837                node.children(&mut node.walk())
838                    .find(|child| child.kind() == "identifier" || child.kind() == "variable")
839                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
840                    .map(|s| s.to_owned())
841            } else {
842                None
843            }
844        },
845    };
846
847    if let Some(name) = call_name {
848        if !is_builtin(&name, language) {
849            calls.insert(name);
850        }
851    }
852
853    for child in node.children(&mut node.walk()) {
854        collect_calls_recursive(child, source_code, language, calls);
855    }
856}
857
858/// Check if a function name is a common built-in
859pub fn is_builtin(name: &str, language: Language) -> bool {
860    match language {
861        Language::Python => {
862            matches!(
863                name,
864                "print"
865                    | "len"
866                    | "range"
867                    | "str"
868                    | "int"
869                    | "float"
870                    | "list"
871                    | "dict"
872                    | "set"
873                    | "tuple"
874                    | "bool"
875                    | "type"
876                    | "isinstance"
877                    | "hasattr"
878                    | "getattr"
879                    | "setattr"
880                    | "super"
881                    | "iter"
882                    | "next"
883                    | "open"
884                    | "input"
885                    | "format"
886                    | "enumerate"
887                    | "zip"
888                    | "map"
889                    | "filter"
890                    | "sorted"
891                    | "reversed"
892                    | "sum"
893                    | "min"
894                    | "max"
895                    | "abs"
896                    | "round"
897                    | "ord"
898                    | "chr"
899                    | "hex"
900                    | "bin"
901                    | "oct"
902            )
903        },
904        Language::JavaScript | Language::TypeScript => {
905            matches!(
906                name,
907                "console"
908                    | "log"
909                    | "error"
910                    | "warn"
911                    | "parseInt"
912                    | "parseFloat"
913                    | "setTimeout"
914                    | "setInterval"
915                    | "clearTimeout"
916                    | "clearInterval"
917                    | "JSON"
918                    | "stringify"
919                    | "parse"
920                    | "toString"
921                    | "valueOf"
922                    | "push"
923                    | "pop"
924                    | "shift"
925                    | "unshift"
926                    | "slice"
927                    | "splice"
928                    | "map"
929                    | "filter"
930                    | "reduce"
931                    | "forEach"
932                    | "find"
933                    | "findIndex"
934                    | "includes"
935                    | "indexOf"
936                    | "join"
937                    | "split"
938                    | "replace"
939            )
940        },
941        Language::Rust => {
942            matches!(
943                name,
944                "println!"
945                    | "print!"
946                    | "eprintln!"
947                    | "eprint!"
948                    | "format!"
949                    | "vec!"
950                    | "panic!"
951                    | "assert!"
952                    | "assert_eq!"
953                    | "assert_ne!"
954                    | "debug!"
955                    | "info!"
956                    | "warn!"
957                    | "error!"
958                    | "trace!"
959                    | "unwrap"
960                    | "expect"
961                    | "ok"
962                    | "err"
963                    | "some"
964                    | "none"
965                    | "clone"
966                    | "to_string"
967                    | "into"
968                    | "from"
969                    | "default"
970                    | "iter"
971                    | "into_iter"
972                    | "collect"
973                    | "map"
974                    | "filter"
975            )
976        },
977        Language::Go => {
978            matches!(
979                name,
980                "fmt"
981                    | "Println"
982                    | "Printf"
983                    | "Sprintf"
984                    | "Errorf"
985                    | "make"
986                    | "new"
987                    | "len"
988                    | "cap"
989                    | "append"
990                    | "copy"
991                    | "delete"
992                    | "close"
993                    | "panic"
994                    | "recover"
995                    | "print"
996            )
997        },
998        Language::Java => {
999            matches!(
1000                name,
1001                "println"
1002                    | "print"
1003                    | "printf"
1004                    | "toString"
1005                    | "equals"
1006                    | "hashCode"
1007                    | "getClass"
1008                    | "clone"
1009                    | "notify"
1010                    | "wait"
1011                    | "get"
1012                    | "set"
1013                    | "add"
1014                    | "remove"
1015                    | "size"
1016                    | "isEmpty"
1017                    | "contains"
1018                    | "iterator"
1019                    | "valueOf"
1020                    | "parseInt"
1021            )
1022        },
1023        Language::C | Language::Cpp => {
1024            matches!(
1025                name,
1026                "printf"
1027                    | "scanf"
1028                    | "malloc"
1029                    | "free"
1030                    | "memcpy"
1031                    | "memset"
1032                    | "strlen"
1033                    | "strcpy"
1034                    | "strcmp"
1035                    | "strcat"
1036                    | "sizeof"
1037                    | "cout"
1038                    | "cin"
1039                    | "endl"
1040                    | "cerr"
1041                    | "clog"
1042            )
1043        },
1044        Language::CSharp => {
1045            matches!(
1046                name,
1047                "WriteLine"
1048                    | "Write"
1049                    | "ReadLine"
1050                    | "ToString"
1051                    | "Equals"
1052                    | "GetHashCode"
1053                    | "GetType"
1054                    | "Add"
1055                    | "Remove"
1056                    | "Contains"
1057                    | "Count"
1058                    | "Clear"
1059                    | "ToList"
1060                    | "ToArray"
1061            )
1062        },
1063        Language::Ruby => {
1064            matches!(
1065                name,
1066                "puts"
1067                    | "print"
1068                    | "p"
1069                    | "gets"
1070                    | "each"
1071                    | "map"
1072                    | "select"
1073                    | "reject"
1074                    | "reduce"
1075                    | "inject"
1076                    | "find"
1077                    | "any?"
1078                    | "all?"
1079                    | "include?"
1080                    | "empty?"
1081                    | "nil?"
1082                    | "length"
1083                    | "size"
1084            )
1085        },
1086        Language::Php => {
1087            matches!(
1088                name,
1089                "echo"
1090                    | "print"
1091                    | "var_dump"
1092                    | "print_r"
1093                    | "isset"
1094                    | "empty"
1095                    | "array"
1096                    | "count"
1097                    | "strlen"
1098                    | "strpos"
1099                    | "substr"
1100                    | "explode"
1101                    | "implode"
1102                    | "json_encode"
1103                    | "json_decode"
1104            )
1105        },
1106        Language::Kotlin => {
1107            matches!(
1108                name,
1109                "println"
1110                    | "print"
1111                    | "readLine"
1112                    | "toString"
1113                    | "equals"
1114                    | "hashCode"
1115                    | "map"
1116                    | "filter"
1117                    | "forEach"
1118                    | "let"
1119                    | "also"
1120                    | "apply"
1121                    | "run"
1122                    | "with"
1123                    | "listOf"
1124                    | "mapOf"
1125                    | "setOf"
1126            )
1127        },
1128        Language::Swift => {
1129            matches!(
1130                name,
1131                "print"
1132                    | "debugPrint"
1133                    | "dump"
1134                    | "map"
1135                    | "filter"
1136                    | "reduce"
1137                    | "forEach"
1138                    | "contains"
1139                    | "count"
1140                    | "isEmpty"
1141                    | "append"
1142            )
1143        },
1144        Language::Scala => {
1145            matches!(
1146                name,
1147                "println"
1148                    | "print"
1149                    | "map"
1150                    | "filter"
1151                    | "flatMap"
1152                    | "foreach"
1153                    | "reduce"
1154                    | "fold"
1155                    | "foldLeft"
1156                    | "foldRight"
1157                    | "collect"
1158            )
1159        },
1160        Language::Bash
1161        | Language::Haskell
1162        | Language::Elixir
1163        | Language::Clojure
1164        | Language::OCaml
1165        | Language::FSharp
1166        | Language::Lua
1167        | Language::R => false,
1168    }
1169}
1170
/// Clean a JSDoc-style block comment into a single line of text.
///
/// Every line is trimmed and stripped of the comment delimiters (`/**`, `/*`, `*/`) and of
/// the leading `*` gutter. Lines that end up empty are dropped and the remaining ones are
/// joined with a single space.
pub fn clean_jsdoc(text: &str) -> String {
    text.lines()
        .map(|line| {
            line.trim()
                // Strip the closer first: on a bare ` */` line the gutter stripping below
                // would otherwise eat the `*` and leave a stray `/` in the output.
                .trim_end_matches("*/")
                .trim_start_matches("/**")
                .trim_start_matches("/*")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1186
1187/// Clean JavaDoc comment
1188pub fn clean_javadoc(text: &str) -> String {
1189    clean_jsdoc(text)
1190}
1191
1192/// Extract class inheritance (extends) and interface implementations (implements)
1193pub fn extract_inheritance(
1194    node: Node<'_>,
1195    source_code: &str,
1196    language: Language,
1197) -> (Option<String>, Vec<String>) {
1198    let mut extends = None;
1199    let mut implements = Vec::new();
1200
1201    match language {
1202        Language::Python => {
1203            // Python: class Foo(Bar, Baz): - all are considered base classes
1204            if node.kind() == "class_definition" {
1205                if let Some(args) = node.child_by_field_name("superclasses") {
1206                    for child in args.children(&mut args.walk()) {
1207                        if child.kind() == "identifier" || child.kind() == "attribute" {
1208                            if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1209                                if extends.is_none() {
1210                                    extends = Some(name.to_owned());
1211                                } else {
1212                                    implements.push(name.to_owned());
1213                                }
1214                            }
1215                        }
1216                    }
1217                }
1218            }
1219        },
1220        Language::JavaScript | Language::TypeScript => {
1221            // JS/TS: class Foo extends Bar implements Baz
1222            if node.kind() == "class_declaration" || node.kind() == "class" {
1223                for child in node.children(&mut node.walk()) {
1224                    if child.kind() == "class_heritage" {
1225                        for heritage in child.children(&mut child.walk()) {
1226                            if heritage.kind() == "extends_clause" {
1227                                for type_node in heritage.children(&mut heritage.walk()) {
1228                                    if type_node.kind() == "identifier"
1229                                        || type_node.kind() == "type_identifier"
1230                                    {
1231                                        if let Ok(name) =
1232                                            type_node.utf8_text(source_code.as_bytes())
1233                                        {
1234                                            extends = Some(name.to_owned());
1235                                        }
1236                                    }
1237                                }
1238                            } else if heritage.kind() == "implements_clause" {
1239                                for type_node in heritage.children(&mut heritage.walk()) {
1240                                    if type_node.kind() == "identifier"
1241                                        || type_node.kind() == "type_identifier"
1242                                    {
1243                                        if let Ok(name) =
1244                                            type_node.utf8_text(source_code.as_bytes())
1245                                        {
1246                                            implements.push(name.to_owned());
1247                                        }
1248                                    }
1249                                }
1250                            }
1251                        }
1252                    }
1253                }
1254            }
1255        },
1256        Language::Rust => {
1257            // Rust doesn't have class inheritance, but has trait implementations
1258            // impl Trait for Struct
1259            if node.kind() == "impl_item" {
1260                let mut has_for = false;
1261                for child in node.children(&mut node.walk()) {
1262                    if child.kind() == "for" {
1263                        has_for = true;
1264                    }
1265                    if child.kind() == "type_identifier" || child.kind() == "generic_type" {
1266                        if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1267                            if has_for {
1268                                // This is the struct being implemented
1269                            } else {
1270                                // This is the trait being implemented
1271                                implements.push(name.to_owned());
1272                            }
1273                        }
1274                    }
1275                }
1276            }
1277        },
1278        Language::Go => {
1279            // Go uses embedding for "inheritance"
1280            if node.kind() == "type_declaration" {
1281                for child in node.children(&mut node.walk()) {
1282                    if child.kind() == "type_spec" {
1283                        for spec_child in child.children(&mut child.walk()) {
1284                            if spec_child.kind() == "struct_type" {
1285                                for field in spec_child.children(&mut spec_child.walk()) {
1286                                    if field.kind() == "field_declaration" {
1287                                        // Embedded field (no name, just type)
1288                                        let has_name = field.child_by_field_name("name").is_some();
1289                                        if !has_name {
1290                                            if let Some(type_node) =
1291                                                field.child_by_field_name("type")
1292                                            {
1293                                                if let Ok(name) =
1294                                                    type_node.utf8_text(source_code.as_bytes())
1295                                                {
1296                                                    implements.push(name.to_owned());
1297                                                }
1298                                            }
1299                                        }
1300                                    }
1301                                }
1302                            }
1303                        }
1304                    }
1305                }
1306            }
1307        },
1308        Language::Java => {
1309            // Java: class Foo extends Bar implements Baz, Qux
1310            if node.kind() == "class_declaration" {
1311                for child in node.children(&mut node.walk()) {
1312                    if child.kind() == "superclass" {
1313                        for type_node in child.children(&mut child.walk()) {
1314                            if type_node.kind() == "type_identifier" {
1315                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1316                                    extends = Some(name.to_owned());
1317                                }
1318                            }
1319                        }
1320                    } else if child.kind() == "super_interfaces" {
1321                        for type_list in child.children(&mut child.walk()) {
1322                            if type_list.kind() == "type_list" {
1323                                for type_node in type_list.children(&mut type_list.walk()) {
1324                                    if type_node.kind() == "type_identifier" {
1325                                        if let Ok(name) =
1326                                            type_node.utf8_text(source_code.as_bytes())
1327                                        {
1328                                            implements.push(name.to_owned());
1329                                        }
1330                                    }
1331                                }
1332                            }
1333                        }
1334                    }
1335                }
1336            }
1337        },
1338        Language::C | Language::Cpp => {
1339            // C++: class Foo : public Bar, public Baz
1340            if node.kind() == "class_specifier" || node.kind() == "struct_specifier" {
1341                for child in node.children(&mut node.walk()) {
1342                    if child.kind() == "base_class_clause" {
1343                        for base in child.children(&mut child.walk()) {
1344                            if base.kind() == "type_identifier" {
1345                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1346                                    if extends.is_none() {
1347                                        extends = Some(name.to_owned());
1348                                    } else {
1349                                        implements.push(name.to_owned());
1350                                    }
1351                                }
1352                            }
1353                        }
1354                    }
1355                }
1356            }
1357        },
1358        Language::CSharp => {
1359            // C#: class Foo : Bar, IBaz
1360            if node.kind() == "class_declaration" {
1361                for child in node.children(&mut node.walk()) {
1362                    if child.kind() == "base_list" {
1363                        for base in child.children(&mut child.walk()) {
1364                            if base.kind() == "identifier" || base.kind() == "generic_name" {
1365                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1366                                    if name.starts_with('I') && name.len() > 1 {
1367                                        // Convention: interfaces start with I
1368                                        implements.push(name.to_owned());
1369                                    } else if extends.is_none() {
1370                                        extends = Some(name.to_owned());
1371                                    } else {
1372                                        implements.push(name.to_owned());
1373                                    }
1374                                }
1375                            }
1376                        }
1377                    }
1378                }
1379            }
1380        },
1381        Language::Ruby => {
1382            // Ruby: class Foo < Bar; include Baz
1383            if node.kind() == "class" {
1384                for child in node.children(&mut node.walk()) {
1385                    if child.kind() == "superclass" {
1386                        for type_node in child.children(&mut child.walk()) {
1387                            if type_node.kind() == "constant" {
1388                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1389                                    extends = Some(name.to_owned());
1390                                }
1391                            }
1392                        }
1393                    }
1394                }
1395            }
1396        },
1397        Language::Php => {
1398            // PHP: class Foo extends Bar implements Baz
1399            if node.kind() == "class_declaration" {
1400                for child in node.children(&mut node.walk()) {
1401                    if child.kind() == "base_clause" {
1402                        for type_node in child.children(&mut child.walk()) {
1403                            if type_node.kind() == "name" {
1404                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1405                                    extends = Some(name.to_owned());
1406                                }
1407                            }
1408                        }
1409                    } else if child.kind() == "class_interface_clause" {
1410                        for type_node in child.children(&mut child.walk()) {
1411                            if type_node.kind() == "name" {
1412                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1413                                    implements.push(name.to_owned());
1414                                }
1415                            }
1416                        }
1417                    }
1418                }
1419            }
1420        },
1421        Language::Kotlin => {
1422            // Kotlin: class Foo : Bar(), Baz
1423            if node.kind() == "class_declaration" {
1424                for child in node.children(&mut node.walk()) {
1425                    if child.kind() == "delegation_specifiers" {
1426                        for spec in child.children(&mut child.walk()) {
1427                            if spec.kind() == "delegation_specifier" {
1428                                for type_node in spec.children(&mut spec.walk()) {
1429                                    if type_node.kind() == "user_type" {
1430                                        if let Ok(name) =
1431                                            type_node.utf8_text(source_code.as_bytes())
1432                                        {
1433                                            if extends.is_none() {
1434                                                extends = Some(name.to_owned());
1435                                            } else {
1436                                                implements.push(name.to_owned());
1437                                            }
1438                                        }
1439                                    }
1440                                }
1441                            }
1442                        }
1443                    }
1444                }
1445            }
1446        },
1447        Language::Swift => {
1448            // Swift: class Foo: Bar, Protocol
1449            if node.kind() == "class_declaration" {
1450                for child in node.children(&mut node.walk()) {
1451                    if child.kind() == "type_inheritance_clause" {
1452                        for type_node in child.children(&mut child.walk()) {
1453                            if type_node.kind() == "type_identifier" {
1454                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1455                                    if extends.is_none() {
1456                                        extends = Some(name.to_owned());
1457                                    } else {
1458                                        implements.push(name.to_owned());
1459                                    }
1460                                }
1461                            }
1462                        }
1463                    }
1464                }
1465            }
1466        },
1467        Language::Scala => {
1468            // Scala: class Foo extends Bar with Baz
1469            if node.kind() == "class_definition" {
1470                for child in node.children(&mut node.walk()) {
1471                    if child.kind() == "extends_clause" {
1472                        for type_node in child.children(&mut child.walk()) {
1473                            if type_node.kind() == "type_identifier" {
1474                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1475                                    if extends.is_none() {
1476                                        extends = Some(name.to_owned());
1477                                    } else {
1478                                        implements.push(name.to_owned());
1479                                    }
1480                                }
1481                            }
1482                        }
1483                    }
1484                }
1485            }
1486        },
1487        Language::Bash
1488        | Language::Haskell
1489        | Language::Elixir
1490        | Language::Clojure
1491        | Language::OCaml
1492        | Language::FSharp
1493        | Language::Lua
1494        | Language::R => {},
1495    }
1496
1497    (extends, implements)
1498}
1499
1500/// Map capture name to SymbolKind
1501pub fn map_symbol_kind(capture_name: &str) -> SymbolKind {
1502    match capture_name {
1503        "function" => SymbolKind::Function,
1504        "class" => SymbolKind::Class,
1505        "method" => SymbolKind::Method,
1506        "struct" => SymbolKind::Struct,
1507        "enum" => SymbolKind::Enum,
1508        "interface" => SymbolKind::Interface,
1509        "trait" => SymbolKind::Trait,
1510        _ => SymbolKind::Function,
1511    }
1512}
1513
1514#[cfg(test)]
1515mod tests {
1516    use super::*;
1517
1518    // ==========================================================================
1519    // safe_char_boundary tests
1520    // ==========================================================================
1521
1522    #[test]
1523    fn test_safe_char_boundary_ascii() {
1524        let s = "hello world";
1525        assert_eq!(safe_char_boundary(s, 0), 0);
1526        assert_eq!(safe_char_boundary(s, 5), 5);
1527        assert_eq!(safe_char_boundary(s, 11), 11);
1528    }
1529
1530    #[test]
1531    fn test_safe_char_boundary_beyond_length() {
1532        let s = "hello";
1533        assert_eq!(safe_char_boundary(s, 100), 5);
1534        assert_eq!(safe_char_boundary(s, 5), 5);
1535    }
1536
1537    #[test]
1538    fn test_safe_char_boundary_empty_string() {
1539        let s = "";
1540        assert_eq!(safe_char_boundary(s, 0), 0);
1541        assert_eq!(safe_char_boundary(s, 10), 0);
1542    }
1543
1544    #[test]
1545    fn test_safe_char_boundary_multibyte_utf8() {
1546        // Chinese character "中" is 3 bytes: E4 B8 AD
1547        let s = "中文";
1548        // Index 0 is valid (start of first char)
1549        assert_eq!(safe_char_boundary(s, 0), 0);
1550        // Index 1 is in the middle of "中", should back up to 0
1551        assert_eq!(safe_char_boundary(s, 1), 0);
1552        // Index 2 is also in the middle
1553        assert_eq!(safe_char_boundary(s, 2), 0);
        // Index 3 is the start of "文"
1555        assert_eq!(safe_char_boundary(s, 3), 3);
        // Index 4 is in the middle of "文"
1557        assert_eq!(safe_char_boundary(s, 4), 3);
1558    }
1559
1560    #[test]
1561    fn test_safe_char_boundary_emoji() {
1562        // "👋" emoji is 4 bytes
1563        let s = "Hello 👋 World";
1564        // The emoji starts at byte 6
1565        assert_eq!(safe_char_boundary(s, 6), 6);
1566        // Middle of emoji should back up
1567        assert_eq!(safe_char_boundary(s, 7), 6);
1568        assert_eq!(safe_char_boundary(s, 8), 6);
1569        assert_eq!(safe_char_boundary(s, 9), 6);
1570        // After emoji (byte 10)
1571        assert_eq!(safe_char_boundary(s, 10), 10);
1572    }
1573
1574    #[test]
1575    fn test_safe_char_boundary_mixed_content() {
1576        // Mix of ASCII and multi-byte
1577        let s = "aбв"; // 'a' is 1 byte, 'б' and 'в' are 2 bytes each
1578        assert_eq!(safe_char_boundary(s, 0), 0);
1579        assert_eq!(safe_char_boundary(s, 1), 1); // Start of 'б'
1580        assert_eq!(safe_char_boundary(s, 2), 1); // Middle of 'б', back to 1
1581        assert_eq!(safe_char_boundary(s, 3), 3); // Start of 'в'
1582        assert_eq!(safe_char_boundary(s, 4), 3); // Middle of 'в'
1583        assert_eq!(safe_char_boundary(s, 5), 5); // End
1584    }
1585
1586    // ==========================================================================
1587    // clean_jsdoc tests
1588    // ==========================================================================
1589
1590    #[test]
1591    fn test_clean_jsdoc_simple() {
1592        let input = "/** This is a simple doc */";
1593        assert_eq!(clean_jsdoc(input), "This is a simple doc");
1594    }
1595
1596    #[test]
1597    fn test_clean_jsdoc_multiline() {
1598        let input = "/**\n * Line 1\n * Line 2\n */";
1599        let result = clean_jsdoc(input);
1600        // Trailing slash is kept when on its own line
1601        assert!(result.contains("Line 1"));
1602        assert!(result.contains("Line 2"));
1603    }
1604
1605    #[test]
1606    fn test_clean_jsdoc_with_asterisks() {
1607        let input = "/**\n * First line\n * Second line\n * Third line\n */";
1608        let result = clean_jsdoc(input);
1609        assert!(result.contains("First line"));
1610        assert!(result.contains("Second line"));
1611        assert!(result.contains("Third line"));
1612    }
1613
1614    #[test]
1615    fn test_clean_jsdoc_empty() {
1616        let input = "/** */";
1617        assert_eq!(clean_jsdoc(input), "");
1618    }
1619
1620    #[test]
1621    fn test_clean_jsdoc_c_style_comment() {
1622        let input = "/* Regular C comment */";
1623        assert_eq!(clean_jsdoc(input), "Regular C comment");
1624    }
1625
1626    #[test]
1627    fn test_clean_jsdoc_with_tags() {
1628        let input = "/**\n * Description\n * @param x The x value\n * @returns Result\n */";
1629        let result = clean_jsdoc(input);
1630        assert!(result.contains("Description"));
1631        assert!(result.contains("@param x"));
1632        assert!(result.contains("@returns"));
1633    }
1634
1635    #[test]
1636    fn test_clean_jsdoc_whitespace_handling() {
1637        let input = "/**   \n   *    Lots of spaces    \n   */";
1638        assert!(clean_jsdoc(input).contains("Lots of spaces"));
1639    }
1640
1641    // ==========================================================================
1642    // clean_javadoc tests
1643    // ==========================================================================
1644
1645    #[test]
1646    fn test_clean_javadoc_simple() {
1647        let input = "/** JavaDoc comment */";
1648        assert_eq!(clean_javadoc(input), "JavaDoc comment");
1649    }
1650
1651    #[test]
1652    fn test_clean_javadoc_multiline() {
1653        let input = "/**\n * Method description.\n * @param name The name\n */";
1654        let result = clean_javadoc(input);
1655        assert!(result.contains("Method description"));
1656        assert!(result.contains("@param name"));
1657    }
1658
1659    // ==========================================================================
1660    // map_symbol_kind tests
1661    // ==========================================================================
1662
1663    #[test]
1664    fn test_map_symbol_kind_function() {
1665        assert_eq!(map_symbol_kind("function"), SymbolKind::Function);
1666    }
1667
1668    #[test]
1669    fn test_map_symbol_kind_class() {
1670        assert_eq!(map_symbol_kind("class"), SymbolKind::Class);
1671    }
1672
1673    #[test]
1674    fn test_map_symbol_kind_method() {
1675        assert_eq!(map_symbol_kind("method"), SymbolKind::Method);
1676    }
1677
1678    #[test]
1679    fn test_map_symbol_kind_struct() {
1680        assert_eq!(map_symbol_kind("struct"), SymbolKind::Struct);
1681    }
1682
1683    #[test]
1684    fn test_map_symbol_kind_enum() {
1685        assert_eq!(map_symbol_kind("enum"), SymbolKind::Enum);
1686    }
1687
1688    #[test]
1689    fn test_map_symbol_kind_interface() {
1690        assert_eq!(map_symbol_kind("interface"), SymbolKind::Interface);
1691    }
1692
1693    #[test]
1694    fn test_map_symbol_kind_trait() {
1695        assert_eq!(map_symbol_kind("trait"), SymbolKind::Trait);
1696    }
1697
1698    #[test]
1699    fn test_map_symbol_kind_unknown() {
1700        // Unknown capture names default to Function
1701        assert_eq!(map_symbol_kind("unknown"), SymbolKind::Function);
1702        assert_eq!(map_symbol_kind(""), SymbolKind::Function);
1703        assert_eq!(map_symbol_kind("random"), SymbolKind::Function);
1704    }
1705
1706    // ==========================================================================
1707    // is_builtin tests - Python
1708    // ==========================================================================
1709
1710    #[test]
1711    fn test_is_builtin_python_print() {
1712        assert!(is_builtin("print", Language::Python));
1713        assert!(is_builtin("len", Language::Python));
1714        assert!(is_builtin("range", Language::Python));
1715        assert!(is_builtin("str", Language::Python));
1716        assert!(is_builtin("int", Language::Python));
1717        assert!(is_builtin("float", Language::Python));
1718        assert!(is_builtin("list", Language::Python));
1719        assert!(is_builtin("dict", Language::Python));
1720        assert!(is_builtin("set", Language::Python));
1721        assert!(is_builtin("tuple", Language::Python));
1722    }
1723
1724    #[test]
1725    fn test_is_builtin_python_type_funcs() {
1726        assert!(is_builtin("bool", Language::Python));
1727        assert!(is_builtin("type", Language::Python));
1728        assert!(is_builtin("isinstance", Language::Python));
1729        assert!(is_builtin("hasattr", Language::Python));
1730        assert!(is_builtin("getattr", Language::Python));
1731        assert!(is_builtin("setattr", Language::Python));
1732        assert!(is_builtin("super", Language::Python));
1733    }
1734
1735    #[test]
1736    fn test_is_builtin_python_itertools() {
1737        assert!(is_builtin("iter", Language::Python));
1738        assert!(is_builtin("next", Language::Python));
1739        assert!(is_builtin("enumerate", Language::Python));
1740        assert!(is_builtin("zip", Language::Python));
1741        assert!(is_builtin("map", Language::Python));
1742        assert!(is_builtin("filter", Language::Python));
1743        assert!(is_builtin("sorted", Language::Python));
1744        assert!(is_builtin("reversed", Language::Python));
1745    }
1746
1747    #[test]
1748    fn test_is_builtin_python_math() {
1749        assert!(is_builtin("sum", Language::Python));
1750        assert!(is_builtin("min", Language::Python));
1751        assert!(is_builtin("max", Language::Python));
1752        assert!(is_builtin("abs", Language::Python));
1753        assert!(is_builtin("round", Language::Python));
1754    }
1755
1756    #[test]
1757    fn test_is_builtin_python_not_builtin() {
1758        assert!(!is_builtin("my_function", Language::Python));
1759        assert!(!is_builtin("custom_print", Language::Python));
1760        assert!(!is_builtin("calculate", Language::Python));
1761    }
1762
1763    // ==========================================================================
1764    // is_builtin tests - JavaScript/TypeScript
1765    // ==========================================================================
1766
1767    #[test]
1768    fn test_is_builtin_js_console() {
1769        assert!(is_builtin("console", Language::JavaScript));
1770        assert!(is_builtin("log", Language::JavaScript));
1771        assert!(is_builtin("error", Language::JavaScript));
1772        assert!(is_builtin("warn", Language::JavaScript));
1773    }
1774
1775    #[test]
1776    fn test_is_builtin_js_parsing() {
1777        assert!(is_builtin("parseInt", Language::JavaScript));
1778        assert!(is_builtin("parseFloat", Language::JavaScript));
1779        assert!(is_builtin("JSON", Language::JavaScript));
1780        assert!(is_builtin("stringify", Language::JavaScript));
1781        assert!(is_builtin("parse", Language::JavaScript));
1782    }
1783
1784    #[test]
1785    fn test_is_builtin_js_timers() {
1786        assert!(is_builtin("setTimeout", Language::JavaScript));
1787        assert!(is_builtin("setInterval", Language::JavaScript));
1788        assert!(is_builtin("clearTimeout", Language::JavaScript));
1789        assert!(is_builtin("clearInterval", Language::JavaScript));
1790    }
1791
1792    #[test]
1793    fn test_is_builtin_js_array_methods() {
1794        assert!(is_builtin("push", Language::JavaScript));
1795        assert!(is_builtin("pop", Language::JavaScript));
1796        assert!(is_builtin("shift", Language::JavaScript));
1797        assert!(is_builtin("unshift", Language::JavaScript));
1798        assert!(is_builtin("slice", Language::JavaScript));
1799        assert!(is_builtin("splice", Language::JavaScript));
1800        assert!(is_builtin("map", Language::JavaScript));
1801        assert!(is_builtin("filter", Language::JavaScript));
1802        assert!(is_builtin("reduce", Language::JavaScript));
1803        assert!(is_builtin("forEach", Language::JavaScript));
1804    }
1805
1806    #[test]
1807    fn test_is_builtin_ts_same_as_js() {
1808        assert!(is_builtin("console", Language::TypeScript));
1809        assert!(is_builtin("map", Language::TypeScript));
1810        assert!(is_builtin("filter", Language::TypeScript));
1811    }
1812
1813    #[test]
1814    fn test_is_builtin_js_not_builtin() {
1815        assert!(!is_builtin("myFunction", Language::JavaScript));
1816        assert!(!is_builtin("customLog", Language::JavaScript));
1817    }
1818
1819    // ==========================================================================
1820    // is_builtin tests - Rust
1821    // ==========================================================================
1822
1823    #[test]
1824    fn test_is_builtin_rust_macros() {
1825        assert!(is_builtin("println!", Language::Rust));
1826        assert!(is_builtin("print!", Language::Rust));
1827        assert!(is_builtin("eprintln!", Language::Rust));
1828        assert!(is_builtin("eprint!", Language::Rust));
1829        assert!(is_builtin("format!", Language::Rust));
1830        assert!(is_builtin("vec!", Language::Rust));
1831        assert!(is_builtin("panic!", Language::Rust));
1832        assert!(is_builtin("assert!", Language::Rust));
1833        assert!(is_builtin("assert_eq!", Language::Rust));
1834        assert!(is_builtin("assert_ne!", Language::Rust));
1835    }
1836
1837    #[test]
1838    fn test_is_builtin_rust_logging() {
1839        assert!(is_builtin("debug!", Language::Rust));
1840        assert!(is_builtin("info!", Language::Rust));
1841        assert!(is_builtin("warn!", Language::Rust));
1842        assert!(is_builtin("error!", Language::Rust));
1843        assert!(is_builtin("trace!", Language::Rust));
1844    }
1845
1846    #[test]
1847    fn test_is_builtin_rust_common_methods() {
1848        assert!(is_builtin("unwrap", Language::Rust));
1849        assert!(is_builtin("expect", Language::Rust));
1850        assert!(is_builtin("ok", Language::Rust));
1851        assert!(is_builtin("err", Language::Rust));
1852        assert!(is_builtin("some", Language::Rust));
1853        assert!(is_builtin("none", Language::Rust));
1854        assert!(is_builtin("clone", Language::Rust));
1855        assert!(is_builtin("to_string", Language::Rust));
1856        assert!(is_builtin("into", Language::Rust));
1857        assert!(is_builtin("from", Language::Rust));
1858        assert!(is_builtin("default", Language::Rust));
1859    }
1860
1861    #[test]
1862    fn test_is_builtin_rust_iterators() {
1863        assert!(is_builtin("iter", Language::Rust));
1864        assert!(is_builtin("into_iter", Language::Rust));
1865        assert!(is_builtin("collect", Language::Rust));
1866        assert!(is_builtin("map", Language::Rust));
1867        assert!(is_builtin("filter", Language::Rust));
1868    }
1869
1870    #[test]
1871    fn test_is_builtin_rust_not_builtin() {
1872        assert!(!is_builtin("my_function", Language::Rust));
1873        assert!(!is_builtin("process_data", Language::Rust));
1874    }
1875
1876    // ==========================================================================
1877    // is_builtin tests - Go
1878    // ==========================================================================
1879
1880    #[test]
1881    fn test_is_builtin_go_fmt() {
1882        assert!(is_builtin("fmt", Language::Go));
1883        assert!(is_builtin("Println", Language::Go));
1884        assert!(is_builtin("Printf", Language::Go));
1885        assert!(is_builtin("Sprintf", Language::Go));
1886        assert!(is_builtin("Errorf", Language::Go));
1887    }
1888
1889    #[test]
1890    fn test_is_builtin_go_memory() {
1891        assert!(is_builtin("make", Language::Go));
1892        assert!(is_builtin("new", Language::Go));
1893        assert!(is_builtin("len", Language::Go));
1894        assert!(is_builtin("cap", Language::Go));
1895        assert!(is_builtin("append", Language::Go));
1896        assert!(is_builtin("copy", Language::Go));
1897        assert!(is_builtin("delete", Language::Go));
1898    }
1899
1900    #[test]
1901    fn test_is_builtin_go_control() {
1902        assert!(is_builtin("close", Language::Go));
1903        assert!(is_builtin("panic", Language::Go));
1904        assert!(is_builtin("recover", Language::Go));
1905        assert!(is_builtin("print", Language::Go));
1906    }
1907
1908    #[test]
1909    fn test_is_builtin_go_not_builtin() {
1910        assert!(!is_builtin("ProcessData", Language::Go));
1911        assert!(!is_builtin("handleRequest", Language::Go));
1912    }
1913
1914    // ==========================================================================
1915    // is_builtin tests - Java
1916    // ==========================================================================
1917
1918    #[test]
1919    fn test_is_builtin_java_io() {
1920        assert!(is_builtin("println", Language::Java));
1921        assert!(is_builtin("print", Language::Java));
1922        assert!(is_builtin("printf", Language::Java));
1923    }
1924
1925    #[test]
1926    fn test_is_builtin_java_object() {
1927        assert!(is_builtin("toString", Language::Java));
1928        assert!(is_builtin("equals", Language::Java));
1929        assert!(is_builtin("hashCode", Language::Java));
1930        assert!(is_builtin("getClass", Language::Java));
1931        assert!(is_builtin("clone", Language::Java));
1932        assert!(is_builtin("notify", Language::Java));
1933        assert!(is_builtin("wait", Language::Java));
1934    }
1935
1936    #[test]
1937    fn test_is_builtin_java_collections() {
1938        assert!(is_builtin("get", Language::Java));
1939        assert!(is_builtin("set", Language::Java));
1940        assert!(is_builtin("add", Language::Java));
1941        assert!(is_builtin("remove", Language::Java));
1942        assert!(is_builtin("size", Language::Java));
1943        assert!(is_builtin("isEmpty", Language::Java));
1944        assert!(is_builtin("contains", Language::Java));
1945        assert!(is_builtin("iterator", Language::Java));
1946    }
1947
1948    #[test]
1949    fn test_is_builtin_java_not_builtin() {
1950        assert!(!is_builtin("processData", Language::Java));
1951        assert!(!is_builtin("calculateTotal", Language::Java));
1952    }
1953
1954    // ==========================================================================
1955    // is_builtin tests - C/C++
1956    // ==========================================================================
1957
1958    #[test]
1959    fn test_is_builtin_c_io() {
1960        assert!(is_builtin("printf", Language::C));
1961        assert!(is_builtin("scanf", Language::C));
1962    }
1963
1964    #[test]
1965    fn test_is_builtin_c_memory() {
1966        assert!(is_builtin("malloc", Language::C));
1967        assert!(is_builtin("free", Language::C));
1968        assert!(is_builtin("memcpy", Language::C));
1969        assert!(is_builtin("memset", Language::C));
1970    }
1971
1972    #[test]
1973    fn test_is_builtin_c_string() {
1974        assert!(is_builtin("strlen", Language::C));
1975        assert!(is_builtin("strcpy", Language::C));
1976        assert!(is_builtin("strcmp", Language::C));
1977        assert!(is_builtin("strcat", Language::C));
1978    }
1979
1980    #[test]
1981    fn test_is_builtin_cpp_streams() {
1982        assert!(is_builtin("cout", Language::Cpp));
1983        assert!(is_builtin("cin", Language::Cpp));
1984        assert!(is_builtin("endl", Language::Cpp));
1985        assert!(is_builtin("cerr", Language::Cpp));
1986        assert!(is_builtin("clog", Language::Cpp));
1987    }
1988
1989    #[test]
1990    fn test_is_builtin_c_not_builtin() {
1991        assert!(!is_builtin("process_data", Language::C));
1992        assert!(!is_builtin("custom_malloc", Language::C));
1993    }
1994
1995    // ==========================================================================
1996    // is_builtin tests - C#
1997    // ==========================================================================
1998
1999    #[test]
2000    fn test_is_builtin_csharp_console() {
2001        assert!(is_builtin("WriteLine", Language::CSharp));
2002        assert!(is_builtin("Write", Language::CSharp));
2003        assert!(is_builtin("ReadLine", Language::CSharp));
2004    }
2005
2006    #[test]
2007    fn test_is_builtin_csharp_object() {
2008        assert!(is_builtin("ToString", Language::CSharp));
2009        assert!(is_builtin("Equals", Language::CSharp));
2010        assert!(is_builtin("GetHashCode", Language::CSharp));
2011        assert!(is_builtin("GetType", Language::CSharp));
2012    }
2013
2014    #[test]
2015    fn test_is_builtin_csharp_collections() {
2016        assert!(is_builtin("Add", Language::CSharp));
2017        assert!(is_builtin("Remove", Language::CSharp));
2018        assert!(is_builtin("Contains", Language::CSharp));
2019        assert!(is_builtin("Count", Language::CSharp));
2020        assert!(is_builtin("Clear", Language::CSharp));
2021        assert!(is_builtin("ToList", Language::CSharp));
2022        assert!(is_builtin("ToArray", Language::CSharp));
2023    }
2024
2025    // ==========================================================================
2026    // is_builtin tests - Ruby
2027    // ==========================================================================
2028
2029    #[test]
2030    fn test_is_builtin_ruby_io() {
2031        assert!(is_builtin("puts", Language::Ruby));
2032        assert!(is_builtin("print", Language::Ruby));
2033        assert!(is_builtin("p", Language::Ruby));
2034        assert!(is_builtin("gets", Language::Ruby));
2035    }
2036
2037    #[test]
2038    fn test_is_builtin_ruby_enumerable() {
2039        assert!(is_builtin("each", Language::Ruby));
2040        assert!(is_builtin("map", Language::Ruby));
2041        assert!(is_builtin("select", Language::Ruby));
2042        assert!(is_builtin("reject", Language::Ruby));
2043        assert!(is_builtin("reduce", Language::Ruby));
2044        assert!(is_builtin("inject", Language::Ruby));
2045        assert!(is_builtin("find", Language::Ruby));
2046    }
2047
2048    #[test]
2049    fn test_is_builtin_ruby_predicates() {
2050        assert!(is_builtin("any?", Language::Ruby));
2051        assert!(is_builtin("all?", Language::Ruby));
2052        assert!(is_builtin("include?", Language::Ruby));
2053        assert!(is_builtin("empty?", Language::Ruby));
2054        assert!(is_builtin("nil?", Language::Ruby));
2055    }
2056
2057    // ==========================================================================
2058    // is_builtin tests - PHP
2059    // ==========================================================================
2060
2061    #[test]
2062    fn test_is_builtin_php_io() {
2063        assert!(is_builtin("echo", Language::Php));
2064        assert!(is_builtin("print", Language::Php));
2065        assert!(is_builtin("var_dump", Language::Php));
2066        assert!(is_builtin("print_r", Language::Php));
2067    }
2068
2069    #[test]
2070    fn test_is_builtin_php_checks() {
2071        assert!(is_builtin("isset", Language::Php));
2072        assert!(is_builtin("empty", Language::Php));
2073    }
2074
2075    #[test]
2076    fn test_is_builtin_php_array_string() {
2077        assert!(is_builtin("array", Language::Php));
2078        assert!(is_builtin("count", Language::Php));
2079        assert!(is_builtin("strlen", Language::Php));
2080        assert!(is_builtin("strpos", Language::Php));
2081        assert!(is_builtin("substr", Language::Php));
2082        assert!(is_builtin("explode", Language::Php));
2083        assert!(is_builtin("implode", Language::Php));
2084        assert!(is_builtin("json_encode", Language::Php));
2085        assert!(is_builtin("json_decode", Language::Php));
2086    }
2087
2088    // ==========================================================================
2089    // is_builtin tests - Kotlin
2090    // ==========================================================================
2091
2092    #[test]
2093    fn test_is_builtin_kotlin_io() {
2094        assert!(is_builtin("println", Language::Kotlin));
2095        assert!(is_builtin("print", Language::Kotlin));
2096        assert!(is_builtin("readLine", Language::Kotlin));
2097    }
2098
2099    #[test]
2100    fn test_is_builtin_kotlin_scope() {
2101        assert!(is_builtin("let", Language::Kotlin));
2102        assert!(is_builtin("also", Language::Kotlin));
2103        assert!(is_builtin("apply", Language::Kotlin));
2104        assert!(is_builtin("run", Language::Kotlin));
2105        assert!(is_builtin("with", Language::Kotlin));
2106    }
2107
2108    #[test]
2109    fn test_is_builtin_kotlin_collections() {
2110        assert!(is_builtin("listOf", Language::Kotlin));
2111        assert!(is_builtin("mapOf", Language::Kotlin));
2112        assert!(is_builtin("setOf", Language::Kotlin));
2113        assert!(is_builtin("map", Language::Kotlin));
2114        assert!(is_builtin("filter", Language::Kotlin));
2115        assert!(is_builtin("forEach", Language::Kotlin));
2116    }
2117
2118    // ==========================================================================
2119    // is_builtin tests - Swift
2120    // ==========================================================================
2121
2122    #[test]
2123    fn test_is_builtin_swift_io() {
2124        assert!(is_builtin("print", Language::Swift));
2125        assert!(is_builtin("debugPrint", Language::Swift));
2126        assert!(is_builtin("dump", Language::Swift));
2127    }
2128
2129    #[test]
2130    fn test_is_builtin_swift_functional() {
2131        assert!(is_builtin("map", Language::Swift));
2132        assert!(is_builtin("filter", Language::Swift));
2133        assert!(is_builtin("reduce", Language::Swift));
2134        assert!(is_builtin("forEach", Language::Swift));
2135    }
2136
2137    #[test]
2138    fn test_is_builtin_swift_collection() {
2139        assert!(is_builtin("contains", Language::Swift));
2140        assert!(is_builtin("count", Language::Swift));
2141        assert!(is_builtin("isEmpty", Language::Swift));
2142        assert!(is_builtin("append", Language::Swift));
2143    }
2144
2145    // ==========================================================================
2146    // is_builtin tests - Scala
2147    // ==========================================================================
2148
2149    #[test]
2150    fn test_is_builtin_scala_io() {
2151        assert!(is_builtin("println", Language::Scala));
2152        assert!(is_builtin("print", Language::Scala));
2153    }
2154
2155    #[test]
2156    fn test_is_builtin_scala_functional() {
2157        assert!(is_builtin("map", Language::Scala));
2158        assert!(is_builtin("filter", Language::Scala));
2159        assert!(is_builtin("flatMap", Language::Scala));
2160        assert!(is_builtin("foreach", Language::Scala));
2161        assert!(is_builtin("reduce", Language::Scala));
2162        assert!(is_builtin("fold", Language::Scala));
2163        assert!(is_builtin("foldLeft", Language::Scala));
2164        assert!(is_builtin("foldRight", Language::Scala));
2165        assert!(is_builtin("collect", Language::Scala));
2166    }
2167
2168    // ==========================================================================
2169    // is_builtin tests - Languages with no builtins
2170    // ==========================================================================
2171
2172    #[test]
2173    fn test_is_builtin_bash_always_false() {
2174        assert!(!is_builtin("ls", Language::Bash));
2175        assert!(!is_builtin("echo", Language::Bash));
2176        assert!(!is_builtin("grep", Language::Bash));
2177    }
2178
2179    #[test]
2180    fn test_is_builtin_haskell_always_false() {
2181        assert!(!is_builtin("putStrLn", Language::Haskell));
2182        assert!(!is_builtin("map", Language::Haskell));
2183    }
2184
2185    #[test]
2186    fn test_is_builtin_elixir_always_false() {
2187        assert!(!is_builtin("IO.puts", Language::Elixir));
2188        assert!(!is_builtin("Enum.map", Language::Elixir));
2189    }
2190
2191    #[test]
2192    fn test_is_builtin_clojure_always_false() {
2193        assert!(!is_builtin("println", Language::Clojure));
2194        assert!(!is_builtin("map", Language::Clojure));
2195    }
2196
2197    #[test]
2198    fn test_is_builtin_ocaml_always_false() {
2199        assert!(!is_builtin("print_endline", Language::OCaml));
2200        assert!(!is_builtin("List.map", Language::OCaml));
2201    }
2202
2203    #[test]
2204    fn test_is_builtin_fsharp_always_false() {
2205        assert!(!is_builtin("printfn", Language::FSharp));
2206        assert!(!is_builtin("List.map", Language::FSharp));
2207    }
2208
2209    #[test]
2210    fn test_is_builtin_lua_always_false() {
2211        assert!(!is_builtin("print", Language::Lua));
2212        assert!(!is_builtin("pairs", Language::Lua));
2213    }
2214
2215    #[test]
2216    fn test_is_builtin_r_always_false() {
2217        assert!(!is_builtin("print", Language::R));
2218        assert!(!is_builtin("cat", Language::R));
2219    }
2220
2221    // ==========================================================================
2222    // Integration tests using tree-sitter parsing
2223    // ==========================================================================
2224
2225    // Helper to parse code and get the first node of a specific kind
2226    fn parse_and_find_node(
2227        code: &str,
2228        language: Language,
2229        node_kind: &str,
2230    ) -> Option<(tree_sitter::Tree, usize)> {
2231        let mut parser = tree_sitter::Parser::new();
2232
2233        let ts_language = match language {
2234            Language::Python => tree_sitter_python::LANGUAGE,
2235            Language::Rust => tree_sitter_rust::LANGUAGE,
2236            Language::JavaScript => tree_sitter_javascript::LANGUAGE,
2237            Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
2238            Language::Go => tree_sitter_go::LANGUAGE,
2239            Language::Java => tree_sitter_java::LANGUAGE,
2240            _ => return None,
2241        };
2242
2243        parser
2244            .set_language(&ts_language.into())
2245            .expect("Error loading grammar");
2246
2247        let tree = parser.parse(code, None)?;
2248        let root = tree.root_node();
2249
2250        fn find_node_recursive(node: tree_sitter::Node<'_>, kind: &str) -> Option<usize> {
2251            if node.kind() == kind {
2252                return Some(node.id());
2253            }
2254            for child in node.children(&mut node.walk()) {
2255                if let Some(id) = find_node_recursive(child, kind) {
2256                    return Some(id);
2257                }
2258            }
2259            None
2260        }
2261
2262        find_node_recursive(root, node_kind).map(|_| (tree, 0))
2263    }
2264
2265    // Helper to find node by kind in tree
2266    fn find_node_in_tree<'a>(
2267        node: tree_sitter::Node<'a>,
2268        kind: &str,
2269    ) -> Option<tree_sitter::Node<'a>> {
2270        if node.kind() == kind {
2271            return Some(node);
2272        }
2273        for child in node.children(&mut node.walk()) {
2274            if let Some(found) = find_node_in_tree(child, kind) {
2275                return Some(found);
2276            }
2277        }
2278        None
2279    }
2280
2281    #[test]
2282    fn test_extract_signature_python() {
2283        // Note: Python signature extraction stops at first ':' or '\n'
2284        // So type annotations in parameters are cut off at the first ':'
2285        let code = "def hello(name):\n    return f'Hello {name}'";
2286        let mut parser = tree_sitter::Parser::new();
2287        parser
2288            .set_language(&tree_sitter_python::LANGUAGE.into())
2289            .unwrap();
2290        let tree = parser.parse(code, None).unwrap();
2291        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2292
2293        let sig = extract_signature(func_node, code, Language::Python);
2294        assert!(sig.is_some());
2295        let sig = sig.unwrap();
2296        assert!(sig.contains("def hello"));
2297        assert!(sig.contains("name"));
2298    }
2299
2300    #[test]
2301    fn test_extract_signature_rust() {
2302        let code = "fn add(a: i32, b: i32) -> i32 { a + b }";
2303        let mut parser = tree_sitter::Parser::new();
2304        parser
2305            .set_language(&tree_sitter_rust::LANGUAGE.into())
2306            .unwrap();
2307        let tree = parser.parse(code, None).unwrap();
2308        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2309
2310        let sig = extract_signature(func_node, code, Language::Rust);
2311        assert!(sig.is_some());
2312        let sig = sig.unwrap();
2313        assert!(sig.contains("fn add"));
2314        assert!(sig.contains("i32"));
2315    }
2316
2317    #[test]
2318    fn test_extract_signature_javascript() {
2319        let code = "function greet(name) { return 'Hello ' + name; }";
2320        let mut parser = tree_sitter::Parser::new();
2321        parser
2322            .set_language(&tree_sitter_javascript::LANGUAGE.into())
2323            .unwrap();
2324        let tree = parser.parse(code, None).unwrap();
2325        let func_node = find_node_in_tree(tree.root_node(), "function_declaration").unwrap();
2326
2327        let sig = extract_signature(func_node, code, Language::JavaScript);
2328        assert!(sig.is_some());
2329        let sig = sig.unwrap();
2330        assert!(sig.contains("function greet"));
2331        assert!(sig.contains("name"));
2332    }
2333
/// Python: a function name without a leading underscore is reported as public.
#[test]
fn test_extract_visibility_python_public() {
    let source = "def public_func():\n    pass";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_python::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Python),
        Visibility::Public
    );
}
2347
/// Python: a double-underscore prefix (without a matching suffix) is reported
/// as private.
#[test]
fn test_extract_visibility_python_private() {
    let source = "def __private_func():\n    pass";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_python::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Python),
        Visibility::Private
    );
}
2361
/// Python: a single leading underscore is reported as protected.
#[test]
fn test_extract_visibility_python_protected() {
    let source = "def _protected_func():\n    pass";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_python::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Python),
        Visibility::Protected
    );
}
2375
/// Python: pins the current classification of dunder methods such as
/// `__init__` as `Visibility::Protected`.
#[test]
fn test_extract_visibility_python_dunder() {
    // NOTE(review): the note previously attached to this test describes the
    // implementation as checking `starts_with("__") && !ends_with("__")` for
    // Private and then `starts_with('_')` in an else-if for Protected. Under
    // that description a true dunder skips the Private branch (it also ends
    // with `__`) but still begins with `_`, so it lands in the Protected
    // branch, not Public. Confirm against `extract_visibility` if the
    // intended result for dunders is Public.
    let source = "def __init__(self):\n    pass";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_python::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    let visibility = extract_visibility(node, source, Language::Python);
    // This assertion records actual behavior rather than a design decision.
    assert_eq!(visibility, Visibility::Protected);
}
2394
/// Rust: a `pub fn` is reported as public.
#[test]
fn test_extract_visibility_rust_pub() {
    let source = "pub fn public_func() {}";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_item").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Rust),
        Visibility::Public
    );
}
2408
/// Rust: a `fn` with no visibility modifier is reported as private.
#[test]
fn test_extract_visibility_rust_private() {
    let source = "fn private_func() {}";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_item").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Rust),
        Visibility::Private
    );
}
2422
/// Rust: a `pub(crate) fn` is reported as internal.
#[test]
fn test_extract_visibility_rust_pub_crate() {
    let source = "pub(crate) fn crate_func() {}";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_item").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Rust),
        Visibility::Internal
    );
}
2436
/// Go: a function whose name starts with an uppercase letter is reported as
/// public.
#[test]
fn test_extract_visibility_go_exported() {
    let source = "func Exported() {}";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_go::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_declaration").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Go),
        Visibility::Public
    );
}
2450
/// Go: a function whose name starts with a lowercase letter is reported as
/// private.
#[test]
fn test_extract_visibility_go_unexported() {
    let source = "func unexported() {}";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_go::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_declaration").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Go),
        Visibility::Private
    );
}
2464
/// Bash: a shell function definition is reported as public.
#[test]
fn test_extract_visibility_bash_always_public() {
    let source = "my_func() { echo hello; }";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_bash::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    assert_eq!(
        extract_visibility(node, source, Language::Bash),
        Visibility::Public
    );
}
2478
/// Python: the body of a `function_definition` is located and is a `block`
/// node.
#[test]
fn test_find_body_node_python() {
    let source = "def foo():\n    x = 1\n    return x";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_python::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    // Single `expect` instead of asserting `is_some()` and then unwrapping.
    let body = find_body_node(node, Language::Python)
        .expect("find_body_node returned None for a Python function_definition");
    assert_eq!(body.kind(), "block");
}
2493
/// Rust: the body of a `function_item` is located and is a `block` node.
#[test]
fn test_find_body_node_rust() {
    let source = "fn foo() { let x = 1; x }";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_item").unwrap();

    // Single `expect` instead of asserting `is_some()` and then unwrapping.
    let body = find_body_node(node, Language::Rust)
        .expect("find_body_node returned None for a Rust function_item");
    assert_eq!(body.kind(), "block");
}
2508
/// JavaScript: the body of a `function_declaration` is located and is a
/// `statement_block` node.
#[test]
fn test_find_body_node_javascript() {
    let source = "function foo() { return 1; }";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_javascript::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_declaration").unwrap();

    // Single `expect` instead of asserting `is_some()` and then unwrapping.
    let body = find_body_node(node, Language::JavaScript)
        .expect("find_body_node returned None for a JavaScript function_declaration");
    assert_eq!(body.kind(), "statement_block");
}
2523
/// Python: user-defined callees inside a function body are collected.
#[test]
fn test_extract_calls_python() {
    let source = "def foo():\n    bar()\n    custom_func(1, 2)";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_python::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    let callees = extract_calls(node, source, Language::Python);
    for expected in ["bar", "custom_func"] {
        assert!(callees.contains(&expected.to_string()));
    }
}
2538
/// Python: calls to built-ins (`print`, `len`) must not appear in the
/// extracted call list.
#[test]
fn test_extract_calls_python_filters_builtins() {
    let source = "def foo():\n    print('hello')\n    len([1,2,3])";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_python::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_definition").unwrap();

    let callees = extract_calls(node, source, Language::Python);
    // Neither built-in may survive the filter.
    for builtin in ["print", "len"] {
        assert!(!callees.contains(&builtin.to_string()));
    }
}
2554
/// Rust: plain function calls inside a function body are collected.
#[test]
fn test_extract_calls_rust() {
    let source = "fn foo() { bar(); baz(1); }";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_item").unwrap();

    let callees = extract_calls(node, source, Language::Rust);
    for expected in ["bar", "baz"] {
        assert!(callees.contains(&expected.to_string()));
    }
}
2569
/// Rust: a single `///` line preceding a function is returned as its
/// docstring.
#[test]
fn test_extract_docstring_rust() {
    let source = "/// This is a doc comment\nfn foo() {}";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_item").unwrap();

    // Single `expect` instead of asserting `is_some()` and then unwrapping.
    let doc = extract_docstring(node, source, Language::Rust)
        .expect("extract_docstring returned None for a documented Rust function");
    assert!(doc.contains("This is a doc comment"));
}
2584
/// Rust: consecutive `///` lines preceding a function are all present in the
/// extracted docstring.
#[test]
fn test_extract_docstring_rust_multiline() {
    let source = "/// Line 1\n/// Line 2\nfn foo() {}";
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .unwrap();
    let parsed = ts_parser.parse(source, None).unwrap();
    let node = find_node_in_tree(parsed.root_node(), "function_item").unwrap();

    // Single `expect` instead of asserting `is_some()` and then unwrapping.
    let doc = extract_docstring(node, source, Language::Rust)
        .expect("extract_docstring returned None for a documented Rust function");
    assert!(doc.contains("Line 1"));
    assert!(doc.contains("Line 2"));
}
2601}