infiniloom_engine/parser/
extraction.rs

1//! Symbol extraction utilities for parsing
2//!
3//! This module contains standalone functions for extracting metadata from AST nodes:
4//! - Signatures
5//! - Docstrings
6//! - Visibility modifiers
7//! - Function calls
8//! - Inheritance relationships
9
10use super::language::Language;
11use crate::types::{SymbolKind, Visibility};
12use std::collections::HashSet;
13use tree_sitter::Node;
14
/// Return the largest UTF-8 character boundary of `s` that is `<= index`.
///
/// Indices at or beyond the end of the string are clamped to `s.len()`.
/// Use this before slicing so that a byte offset landing inside a multi-byte
/// character does not cause a panic.
fn safe_char_boundary(s: &str, index: usize) -> usize {
    let clamped = index.min(s.len());
    // Offset 0 is always a boundary, so the search cannot come up empty; the
    // `unwrap_or` only exists to avoid a panic path.
    (0..=clamped)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
27
28/// Extract function/method signature
29pub fn extract_signature(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
30    let sig_node = match language {
31        Language::Python => {
32            if node.kind() == "function_definition" {
33                let start = node.start_byte();
34                let mut end = start;
35                for byte in &source_code.as_bytes()[start..] {
36                    end += 1;
37                    if *byte == b':' || *byte == b'\n' {
38                        break;
39                    }
40                }
41                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
42            }
43            None
44        },
45        Language::JavaScript | Language::TypeScript => {
46            if node.kind().contains("function") || node.kind().contains("method") {
47                let start = node.start_byte();
48                let mut end = start;
49                let mut brace_count = 0;
50                for byte in &source_code.as_bytes()[start..] {
51                    if *byte == b'{' {
52                        brace_count += 1;
53                        if brace_count == 1 {
54                            break;
55                        }
56                    }
57                    end += 1;
58                }
59                return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
60            }
61            None
62        },
63        Language::Rust => {
64            if node.kind() == "function_item" {
65                for child in node.children(&mut node.walk()) {
66                    if child.kind() == "block" {
67                        let start = node.start_byte();
68                        let end = child.start_byte();
69                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
70                    }
71                }
72            }
73            None
74        },
75        Language::Go => {
76            if node.kind() == "function_declaration" || node.kind() == "method_declaration" {
77                for child in node.children(&mut node.walk()) {
78                    if child.kind() == "block" {
79                        let start = node.start_byte();
80                        let end = child.start_byte();
81                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
82                    }
83                }
84            }
85            None
86        },
87        Language::Java => {
88            if node.kind() == "method_declaration" {
89                for child in node.children(&mut node.walk()) {
90                    if child.kind() == "block" {
91                        let start = node.start_byte();
92                        let end = child.start_byte();
93                        return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
94                    }
95                }
96            }
97            None
98        },
99        Language::C
100        | Language::Cpp
101        | Language::CSharp
102        | Language::Php
103        | Language::Kotlin
104        | Language::Swift
105        | Language::Scala => {
106            for child in node.children(&mut node.walk()) {
107                if child.kind() == "block"
108                    || child.kind() == "compound_statement"
109                    || child.kind() == "function_body"
110                {
111                    let start = node.start_byte();
112                    let end = child.start_byte();
113                    return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
114                }
115            }
116            None
117        },
118        Language::Ruby | Language::Lua => {
119            let start = node.start_byte();
120            let mut end = start;
121            for byte in &source_code.as_bytes()[start..] {
122                end += 1;
123                if *byte == b'\n' {
124                    break;
125                }
126            }
127            Some(source_code[start..end].trim().to_owned())
128        },
129        Language::Bash => {
130            let start = node.start_byte();
131            let mut end = start;
132            for byte in &source_code.as_bytes()[start..] {
133                if *byte == b'{' {
134                    break;
135                }
136                end += 1;
137            }
138            Some(source_code[start..end].trim().to_owned())
139        },
140        Language::Haskell
141        | Language::OCaml
142        | Language::FSharp
143        | Language::Elixir
144        | Language::Clojure
145        | Language::R => {
146            let start = node.start_byte();
147            let mut end = start;
148            for byte in &source_code.as_bytes()[start..] {
149                end += 1;
150                if *byte == b'\n' || *byte == b'=' {
151                    break;
152                }
153            }
154            Some(source_code[start..end].trim().to_owned())
155        },
156    };
157
158    sig_node.or_else(|| {
159        let start = node.start_byte();
160        let end = std::cmp::min(start + 200, source_code.len());
161        // Ensure we slice at valid UTF-8 character boundaries
162        let safe_start = safe_char_boundary(source_code, start);
163        let safe_end = safe_char_boundary(source_code, end);
164        if safe_start >= safe_end {
165            return None;
166        }
167        let text = &source_code[safe_start..safe_end];
168        text.lines().next().map(|s| s.trim().to_owned())
169    })
170}
171
172/// Extract docstring/documentation comment
173pub fn extract_docstring(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
174    match language {
175        Language::Python => {
176            let mut cursor = node.walk();
177            for child in node.children(&mut cursor) {
178                if child.kind() == "block" {
179                    for stmt in child.children(&mut child.walk()) {
180                        if stmt.kind() == "expression_statement" {
181                            for expr in stmt.children(&mut stmt.walk()) {
182                                if expr.kind() == "string" {
183                                    if let Ok(text) = expr.utf8_text(source_code.as_bytes()) {
184                                        return Some(
185                                            text.trim_matches(|c| c == '"' || c == '\'')
186                                                .trim()
187                                                .to_owned(),
188                                        );
189                                    }
190                                }
191                            }
192                        }
193                    }
194                }
195            }
196            None
197        },
198        Language::JavaScript | Language::TypeScript => {
199            if let Some(prev_sibling) = node.prev_sibling() {
200                if prev_sibling.kind() == "comment" {
201                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
202                        if text.starts_with("/**") {
203                            return Some(clean_jsdoc(text));
204                        }
205                    }
206                }
207            }
208            None
209        },
210        Language::Rust => {
211            let start_byte = node.start_byte();
212            let lines_before: Vec<_> = source_code[..start_byte]
213                .lines()
214                .rev()
215                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
216                .collect();
217
218            if !lines_before.is_empty() {
219                let doc: Vec<String> = lines_before
220                    .into_iter()
221                    .rev()
222                    .filter_map(|line| {
223                        let trimmed = line.trim();
224                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
225                    })
226                    .collect();
227
228                if !doc.is_empty() {
229                    return Some(doc.join(" "));
230                }
231            }
232            None
233        },
234        Language::Go => {
235            if let Some(prev_sibling) = node.prev_sibling() {
236                if prev_sibling.kind() == "comment" {
237                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
238                        return Some(text.trim_start_matches("//").trim().to_owned());
239                    }
240                }
241            }
242            None
243        },
244        Language::Java => {
245            if let Some(prev_sibling) = node.prev_sibling() {
246                if prev_sibling.kind() == "block_comment" {
247                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
248                        if text.starts_with("/**") {
249                            return Some(clean_javadoc(text));
250                        }
251                    }
252                }
253            }
254            None
255        },
256        Language::C | Language::Cpp => {
257            if let Some(prev_sibling) = node.prev_sibling() {
258                if prev_sibling.kind() == "comment" {
259                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
260                        if text.starts_with("/**") || text.starts_with("/*") {
261                            return Some(clean_jsdoc(text));
262                        }
263                        return Some(text.trim_start_matches("//").trim().to_owned());
264                    }
265                }
266            }
267            None
268        },
269        Language::CSharp => {
270            let start_byte = node.start_byte();
271            let lines_before: Vec<_> = source_code[..start_byte]
272                .lines()
273                .rev()
274                .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
275                .collect();
276
277            if !lines_before.is_empty() {
278                let doc: Vec<String> = lines_before
279                    .into_iter()
280                    .rev()
281                    .filter_map(|line| {
282                        let trimmed = line.trim();
283                        trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
284                    })
285                    .collect();
286
287                if !doc.is_empty() {
288                    return Some(doc.join(" "));
289                }
290            }
291            None
292        },
293        Language::Ruby => {
294            if let Some(prev_sibling) = node.prev_sibling() {
295                if prev_sibling.kind() == "comment" {
296                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
297                        return Some(text.trim_start_matches('#').trim().to_owned());
298                    }
299                }
300            }
301            None
302        },
303        Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
304            if let Some(prev_sibling) = node.prev_sibling() {
305                let kind = prev_sibling.kind();
306                if kind == "comment" || kind == "multiline_comment" || kind == "block_comment" {
307                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
308                        if text.starts_with("/**") {
309                            return Some(clean_jsdoc(text));
310                        }
311                    }
312                }
313            }
314            None
315        },
316        Language::Bash => {
317            if let Some(prev_sibling) = node.prev_sibling() {
318                if prev_sibling.kind() == "comment" {
319                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
320                        return Some(text.trim_start_matches('#').trim().to_owned());
321                    }
322                }
323            }
324            None
325        },
326        Language::Haskell => {
327            if let Some(prev_sibling) = node.prev_sibling() {
328                if prev_sibling.kind() == "comment" {
329                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
330                        let cleaned = text
331                            .trim_start_matches("{-")
332                            .trim_end_matches("-}")
333                            .trim_start_matches("--")
334                            .trim();
335                        return Some(cleaned.to_owned());
336                    }
337                }
338            }
339            None
340        },
341        Language::Elixir => {
342            if let Some(prev_sibling) = node.prev_sibling() {
343                if prev_sibling.kind() == "comment" {
344                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
345                        return Some(text.trim_start_matches('#').trim().to_owned());
346                    }
347                }
348            }
349            None
350        },
351        Language::Clojure => None,
352        Language::OCaml | Language::FSharp => {
353            if let Some(prev_sibling) = node.prev_sibling() {
354                if prev_sibling.kind() == "comment" {
355                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
356                        let cleaned = text
357                            .trim_start_matches("(**")
358                            .trim_start_matches("(*")
359                            .trim_end_matches("*)")
360                            .trim();
361                        return Some(cleaned.to_owned());
362                    }
363                }
364            }
365            None
366        },
367        Language::Lua => {
368            if let Some(prev_sibling) = node.prev_sibling() {
369                if prev_sibling.kind() == "comment" {
370                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
371                        let cleaned = text
372                            .trim_start_matches("--[[")
373                            .trim_end_matches("]]")
374                            .trim_start_matches("--")
375                            .trim();
376                        return Some(cleaned.to_owned());
377                    }
378                }
379            }
380            None
381        },
382        Language::R => {
383            if let Some(prev_sibling) = node.prev_sibling() {
384                if prev_sibling.kind() == "comment" {
385                    if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
386                        return Some(text.trim_start_matches('#').trim().to_owned());
387                    }
388                }
389            }
390            None
391        },
392    }
393}
394
395/// Extract parent class/struct name for methods
396pub fn extract_parent(node: Node<'_>, source_code: &str) -> Option<String> {
397    let mut current = node.parent()?;
398
399    while let Some(parent) = current.parent() {
400        if ["class_definition", "class_declaration", "struct_item", "impl_item"]
401            .contains(&parent.kind())
402        {
403            for child in parent.children(&mut parent.walk()) {
404                if child.kind() == "identifier" || child.kind() == "type_identifier" {
405                    if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
406                        return Some(name.to_owned());
407                    }
408                }
409            }
410        }
411        current = parent;
412    }
413
414    None
415}
416
417/// Extract visibility modifier from a node
418pub fn extract_visibility(node: Node<'_>, source_code: &str, language: Language) -> Visibility {
419    match language {
420        Language::Python => {
421            if let Some(name_node) = node.child_by_field_name("name") {
422                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
423                    if name.starts_with("__") && !name.ends_with("__") {
424                        return Visibility::Private;
425                    } else if name.starts_with('_') {
426                        return Visibility::Protected;
427                    }
428                }
429            }
430            Visibility::Public
431        },
432        Language::Rust => {
433            for child in node.children(&mut node.walk()) {
434                if child.kind() == "visibility_modifier" {
435                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
436                        if text.contains("pub(crate)") || text.contains("pub(super)") {
437                            return Visibility::Internal;
438                        } else if text.starts_with("pub") {
439                            return Visibility::Public;
440                        }
441                    }
442                }
443            }
444            Visibility::Private
445        },
446        Language::JavaScript | Language::TypeScript => {
447            for child in node.children(&mut node.walk()) {
448                let kind = child.kind();
449                if kind == "private" || kind == "accessibility_modifier" {
450                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
451                        return match text {
452                            "private" => Visibility::Private,
453                            "protected" => Visibility::Protected,
454                            _ => Visibility::Public,
455                        };
456                    }
457                }
458            }
459            if let Some(name_node) = node.child_by_field_name("name") {
460                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
461                    if name.starts_with('#') {
462                        return Visibility::Private;
463                    }
464                }
465            }
466            Visibility::Public
467        },
468        Language::Go => {
469            if let Some(name_node) = node.child_by_field_name("name") {
470                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
471                    if let Some(first_char) = name.chars().next() {
472                        if first_char.is_lowercase() {
473                            return Visibility::Private;
474                        }
475                    }
476                }
477            }
478            Visibility::Public
479        },
480        Language::Java => {
481            for child in node.children(&mut node.walk()) {
482                if child.kind() == "modifiers" {
483                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
484                        if text.contains("private") {
485                            return Visibility::Private;
486                        } else if text.contains("protected") {
487                            return Visibility::Protected;
488                        } else if text.contains("public") {
489                            return Visibility::Public;
490                        }
491                    }
492                }
493            }
494            Visibility::Internal
495        },
496        Language::C | Language::Cpp => {
497            for child in node.children(&mut node.walk()) {
498                if child.kind() == "storage_class_specifier" {
499                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
500                        if text == "static" {
501                            return Visibility::Private;
502                        }
503                    }
504                }
505            }
506            Visibility::Public
507        },
508        Language::CSharp | Language::Kotlin | Language::Swift | Language::Scala => {
509            for child in node.children(&mut node.walk()) {
510                let kind = child.kind();
511                if kind == "modifier" || kind == "modifiers" || kind == "visibility_modifier" {
512                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
513                        if text.contains("private") {
514                            return Visibility::Private;
515                        } else if text.contains("protected") {
516                            return Visibility::Protected;
517                        } else if text.contains("internal") {
518                            return Visibility::Internal;
519                        } else if text.contains("public") {
520                            return Visibility::Public;
521                        }
522                    }
523                }
524            }
525            Visibility::Internal
526        },
527        Language::Ruby => {
528            if let Some(name_node) = node.child_by_field_name("name") {
529                if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
530                    if name.starts_with('_') {
531                        return Visibility::Private;
532                    }
533                }
534            }
535            Visibility::Public
536        },
537        Language::Php => {
538            for child in node.children(&mut node.walk()) {
539                if child.kind() == "visibility_modifier" {
540                    if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
541                        return match text {
542                            "private" => Visibility::Private,
543                            "protected" => Visibility::Protected,
544                            "public" => Visibility::Public,
545                            _ => Visibility::Public,
546                        };
547                    }
548                }
549            }
550            Visibility::Public
551        },
552        Language::Bash => Visibility::Public,
553        Language::Haskell
554        | Language::Elixir
555        | Language::Clojure
556        | Language::OCaml
557        | Language::FSharp
558        | Language::Lua
559        | Language::R => Visibility::Public,
560    }
561}
562
563/// Extract function calls from a function/method body
564pub fn extract_calls(node: Node<'_>, source_code: &str, language: Language) -> Vec<String> {
565    let mut calls = HashSet::new();
566
567    let body_node = find_body_node(node, language);
568    if let Some(body) = body_node {
569        collect_calls_recursive(body, source_code, language, &mut calls);
570    }
571
572    if calls.is_empty() {
573        collect_calls_recursive(node, source_code, language, &mut calls);
574    }
575
576    calls.into_iter().collect()
577}
578
579/// Find the body node of a function/method
580pub fn find_body_node(node: Node<'_>, language: Language) -> Option<Node<'_>> {
581    match language {
582        Language::Python => {
583            for child in node.children(&mut node.walk()) {
584                if child.kind() == "block" {
585                    return Some(child);
586                }
587            }
588        },
589        Language::Rust => {
590            for child in node.children(&mut node.walk()) {
591                if child.kind() == "block" {
592                    return Some(child);
593                }
594            }
595        },
596        Language::JavaScript | Language::TypeScript => {
597            for child in node.children(&mut node.walk()) {
598                let kind = child.kind();
599                if kind == "statement_block" {
600                    return Some(child);
601                }
602                if kind == "arrow_function" {
603                    if let Some(body) = find_body_node(child, language) {
604                        return Some(body);
605                    }
606                    return Some(child);
607                }
608            }
609            if node.kind() == "arrow_function" {
610                for child in node.children(&mut node.walk()) {
611                    let kind = child.kind();
612                    if kind != "formal_parameters"
613                        && kind != "identifier"
614                        && kind != "=>"
615                        && kind != "("
616                        && kind != ")"
617                        && kind != ","
618                    {
619                        return Some(child);
620                    }
621                }
622                return Some(node);
623            }
624        },
625        Language::Go => {
626            for child in node.children(&mut node.walk()) {
627                if child.kind() == "block" {
628                    return Some(child);
629                }
630            }
631        },
632        Language::Java => {
633            for child in node.children(&mut node.walk()) {
634                if child.kind() == "block" {
635                    return Some(child);
636                }
637            }
638        },
639        Language::C | Language::Cpp => {
640            for child in node.children(&mut node.walk()) {
641                if child.kind() == "compound_statement" {
642                    return Some(child);
643                }
644            }
645        },
646        Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
647            for child in node.children(&mut node.walk()) {
648                let kind = child.kind();
649                if kind == "block" || kind == "compound_statement" || kind == "function_body" {
650                    return Some(child);
651                }
652            }
653        },
654        Language::Ruby => {
655            for child in node.children(&mut node.walk()) {
656                if child.kind() == "body_statement" || child.kind() == "do_block" {
657                    return Some(child);
658                }
659            }
660        },
661        Language::Bash => {
662            for child in node.children(&mut node.walk()) {
663                if child.kind() == "compound_statement" {
664                    return Some(child);
665                }
666            }
667        },
668        Language::Haskell
669        | Language::Elixir
670        | Language::Clojure
671        | Language::OCaml
672        | Language::FSharp
673        | Language::R => {
674            return Some(node);
675        },
676        Language::Lua => {
677            for child in node.children(&mut node.walk()) {
678                if child.kind() == "block" {
679                    return Some(child);
680                }
681            }
682        },
683    }
684    None
685}
686
687/// Recursively collect function calls from a node
688pub fn collect_calls_recursive(
689    node: Node<'_>,
690    source_code: &str,
691    language: Language,
692    calls: &mut HashSet<String>,
693) {
694    let kind = node.kind();
695
696    let call_name = match language {
697        Language::Python => {
698            if kind == "call" {
699                node.child_by_field_name("function").and_then(|f| {
700                    if f.kind() == "identifier" {
701                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
702                    } else if f.kind() == "attribute" {
703                        f.child_by_field_name("attribute")
704                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
705                            .map(String::from)
706                    } else {
707                        None
708                    }
709                })
710            } else {
711                None
712            }
713        },
714        Language::Rust => {
715            if kind == "call_expression" {
716                node.child_by_field_name("function").and_then(|f| {
717                    if f.kind() == "identifier" {
718                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
719                    } else if f.kind() == "field_expression" {
720                        f.child_by_field_name("field")
721                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
722                            .map(String::from)
723                    } else if f.kind() == "scoped_identifier" {
724                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
725                    } else {
726                        None
727                    }
728                })
729            } else if kind == "macro_invocation" {
730                node.child_by_field_name("macro")
731                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
732                    .map(|s| format!("{}!", s))
733            } else {
734                None
735            }
736        },
737        Language::JavaScript | Language::TypeScript => {
738            if kind == "call_expression" {
739                node.child_by_field_name("function").and_then(|f| {
740                    if f.kind() == "identifier" {
741                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
742                    } else if f.kind() == "member_expression" {
743                        f.child_by_field_name("property")
744                            .and_then(|p| p.utf8_text(source_code.as_bytes()).ok())
745                            .map(String::from)
746                    } else {
747                        None
748                    }
749                })
750            } else {
751                None
752            }
753        },
754        Language::Go => {
755            if kind == "call_expression" {
756                node.child_by_field_name("function").and_then(|f| {
757                    if f.kind() == "identifier" {
758                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
759                    } else if f.kind() == "selector_expression" {
760                        f.child_by_field_name("field")
761                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
762                            .map(String::from)
763                    } else {
764                        None
765                    }
766                })
767            } else {
768                None
769            }
770        },
771        Language::Java => {
772            if kind == "method_invocation" {
773                node.child_by_field_name("name")
774                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
775                    .map(String::from)
776            } else {
777                None
778            }
779        },
780        Language::C | Language::Cpp => {
781            if kind == "call_expression" {
782                node.child_by_field_name("function").and_then(|f| {
783                    if f.kind() == "identifier" {
784                        f.utf8_text(source_code.as_bytes()).ok().map(String::from)
785                    } else if f.kind() == "field_expression" {
786                        f.child_by_field_name("field")
787                            .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
788                            .map(String::from)
789                    } else {
790                        None
791                    }
792                })
793            } else {
794                None
795            }
796        },
797        Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
798            if kind == "invocation_expression" || kind == "call_expression" {
799                node.children(&mut node.walk())
800                    .find(|child| child.kind() == "identifier" || child.kind() == "simple_name")
801                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
802                    .map(|s| s.to_owned())
803            } else {
804                None
805            }
806        },
807        Language::Ruby => {
808            if kind == "call" || kind == "method_call" {
809                node.child_by_field_name("method")
810                    .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
811                    .map(String::from)
812            } else {
813                None
814            }
815        },
816        Language::Bash => {
817            if kind == "command" {
818                node.child_by_field_name("name")
819                    .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
820                    .map(String::from)
821            } else {
822                None
823            }
824        },
825        Language::Haskell
826        | Language::Elixir
827        | Language::Clojure
828        | Language::OCaml
829        | Language::FSharp
830        | Language::Lua
831        | Language::R => {
832            if kind == "function_call" || kind == "call" || kind == "application" {
833                node.children(&mut node.walk())
834                    .find(|child| child.kind() == "identifier" || child.kind() == "variable")
835                    .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
836                    .map(|s| s.to_owned())
837            } else {
838                None
839            }
840        },
841    };
842
843    if let Some(name) = call_name {
844        if !is_builtin(&name, language) {
845            calls.insert(name);
846        }
847    }
848
849    for child in node.children(&mut node.walk()) {
850        collect_calls_recursive(child, source_code, language, calls);
851    }
852}
853
854/// Check if a function name is a common built-in
855pub fn is_builtin(name: &str, language: Language) -> bool {
856    match language {
857        Language::Python => {
858            matches!(
859                name,
860                "print"
861                    | "len"
862                    | "range"
863                    | "str"
864                    | "int"
865                    | "float"
866                    | "list"
867                    | "dict"
868                    | "set"
869                    | "tuple"
870                    | "bool"
871                    | "type"
872                    | "isinstance"
873                    | "hasattr"
874                    | "getattr"
875                    | "setattr"
876                    | "super"
877                    | "iter"
878                    | "next"
879                    | "open"
880                    | "input"
881                    | "format"
882                    | "enumerate"
883                    | "zip"
884                    | "map"
885                    | "filter"
886                    | "sorted"
887                    | "reversed"
888                    | "sum"
889                    | "min"
890                    | "max"
891                    | "abs"
892                    | "round"
893                    | "ord"
894                    | "chr"
895                    | "hex"
896                    | "bin"
897                    | "oct"
898            )
899        },
900        Language::JavaScript | Language::TypeScript => {
901            matches!(
902                name,
903                "console"
904                    | "log"
905                    | "error"
906                    | "warn"
907                    | "parseInt"
908                    | "parseFloat"
909                    | "setTimeout"
910                    | "setInterval"
911                    | "clearTimeout"
912                    | "clearInterval"
913                    | "JSON"
914                    | "stringify"
915                    | "parse"
916                    | "toString"
917                    | "valueOf"
918                    | "push"
919                    | "pop"
920                    | "shift"
921                    | "unshift"
922                    | "slice"
923                    | "splice"
924                    | "map"
925                    | "filter"
926                    | "reduce"
927                    | "forEach"
928                    | "find"
929                    | "findIndex"
930                    | "includes"
931                    | "indexOf"
932                    | "join"
933                    | "split"
934                    | "replace"
935            )
936        },
937        Language::Rust => {
938            matches!(
939                name,
940                "println!"
941                    | "print!"
942                    | "eprintln!"
943                    | "eprint!"
944                    | "format!"
945                    | "vec!"
946                    | "panic!"
947                    | "assert!"
948                    | "assert_eq!"
949                    | "assert_ne!"
950                    | "debug!"
951                    | "info!"
952                    | "warn!"
953                    | "error!"
954                    | "trace!"
955                    | "unwrap"
956                    | "expect"
957                    | "ok"
958                    | "err"
959                    | "some"
960                    | "none"
961                    | "clone"
962                    | "to_string"
963                    | "into"
964                    | "from"
965                    | "default"
966                    | "iter"
967                    | "into_iter"
968                    | "collect"
969                    | "map"
970                    | "filter"
971            )
972        },
973        Language::Go => {
974            matches!(
975                name,
976                "fmt"
977                    | "Println"
978                    | "Printf"
979                    | "Sprintf"
980                    | "Errorf"
981                    | "make"
982                    | "new"
983                    | "len"
984                    | "cap"
985                    | "append"
986                    | "copy"
987                    | "delete"
988                    | "close"
989                    | "panic"
990                    | "recover"
991                    | "print"
992            )
993        },
994        Language::Java => {
995            matches!(
996                name,
997                "println"
998                    | "print"
999                    | "printf"
1000                    | "toString"
1001                    | "equals"
1002                    | "hashCode"
1003                    | "getClass"
1004                    | "clone"
1005                    | "notify"
1006                    | "wait"
1007                    | "get"
1008                    | "set"
1009                    | "add"
1010                    | "remove"
1011                    | "size"
1012                    | "isEmpty"
1013                    | "contains"
1014                    | "iterator"
1015                    | "valueOf"
1016                    | "parseInt"
1017            )
1018        },
1019        Language::C | Language::Cpp => {
1020            matches!(
1021                name,
1022                "printf"
1023                    | "scanf"
1024                    | "malloc"
1025                    | "free"
1026                    | "memcpy"
1027                    | "memset"
1028                    | "strlen"
1029                    | "strcpy"
1030                    | "strcmp"
1031                    | "strcat"
1032                    | "sizeof"
1033                    | "cout"
1034                    | "cin"
1035                    | "endl"
1036                    | "cerr"
1037                    | "clog"
1038            )
1039        },
1040        Language::CSharp => {
1041            matches!(
1042                name,
1043                "WriteLine"
1044                    | "Write"
1045                    | "ReadLine"
1046                    | "ToString"
1047                    | "Equals"
1048                    | "GetHashCode"
1049                    | "GetType"
1050                    | "Add"
1051                    | "Remove"
1052                    | "Contains"
1053                    | "Count"
1054                    | "Clear"
1055                    | "ToList"
1056                    | "ToArray"
1057            )
1058        },
1059        Language::Ruby => {
1060            matches!(
1061                name,
1062                "puts"
1063                    | "print"
1064                    | "p"
1065                    | "gets"
1066                    | "each"
1067                    | "map"
1068                    | "select"
1069                    | "reject"
1070                    | "reduce"
1071                    | "inject"
1072                    | "find"
1073                    | "any?"
1074                    | "all?"
1075                    | "include?"
1076                    | "empty?"
1077                    | "nil?"
1078                    | "length"
1079                    | "size"
1080            )
1081        },
1082        Language::Php => {
1083            matches!(
1084                name,
1085                "echo"
1086                    | "print"
1087                    | "var_dump"
1088                    | "print_r"
1089                    | "isset"
1090                    | "empty"
1091                    | "array"
1092                    | "count"
1093                    | "strlen"
1094                    | "strpos"
1095                    | "substr"
1096                    | "explode"
1097                    | "implode"
1098                    | "json_encode"
1099                    | "json_decode"
1100            )
1101        },
1102        Language::Kotlin => {
1103            matches!(
1104                name,
1105                "println"
1106                    | "print"
1107                    | "readLine"
1108                    | "toString"
1109                    | "equals"
1110                    | "hashCode"
1111                    | "map"
1112                    | "filter"
1113                    | "forEach"
1114                    | "let"
1115                    | "also"
1116                    | "apply"
1117                    | "run"
1118                    | "with"
1119                    | "listOf"
1120                    | "mapOf"
1121                    | "setOf"
1122            )
1123        },
1124        Language::Swift => {
1125            matches!(
1126                name,
1127                "print"
1128                    | "debugPrint"
1129                    | "dump"
1130                    | "map"
1131                    | "filter"
1132                    | "reduce"
1133                    | "forEach"
1134                    | "contains"
1135                    | "count"
1136                    | "isEmpty"
1137                    | "append"
1138            )
1139        },
1140        Language::Scala => {
1141            matches!(
1142                name,
1143                "println"
1144                    | "print"
1145                    | "map"
1146                    | "filter"
1147                    | "flatMap"
1148                    | "foreach"
1149                    | "reduce"
1150                    | "fold"
1151                    | "foldLeft"
1152                    | "foldRight"
1153                    | "collect"
1154            )
1155        },
1156        Language::Bash
1157        | Language::Haskell
1158        | Language::Elixir
1159        | Language::Clojure
1160        | Language::OCaml
1161        | Language::FSharp
1162        | Language::Lua
1163        | Language::R => false,
1164    }
1165}
1166
/// Clean JSDoc comment
///
/// Flattens a `/** ... */` (or `/* ... */`) block comment into one line:
/// each input line is trimmed, stripped of its comment delimiters and
/// leading `*` gutter, and the non-empty remainders are joined with a
/// single space.
///
/// A line holding only the closing delimiter (`*/` or `**/`) contributes
/// nothing to the result.
pub fn clean_jsdoc(text: &str) -> String {
    text.lines()
        .map(|line| {
            line.trim()
                // Remove the terminator before the leading `*` gutter. Doing it the
                // other way round turns a bare `*/` line into a stray "/".
                .trim_end_matches("*/")
                .trim_start_matches("/**")
                .trim_start_matches("/*")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}

/// Clean JavaDoc comment
///
/// Delegates to [`clean_jsdoc`]; the same delimiter and `*` gutter stripping
/// is applied.
pub fn clean_javadoc(text: &str) -> String {
    clean_jsdoc(text)
}
1187
1188/// Extract class inheritance (extends) and interface implementations (implements)
1189pub fn extract_inheritance(
1190    node: Node<'_>,
1191    source_code: &str,
1192    language: Language,
1193) -> (Option<String>, Vec<String>) {
1194    let mut extends = None;
1195    let mut implements = Vec::new();
1196
1197    match language {
1198        Language::Python => {
1199            // Python: class Foo(Bar, Baz): - all are considered base classes
1200            if node.kind() == "class_definition" {
1201                if let Some(args) = node.child_by_field_name("superclasses") {
1202                    for child in args.children(&mut args.walk()) {
1203                        if child.kind() == "identifier" || child.kind() == "attribute" {
1204                            if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1205                                if extends.is_none() {
1206                                    extends = Some(name.to_owned());
1207                                } else {
1208                                    implements.push(name.to_owned());
1209                                }
1210                            }
1211                        }
1212                    }
1213                }
1214            }
1215        },
1216        Language::JavaScript | Language::TypeScript => {
1217            // JS/TS: class Foo extends Bar implements Baz
1218            if node.kind() == "class_declaration" || node.kind() == "class" {
1219                for child in node.children(&mut node.walk()) {
1220                    if child.kind() == "class_heritage" {
1221                        for heritage in child.children(&mut child.walk()) {
1222                            if heritage.kind() == "extends_clause" {
1223                                for type_node in heritage.children(&mut heritage.walk()) {
1224                                    if type_node.kind() == "identifier"
1225                                        || type_node.kind() == "type_identifier"
1226                                    {
1227                                        if let Ok(name) =
1228                                            type_node.utf8_text(source_code.as_bytes())
1229                                        {
1230                                            extends = Some(name.to_owned());
1231                                        }
1232                                    }
1233                                }
1234                            } else if heritage.kind() == "implements_clause" {
1235                                for type_node in heritage.children(&mut heritage.walk()) {
1236                                    if type_node.kind() == "identifier"
1237                                        || type_node.kind() == "type_identifier"
1238                                    {
1239                                        if let Ok(name) =
1240                                            type_node.utf8_text(source_code.as_bytes())
1241                                        {
1242                                            implements.push(name.to_owned());
1243                                        }
1244                                    }
1245                                }
1246                            }
1247                        }
1248                    }
1249                }
1250            }
1251        },
1252        Language::Rust => {
1253            // Rust doesn't have class inheritance, but has trait implementations
1254            // impl Trait for Struct
1255            if node.kind() == "impl_item" {
1256                let mut has_for = false;
1257                for child in node.children(&mut node.walk()) {
1258                    if child.kind() == "for" {
1259                        has_for = true;
1260                    }
1261                    if child.kind() == "type_identifier" || child.kind() == "generic_type" {
1262                        if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1263                            if has_for {
1264                                // This is the struct being implemented
1265                            } else {
1266                                // This is the trait being implemented
1267                                implements.push(name.to_owned());
1268                            }
1269                        }
1270                    }
1271                }
1272            }
1273        },
1274        Language::Go => {
1275            // Go uses embedding for "inheritance"
1276            if node.kind() == "type_declaration" {
1277                for child in node.children(&mut node.walk()) {
1278                    if child.kind() == "type_spec" {
1279                        for spec_child in child.children(&mut child.walk()) {
1280                            if spec_child.kind() == "struct_type" {
1281                                for field in spec_child.children(&mut spec_child.walk()) {
1282                                    if field.kind() == "field_declaration" {
1283                                        // Embedded field (no name, just type)
1284                                        let has_name = field.child_by_field_name("name").is_some();
1285                                        if !has_name {
1286                                            if let Some(type_node) =
1287                                                field.child_by_field_name("type")
1288                                            {
1289                                                if let Ok(name) =
1290                                                    type_node.utf8_text(source_code.as_bytes())
1291                                                {
1292                                                    implements.push(name.to_owned());
1293                                                }
1294                                            }
1295                                        }
1296                                    }
1297                                }
1298                            }
1299                        }
1300                    }
1301                }
1302            }
1303        },
1304        Language::Java => {
1305            // Java: class Foo extends Bar implements Baz, Qux
1306            if node.kind() == "class_declaration" {
1307                for child in node.children(&mut node.walk()) {
1308                    if child.kind() == "superclass" {
1309                        for type_node in child.children(&mut child.walk()) {
1310                            if type_node.kind() == "type_identifier" {
1311                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1312                                    extends = Some(name.to_owned());
1313                                }
1314                            }
1315                        }
1316                    } else if child.kind() == "super_interfaces" {
1317                        for type_list in child.children(&mut child.walk()) {
1318                            if type_list.kind() == "type_list" {
1319                                for type_node in type_list.children(&mut type_list.walk()) {
1320                                    if type_node.kind() == "type_identifier" {
1321                                        if let Ok(name) =
1322                                            type_node.utf8_text(source_code.as_bytes())
1323                                        {
1324                                            implements.push(name.to_owned());
1325                                        }
1326                                    }
1327                                }
1328                            }
1329                        }
1330                    }
1331                }
1332            }
1333        },
1334        Language::C | Language::Cpp => {
1335            // C++: class Foo : public Bar, public Baz
1336            if node.kind() == "class_specifier" || node.kind() == "struct_specifier" {
1337                for child in node.children(&mut node.walk()) {
1338                    if child.kind() == "base_class_clause" {
1339                        for base in child.children(&mut child.walk()) {
1340                            if base.kind() == "type_identifier" {
1341                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1342                                    if extends.is_none() {
1343                                        extends = Some(name.to_owned());
1344                                    } else {
1345                                        implements.push(name.to_owned());
1346                                    }
1347                                }
1348                            }
1349                        }
1350                    }
1351                }
1352            }
1353        },
1354        Language::CSharp => {
1355            // C#: class Foo : Bar, IBaz
1356            if node.kind() == "class_declaration" {
1357                for child in node.children(&mut node.walk()) {
1358                    if child.kind() == "base_list" {
1359                        for base in child.children(&mut child.walk()) {
1360                            if base.kind() == "identifier" || base.kind() == "generic_name" {
1361                                if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1362                                    if name.starts_with('I') && name.len() > 1 {
1363                                        // Convention: interfaces start with I
1364                                        implements.push(name.to_owned());
1365                                    } else if extends.is_none() {
1366                                        extends = Some(name.to_owned());
1367                                    } else {
1368                                        implements.push(name.to_owned());
1369                                    }
1370                                }
1371                            }
1372                        }
1373                    }
1374                }
1375            }
1376        },
1377        Language::Ruby => {
1378            // Ruby: class Foo < Bar; include Baz
1379            if node.kind() == "class" {
1380                for child in node.children(&mut node.walk()) {
1381                    if child.kind() == "superclass" {
1382                        for type_node in child.children(&mut child.walk()) {
1383                            if type_node.kind() == "constant" {
1384                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1385                                    extends = Some(name.to_owned());
1386                                }
1387                            }
1388                        }
1389                    }
1390                }
1391            }
1392        },
1393        Language::Php => {
1394            // PHP: class Foo extends Bar implements Baz
1395            if node.kind() == "class_declaration" {
1396                for child in node.children(&mut node.walk()) {
1397                    if child.kind() == "base_clause" {
1398                        for type_node in child.children(&mut child.walk()) {
1399                            if type_node.kind() == "name" {
1400                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1401                                    extends = Some(name.to_owned());
1402                                }
1403                            }
1404                        }
1405                    } else if child.kind() == "class_interface_clause" {
1406                        for type_node in child.children(&mut child.walk()) {
1407                            if type_node.kind() == "name" {
1408                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1409                                    implements.push(name.to_owned());
1410                                }
1411                            }
1412                        }
1413                    }
1414                }
1415            }
1416        },
1417        Language::Kotlin => {
1418            // Kotlin: class Foo : Bar(), Baz
1419            if node.kind() == "class_declaration" {
1420                for child in node.children(&mut node.walk()) {
1421                    if child.kind() == "delegation_specifiers" {
1422                        for spec in child.children(&mut child.walk()) {
1423                            if spec.kind() == "delegation_specifier" {
1424                                for type_node in spec.children(&mut spec.walk()) {
1425                                    if type_node.kind() == "user_type" {
1426                                        if let Ok(name) =
1427                                            type_node.utf8_text(source_code.as_bytes())
1428                                        {
1429                                            if extends.is_none() {
1430                                                extends = Some(name.to_owned());
1431                                            } else {
1432                                                implements.push(name.to_owned());
1433                                            }
1434                                        }
1435                                    }
1436                                }
1437                            }
1438                        }
1439                    }
1440                }
1441            }
1442        },
1443        Language::Swift => {
1444            // Swift: class Foo: Bar, Protocol
1445            if node.kind() == "class_declaration" {
1446                for child in node.children(&mut node.walk()) {
1447                    if child.kind() == "type_inheritance_clause" {
1448                        for type_node in child.children(&mut child.walk()) {
1449                            if type_node.kind() == "type_identifier" {
1450                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1451                                    if extends.is_none() {
1452                                        extends = Some(name.to_owned());
1453                                    } else {
1454                                        implements.push(name.to_owned());
1455                                    }
1456                                }
1457                            }
1458                        }
1459                    }
1460                }
1461            }
1462        },
1463        Language::Scala => {
1464            // Scala: class Foo extends Bar with Baz
1465            if node.kind() == "class_definition" {
1466                for child in node.children(&mut node.walk()) {
1467                    if child.kind() == "extends_clause" {
1468                        for type_node in child.children(&mut child.walk()) {
1469                            if type_node.kind() == "type_identifier" {
1470                                if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1471                                    if extends.is_none() {
1472                                        extends = Some(name.to_owned());
1473                                    } else {
1474                                        implements.push(name.to_owned());
1475                                    }
1476                                }
1477                            }
1478                        }
1479                    }
1480                }
1481            }
1482        },
1483        Language::Bash
1484        | Language::Haskell
1485        | Language::Elixir
1486        | Language::Clojure
1487        | Language::OCaml
1488        | Language::FSharp
1489        | Language::Lua
1490        | Language::R => {},
1491    }
1492
1493    (extends, implements)
1494}
1495
1496/// Map capture name to SymbolKind
1497pub fn map_symbol_kind(capture_name: &str) -> SymbolKind {
1498    match capture_name {
1499        "function" => SymbolKind::Function,
1500        "class" => SymbolKind::Class,
1501        "method" => SymbolKind::Method,
1502        "struct" => SymbolKind::Struct,
1503        "enum" => SymbolKind::Enum,
1504        "interface" => SymbolKind::Interface,
1505        "trait" => SymbolKind::Trait,
1506        _ => SymbolKind::Function,
1507    }
1508}
1509
1510#[cfg(test)]
1511mod tests {
1512    use super::*;
1513
1514    // ==========================================================================
1515    // safe_char_boundary tests
1516    // ==========================================================================
1517
1518    #[test]
1519    fn test_safe_char_boundary_ascii() {
1520        let s = "hello world";
1521        assert_eq!(safe_char_boundary(s, 0), 0);
1522        assert_eq!(safe_char_boundary(s, 5), 5);
1523        assert_eq!(safe_char_boundary(s, 11), 11);
1524    }
1525
1526    #[test]
1527    fn test_safe_char_boundary_beyond_length() {
1528        let s = "hello";
1529        assert_eq!(safe_char_boundary(s, 100), 5);
1530        assert_eq!(safe_char_boundary(s, 5), 5);
1531    }
1532
1533    #[test]
1534    fn test_safe_char_boundary_empty_string() {
1535        let s = "";
1536        assert_eq!(safe_char_boundary(s, 0), 0);
1537        assert_eq!(safe_char_boundary(s, 10), 0);
1538    }
1539
1540    #[test]
1541    fn test_safe_char_boundary_multibyte_utf8() {
1542        // Chinese character "中" is 3 bytes: E4 B8 AD
1543        let s = "中文";
1544        // Index 0 is valid (start of first char)
1545        assert_eq!(safe_char_boundary(s, 0), 0);
1546        // Index 1 is in the middle of "中", should back up to 0
1547        assert_eq!(safe_char_boundary(s, 1), 0);
1548        // Index 2 is also in the middle
1549        assert_eq!(safe_char_boundary(s, 2), 0);
        // Index 3 is the start of "文"
1551        assert_eq!(safe_char_boundary(s, 3), 3);
        // Index 4 is in the middle of "文"
1553        assert_eq!(safe_char_boundary(s, 4), 3);
1554    }
1555
1556    #[test]
1557    fn test_safe_char_boundary_emoji() {
1558        // "👋" emoji is 4 bytes
1559        let s = "Hello 👋 World";
1560        // The emoji starts at byte 6
1561        assert_eq!(safe_char_boundary(s, 6), 6);
1562        // Middle of emoji should back up
1563        assert_eq!(safe_char_boundary(s, 7), 6);
1564        assert_eq!(safe_char_boundary(s, 8), 6);
1565        assert_eq!(safe_char_boundary(s, 9), 6);
1566        // After emoji (byte 10)
1567        assert_eq!(safe_char_boundary(s, 10), 10);
1568    }
1569
1570    #[test]
1571    fn test_safe_char_boundary_mixed_content() {
1572        // Mix of ASCII and multi-byte
1573        let s = "aбв"; // 'a' is 1 byte, 'б' and 'в' are 2 bytes each
1574        assert_eq!(safe_char_boundary(s, 0), 0);
1575        assert_eq!(safe_char_boundary(s, 1), 1); // Start of 'б'
1576        assert_eq!(safe_char_boundary(s, 2), 1); // Middle of 'б', back to 1
1577        assert_eq!(safe_char_boundary(s, 3), 3); // Start of 'в'
1578        assert_eq!(safe_char_boundary(s, 4), 3); // Middle of 'в'
1579        assert_eq!(safe_char_boundary(s, 5), 5); // End
1580    }
1581
1582    // ==========================================================================
1583    // clean_jsdoc tests
1584    // ==========================================================================
1585
1586    #[test]
1587    fn test_clean_jsdoc_simple() {
1588        let input = "/** This is a simple doc */";
1589        assert_eq!(clean_jsdoc(input), "This is a simple doc");
1590    }
1591
1592    #[test]
1593    fn test_clean_jsdoc_multiline() {
1594        let input = "/**\n * Line 1\n * Line 2\n */";
1595        let result = clean_jsdoc(input);
1596        // Trailing slash is kept when on its own line
1597        assert!(result.contains("Line 1"));
1598        assert!(result.contains("Line 2"));
1599    }
1600
1601    #[test]
1602    fn test_clean_jsdoc_with_asterisks() {
1603        let input = "/**\n * First line\n * Second line\n * Third line\n */";
1604        let result = clean_jsdoc(input);
1605        assert!(result.contains("First line"));
1606        assert!(result.contains("Second line"));
1607        assert!(result.contains("Third line"));
1608    }
1609
1610    #[test]
1611    fn test_clean_jsdoc_empty() {
1612        let input = "/** */";
1613        assert_eq!(clean_jsdoc(input), "");
1614    }
1615
1616    #[test]
1617    fn test_clean_jsdoc_c_style_comment() {
1618        let input = "/* Regular C comment */";
1619        assert_eq!(clean_jsdoc(input), "Regular C comment");
1620    }
1621
1622    #[test]
1623    fn test_clean_jsdoc_with_tags() {
1624        let input = "/**\n * Description\n * @param x The x value\n * @returns Result\n */";
1625        let result = clean_jsdoc(input);
1626        assert!(result.contains("Description"));
1627        assert!(result.contains("@param x"));
1628        assert!(result.contains("@returns"));
1629    }
1630
1631    #[test]
1632    fn test_clean_jsdoc_whitespace_handling() {
1633        let input = "/**   \n   *    Lots of spaces    \n   */";
1634        assert!(clean_jsdoc(input).contains("Lots of spaces"));
1635    }
1636
1637    // ==========================================================================
1638    // clean_javadoc tests
1639    // ==========================================================================
1640
1641    #[test]
1642    fn test_clean_javadoc_simple() {
1643        let input = "/** JavaDoc comment */";
1644        assert_eq!(clean_javadoc(input), "JavaDoc comment");
1645    }
1646
1647    #[test]
1648    fn test_clean_javadoc_multiline() {
1649        let input = "/**\n * Method description.\n * @param name The name\n */";
1650        let result = clean_javadoc(input);
1651        assert!(result.contains("Method description"));
1652        assert!(result.contains("@param name"));
1653    }
1654
1655    // ==========================================================================
1656    // map_symbol_kind tests
1657    // ==========================================================================
1658
1659    #[test]
1660    fn test_map_symbol_kind_function() {
1661        assert_eq!(map_symbol_kind("function"), SymbolKind::Function);
1662    }
1663
1664    #[test]
1665    fn test_map_symbol_kind_class() {
1666        assert_eq!(map_symbol_kind("class"), SymbolKind::Class);
1667    }
1668
1669    #[test]
1670    fn test_map_symbol_kind_method() {
1671        assert_eq!(map_symbol_kind("method"), SymbolKind::Method);
1672    }
1673
1674    #[test]
1675    fn test_map_symbol_kind_struct() {
1676        assert_eq!(map_symbol_kind("struct"), SymbolKind::Struct);
1677    }
1678
1679    #[test]
1680    fn test_map_symbol_kind_enum() {
1681        assert_eq!(map_symbol_kind("enum"), SymbolKind::Enum);
1682    }
1683
1684    #[test]
1685    fn test_map_symbol_kind_interface() {
1686        assert_eq!(map_symbol_kind("interface"), SymbolKind::Interface);
1687    }
1688
1689    #[test]
1690    fn test_map_symbol_kind_trait() {
1691        assert_eq!(map_symbol_kind("trait"), SymbolKind::Trait);
1692    }
1693
1694    #[test]
1695    fn test_map_symbol_kind_unknown() {
1696        // Unknown capture names default to Function
1697        assert_eq!(map_symbol_kind("unknown"), SymbolKind::Function);
1698        assert_eq!(map_symbol_kind(""), SymbolKind::Function);
1699        assert_eq!(map_symbol_kind("random"), SymbolKind::Function);
1700    }
1701
1702    // ==========================================================================
1703    // is_builtin tests - Python
1704    // ==========================================================================
1705
1706    #[test]
1707    fn test_is_builtin_python_print() {
1708        assert!(is_builtin("print", Language::Python));
1709        assert!(is_builtin("len", Language::Python));
1710        assert!(is_builtin("range", Language::Python));
1711        assert!(is_builtin("str", Language::Python));
1712        assert!(is_builtin("int", Language::Python));
1713        assert!(is_builtin("float", Language::Python));
1714        assert!(is_builtin("list", Language::Python));
1715        assert!(is_builtin("dict", Language::Python));
1716        assert!(is_builtin("set", Language::Python));
1717        assert!(is_builtin("tuple", Language::Python));
1718    }
1719
1720    #[test]
1721    fn test_is_builtin_python_type_funcs() {
1722        assert!(is_builtin("bool", Language::Python));
1723        assert!(is_builtin("type", Language::Python));
1724        assert!(is_builtin("isinstance", Language::Python));
1725        assert!(is_builtin("hasattr", Language::Python));
1726        assert!(is_builtin("getattr", Language::Python));
1727        assert!(is_builtin("setattr", Language::Python));
1728        assert!(is_builtin("super", Language::Python));
1729    }
1730
1731    #[test]
1732    fn test_is_builtin_python_itertools() {
1733        assert!(is_builtin("iter", Language::Python));
1734        assert!(is_builtin("next", Language::Python));
1735        assert!(is_builtin("enumerate", Language::Python));
1736        assert!(is_builtin("zip", Language::Python));
1737        assert!(is_builtin("map", Language::Python));
1738        assert!(is_builtin("filter", Language::Python));
1739        assert!(is_builtin("sorted", Language::Python));
1740        assert!(is_builtin("reversed", Language::Python));
1741    }
1742
1743    #[test]
1744    fn test_is_builtin_python_math() {
1745        assert!(is_builtin("sum", Language::Python));
1746        assert!(is_builtin("min", Language::Python));
1747        assert!(is_builtin("max", Language::Python));
1748        assert!(is_builtin("abs", Language::Python));
1749        assert!(is_builtin("round", Language::Python));
1750    }
1751
1752    #[test]
1753    fn test_is_builtin_python_not_builtin() {
1754        assert!(!is_builtin("my_function", Language::Python));
1755        assert!(!is_builtin("custom_print", Language::Python));
1756        assert!(!is_builtin("calculate", Language::Python));
1757    }
1758
1759    // ==========================================================================
1760    // is_builtin tests - JavaScript/TypeScript
1761    // ==========================================================================
1762
1763    #[test]
1764    fn test_is_builtin_js_console() {
1765        assert!(is_builtin("console", Language::JavaScript));
1766        assert!(is_builtin("log", Language::JavaScript));
1767        assert!(is_builtin("error", Language::JavaScript));
1768        assert!(is_builtin("warn", Language::JavaScript));
1769    }
1770
1771    #[test]
1772    fn test_is_builtin_js_parsing() {
1773        assert!(is_builtin("parseInt", Language::JavaScript));
1774        assert!(is_builtin("parseFloat", Language::JavaScript));
1775        assert!(is_builtin("JSON", Language::JavaScript));
1776        assert!(is_builtin("stringify", Language::JavaScript));
1777        assert!(is_builtin("parse", Language::JavaScript));
1778    }
1779
1780    #[test]
1781    fn test_is_builtin_js_timers() {
1782        assert!(is_builtin("setTimeout", Language::JavaScript));
1783        assert!(is_builtin("setInterval", Language::JavaScript));
1784        assert!(is_builtin("clearTimeout", Language::JavaScript));
1785        assert!(is_builtin("clearInterval", Language::JavaScript));
1786    }
1787
1788    #[test]
1789    fn test_is_builtin_js_array_methods() {
1790        assert!(is_builtin("push", Language::JavaScript));
1791        assert!(is_builtin("pop", Language::JavaScript));
1792        assert!(is_builtin("shift", Language::JavaScript));
1793        assert!(is_builtin("unshift", Language::JavaScript));
1794        assert!(is_builtin("slice", Language::JavaScript));
1795        assert!(is_builtin("splice", Language::JavaScript));
1796        assert!(is_builtin("map", Language::JavaScript));
1797        assert!(is_builtin("filter", Language::JavaScript));
1798        assert!(is_builtin("reduce", Language::JavaScript));
1799        assert!(is_builtin("forEach", Language::JavaScript));
1800    }
1801
1802    #[test]
1803    fn test_is_builtin_ts_same_as_js() {
1804        assert!(is_builtin("console", Language::TypeScript));
1805        assert!(is_builtin("map", Language::TypeScript));
1806        assert!(is_builtin("filter", Language::TypeScript));
1807    }
1808
1809    #[test]
1810    fn test_is_builtin_js_not_builtin() {
1811        assert!(!is_builtin("myFunction", Language::JavaScript));
1812        assert!(!is_builtin("customLog", Language::JavaScript));
1813    }
1814
1815    // ==========================================================================
1816    // is_builtin tests - Rust
1817    // ==========================================================================
1818
1819    #[test]
1820    fn test_is_builtin_rust_macros() {
1821        assert!(is_builtin("println!", Language::Rust));
1822        assert!(is_builtin("print!", Language::Rust));
1823        assert!(is_builtin("eprintln!", Language::Rust));
1824        assert!(is_builtin("eprint!", Language::Rust));
1825        assert!(is_builtin("format!", Language::Rust));
1826        assert!(is_builtin("vec!", Language::Rust));
1827        assert!(is_builtin("panic!", Language::Rust));
1828        assert!(is_builtin("assert!", Language::Rust));
1829        assert!(is_builtin("assert_eq!", Language::Rust));
1830        assert!(is_builtin("assert_ne!", Language::Rust));
1831    }
1832
1833    #[test]
1834    fn test_is_builtin_rust_logging() {
1835        assert!(is_builtin("debug!", Language::Rust));
1836        assert!(is_builtin("info!", Language::Rust));
1837        assert!(is_builtin("warn!", Language::Rust));
1838        assert!(is_builtin("error!", Language::Rust));
1839        assert!(is_builtin("trace!", Language::Rust));
1840    }
1841
1842    #[test]
1843    fn test_is_builtin_rust_common_methods() {
1844        assert!(is_builtin("unwrap", Language::Rust));
1845        assert!(is_builtin("expect", Language::Rust));
1846        assert!(is_builtin("ok", Language::Rust));
1847        assert!(is_builtin("err", Language::Rust));
1848        assert!(is_builtin("some", Language::Rust));
1849        assert!(is_builtin("none", Language::Rust));
1850        assert!(is_builtin("clone", Language::Rust));
1851        assert!(is_builtin("to_string", Language::Rust));
1852        assert!(is_builtin("into", Language::Rust));
1853        assert!(is_builtin("from", Language::Rust));
1854        assert!(is_builtin("default", Language::Rust));
1855    }
1856
1857    #[test]
1858    fn test_is_builtin_rust_iterators() {
1859        assert!(is_builtin("iter", Language::Rust));
1860        assert!(is_builtin("into_iter", Language::Rust));
1861        assert!(is_builtin("collect", Language::Rust));
1862        assert!(is_builtin("map", Language::Rust));
1863        assert!(is_builtin("filter", Language::Rust));
1864    }
1865
1866    #[test]
1867    fn test_is_builtin_rust_not_builtin() {
1868        assert!(!is_builtin("my_function", Language::Rust));
1869        assert!(!is_builtin("process_data", Language::Rust));
1870    }
1871
1872    // ==========================================================================
1873    // is_builtin tests - Go
1874    // ==========================================================================
1875
1876    #[test]
1877    fn test_is_builtin_go_fmt() {
1878        assert!(is_builtin("fmt", Language::Go));
1879        assert!(is_builtin("Println", Language::Go));
1880        assert!(is_builtin("Printf", Language::Go));
1881        assert!(is_builtin("Sprintf", Language::Go));
1882        assert!(is_builtin("Errorf", Language::Go));
1883    }
1884
1885    #[test]
1886    fn test_is_builtin_go_memory() {
1887        assert!(is_builtin("make", Language::Go));
1888        assert!(is_builtin("new", Language::Go));
1889        assert!(is_builtin("len", Language::Go));
1890        assert!(is_builtin("cap", Language::Go));
1891        assert!(is_builtin("append", Language::Go));
1892        assert!(is_builtin("copy", Language::Go));
1893        assert!(is_builtin("delete", Language::Go));
1894    }
1895
1896    #[test]
1897    fn test_is_builtin_go_control() {
1898        assert!(is_builtin("close", Language::Go));
1899        assert!(is_builtin("panic", Language::Go));
1900        assert!(is_builtin("recover", Language::Go));
1901        assert!(is_builtin("print", Language::Go));
1902    }
1903
1904    #[test]
1905    fn test_is_builtin_go_not_builtin() {
1906        assert!(!is_builtin("ProcessData", Language::Go));
1907        assert!(!is_builtin("handleRequest", Language::Go));
1908    }
1909
1910    // ==========================================================================
1911    // is_builtin tests - Java
1912    // ==========================================================================
1913
1914    #[test]
1915    fn test_is_builtin_java_io() {
1916        assert!(is_builtin("println", Language::Java));
1917        assert!(is_builtin("print", Language::Java));
1918        assert!(is_builtin("printf", Language::Java));
1919    }
1920
1921    #[test]
1922    fn test_is_builtin_java_object() {
1923        assert!(is_builtin("toString", Language::Java));
1924        assert!(is_builtin("equals", Language::Java));
1925        assert!(is_builtin("hashCode", Language::Java));
1926        assert!(is_builtin("getClass", Language::Java));
1927        assert!(is_builtin("clone", Language::Java));
1928        assert!(is_builtin("notify", Language::Java));
1929        assert!(is_builtin("wait", Language::Java));
1930    }
1931
1932    #[test]
1933    fn test_is_builtin_java_collections() {
1934        assert!(is_builtin("get", Language::Java));
1935        assert!(is_builtin("set", Language::Java));
1936        assert!(is_builtin("add", Language::Java));
1937        assert!(is_builtin("remove", Language::Java));
1938        assert!(is_builtin("size", Language::Java));
1939        assert!(is_builtin("isEmpty", Language::Java));
1940        assert!(is_builtin("contains", Language::Java));
1941        assert!(is_builtin("iterator", Language::Java));
1942    }
1943
1944    #[test]
1945    fn test_is_builtin_java_not_builtin() {
1946        assert!(!is_builtin("processData", Language::Java));
1947        assert!(!is_builtin("calculateTotal", Language::Java));
1948    }
1949
1950    // ==========================================================================
1951    // is_builtin tests - C/C++
1952    // ==========================================================================
1953
1954    #[test]
1955    fn test_is_builtin_c_io() {
1956        assert!(is_builtin("printf", Language::C));
1957        assert!(is_builtin("scanf", Language::C));
1958    }
1959
1960    #[test]
1961    fn test_is_builtin_c_memory() {
1962        assert!(is_builtin("malloc", Language::C));
1963        assert!(is_builtin("free", Language::C));
1964        assert!(is_builtin("memcpy", Language::C));
1965        assert!(is_builtin("memset", Language::C));
1966    }
1967
1968    #[test]
1969    fn test_is_builtin_c_string() {
1970        assert!(is_builtin("strlen", Language::C));
1971        assert!(is_builtin("strcpy", Language::C));
1972        assert!(is_builtin("strcmp", Language::C));
1973        assert!(is_builtin("strcat", Language::C));
1974    }
1975
1976    #[test]
1977    fn test_is_builtin_cpp_streams() {
1978        assert!(is_builtin("cout", Language::Cpp));
1979        assert!(is_builtin("cin", Language::Cpp));
1980        assert!(is_builtin("endl", Language::Cpp));
1981        assert!(is_builtin("cerr", Language::Cpp));
1982        assert!(is_builtin("clog", Language::Cpp));
1983    }
1984
1985    #[test]
1986    fn test_is_builtin_c_not_builtin() {
1987        assert!(!is_builtin("process_data", Language::C));
1988        assert!(!is_builtin("custom_malloc", Language::C));
1989    }
1990
1991    // ==========================================================================
1992    // is_builtin tests - C#
1993    // ==========================================================================
1994
1995    #[test]
1996    fn test_is_builtin_csharp_console() {
1997        assert!(is_builtin("WriteLine", Language::CSharp));
1998        assert!(is_builtin("Write", Language::CSharp));
1999        assert!(is_builtin("ReadLine", Language::CSharp));
2000    }
2001
2002    #[test]
2003    fn test_is_builtin_csharp_object() {
2004        assert!(is_builtin("ToString", Language::CSharp));
2005        assert!(is_builtin("Equals", Language::CSharp));
2006        assert!(is_builtin("GetHashCode", Language::CSharp));
2007        assert!(is_builtin("GetType", Language::CSharp));
2008    }
2009
2010    #[test]
2011    fn test_is_builtin_csharp_collections() {
2012        assert!(is_builtin("Add", Language::CSharp));
2013        assert!(is_builtin("Remove", Language::CSharp));
2014        assert!(is_builtin("Contains", Language::CSharp));
2015        assert!(is_builtin("Count", Language::CSharp));
2016        assert!(is_builtin("Clear", Language::CSharp));
2017        assert!(is_builtin("ToList", Language::CSharp));
2018        assert!(is_builtin("ToArray", Language::CSharp));
2019    }
2020
2021    // ==========================================================================
2022    // is_builtin tests - Ruby
2023    // ==========================================================================
2024
2025    #[test]
2026    fn test_is_builtin_ruby_io() {
2027        assert!(is_builtin("puts", Language::Ruby));
2028        assert!(is_builtin("print", Language::Ruby));
2029        assert!(is_builtin("p", Language::Ruby));
2030        assert!(is_builtin("gets", Language::Ruby));
2031    }
2032
2033    #[test]
2034    fn test_is_builtin_ruby_enumerable() {
2035        assert!(is_builtin("each", Language::Ruby));
2036        assert!(is_builtin("map", Language::Ruby));
2037        assert!(is_builtin("select", Language::Ruby));
2038        assert!(is_builtin("reject", Language::Ruby));
2039        assert!(is_builtin("reduce", Language::Ruby));
2040        assert!(is_builtin("inject", Language::Ruby));
2041        assert!(is_builtin("find", Language::Ruby));
2042    }
2043
2044    #[test]
2045    fn test_is_builtin_ruby_predicates() {
2046        assert!(is_builtin("any?", Language::Ruby));
2047        assert!(is_builtin("all?", Language::Ruby));
2048        assert!(is_builtin("include?", Language::Ruby));
2049        assert!(is_builtin("empty?", Language::Ruby));
2050        assert!(is_builtin("nil?", Language::Ruby));
2051    }
2052
2053    // ==========================================================================
2054    // is_builtin tests - PHP
2055    // ==========================================================================
2056
2057    #[test]
2058    fn test_is_builtin_php_io() {
2059        assert!(is_builtin("echo", Language::Php));
2060        assert!(is_builtin("print", Language::Php));
2061        assert!(is_builtin("var_dump", Language::Php));
2062        assert!(is_builtin("print_r", Language::Php));
2063    }
2064
2065    #[test]
2066    fn test_is_builtin_php_checks() {
2067        assert!(is_builtin("isset", Language::Php));
2068        assert!(is_builtin("empty", Language::Php));
2069    }
2070
2071    #[test]
2072    fn test_is_builtin_php_array_string() {
2073        assert!(is_builtin("array", Language::Php));
2074        assert!(is_builtin("count", Language::Php));
2075        assert!(is_builtin("strlen", Language::Php));
2076        assert!(is_builtin("strpos", Language::Php));
2077        assert!(is_builtin("substr", Language::Php));
2078        assert!(is_builtin("explode", Language::Php));
2079        assert!(is_builtin("implode", Language::Php));
2080        assert!(is_builtin("json_encode", Language::Php));
2081        assert!(is_builtin("json_decode", Language::Php));
2082    }
2083
2084    // ==========================================================================
2085    // is_builtin tests - Kotlin
2086    // ==========================================================================
2087
2088    #[test]
2089    fn test_is_builtin_kotlin_io() {
2090        assert!(is_builtin("println", Language::Kotlin));
2091        assert!(is_builtin("print", Language::Kotlin));
2092        assert!(is_builtin("readLine", Language::Kotlin));
2093    }
2094
2095    #[test]
2096    fn test_is_builtin_kotlin_scope() {
2097        assert!(is_builtin("let", Language::Kotlin));
2098        assert!(is_builtin("also", Language::Kotlin));
2099        assert!(is_builtin("apply", Language::Kotlin));
2100        assert!(is_builtin("run", Language::Kotlin));
2101        assert!(is_builtin("with", Language::Kotlin));
2102    }
2103
2104    #[test]
2105    fn test_is_builtin_kotlin_collections() {
2106        assert!(is_builtin("listOf", Language::Kotlin));
2107        assert!(is_builtin("mapOf", Language::Kotlin));
2108        assert!(is_builtin("setOf", Language::Kotlin));
2109        assert!(is_builtin("map", Language::Kotlin));
2110        assert!(is_builtin("filter", Language::Kotlin));
2111        assert!(is_builtin("forEach", Language::Kotlin));
2112    }
2113
2114    // ==========================================================================
2115    // is_builtin tests - Swift
2116    // ==========================================================================
2117
2118    #[test]
2119    fn test_is_builtin_swift_io() {
2120        assert!(is_builtin("print", Language::Swift));
2121        assert!(is_builtin("debugPrint", Language::Swift));
2122        assert!(is_builtin("dump", Language::Swift));
2123    }
2124
2125    #[test]
2126    fn test_is_builtin_swift_functional() {
2127        assert!(is_builtin("map", Language::Swift));
2128        assert!(is_builtin("filter", Language::Swift));
2129        assert!(is_builtin("reduce", Language::Swift));
2130        assert!(is_builtin("forEach", Language::Swift));
2131    }
2132
2133    #[test]
2134    fn test_is_builtin_swift_collection() {
2135        assert!(is_builtin("contains", Language::Swift));
2136        assert!(is_builtin("count", Language::Swift));
2137        assert!(is_builtin("isEmpty", Language::Swift));
2138        assert!(is_builtin("append", Language::Swift));
2139    }
2140
2141    // ==========================================================================
2142    // is_builtin tests - Scala
2143    // ==========================================================================
2144
2145    #[test]
2146    fn test_is_builtin_scala_io() {
2147        assert!(is_builtin("println", Language::Scala));
2148        assert!(is_builtin("print", Language::Scala));
2149    }
2150
2151    #[test]
2152    fn test_is_builtin_scala_functional() {
2153        assert!(is_builtin("map", Language::Scala));
2154        assert!(is_builtin("filter", Language::Scala));
2155        assert!(is_builtin("flatMap", Language::Scala));
2156        assert!(is_builtin("foreach", Language::Scala));
2157        assert!(is_builtin("reduce", Language::Scala));
2158        assert!(is_builtin("fold", Language::Scala));
2159        assert!(is_builtin("foldLeft", Language::Scala));
2160        assert!(is_builtin("foldRight", Language::Scala));
2161        assert!(is_builtin("collect", Language::Scala));
2162    }
2163
2164    // ==========================================================================
2165    // is_builtin tests - Languages with no builtins
2166    // ==========================================================================
2167
2168    #[test]
2169    fn test_is_builtin_bash_always_false() {
2170        assert!(!is_builtin("ls", Language::Bash));
2171        assert!(!is_builtin("echo", Language::Bash));
2172        assert!(!is_builtin("grep", Language::Bash));
2173    }
2174
2175    #[test]
2176    fn test_is_builtin_haskell_always_false() {
2177        assert!(!is_builtin("putStrLn", Language::Haskell));
2178        assert!(!is_builtin("map", Language::Haskell));
2179    }
2180
2181    #[test]
2182    fn test_is_builtin_elixir_always_false() {
2183        assert!(!is_builtin("IO.puts", Language::Elixir));
2184        assert!(!is_builtin("Enum.map", Language::Elixir));
2185    }
2186
2187    #[test]
2188    fn test_is_builtin_clojure_always_false() {
2189        assert!(!is_builtin("println", Language::Clojure));
2190        assert!(!is_builtin("map", Language::Clojure));
2191    }
2192
2193    #[test]
2194    fn test_is_builtin_ocaml_always_false() {
2195        assert!(!is_builtin("print_endline", Language::OCaml));
2196        assert!(!is_builtin("List.map", Language::OCaml));
2197    }
2198
2199    #[test]
2200    fn test_is_builtin_fsharp_always_false() {
2201        assert!(!is_builtin("printfn", Language::FSharp));
2202        assert!(!is_builtin("List.map", Language::FSharp));
2203    }
2204
2205    #[test]
2206    fn test_is_builtin_lua_always_false() {
2207        assert!(!is_builtin("print", Language::Lua));
2208        assert!(!is_builtin("pairs", Language::Lua));
2209    }
2210
2211    #[test]
2212    fn test_is_builtin_r_always_false() {
2213        assert!(!is_builtin("print", Language::R));
2214        assert!(!is_builtin("cat", Language::R));
2215    }
2216
2217    // ==========================================================================
2218    // Integration tests using tree-sitter parsing
2219    // ==========================================================================
2220
2221    // Helper to parse code and get the first node of a specific kind
2222    fn parse_and_find_node(
2223        code: &str,
2224        language: Language,
2225        node_kind: &str,
2226    ) -> Option<(tree_sitter::Tree, usize)> {
2227        let mut parser = tree_sitter::Parser::new();
2228
2229        let ts_language = match language {
2230            Language::Python => tree_sitter_python::LANGUAGE,
2231            Language::Rust => tree_sitter_rust::LANGUAGE,
2232            Language::JavaScript => tree_sitter_javascript::LANGUAGE,
2233            Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
2234            Language::Go => tree_sitter_go::LANGUAGE,
2235            Language::Java => tree_sitter_java::LANGUAGE,
2236            _ => return None,
2237        };
2238
2239        parser
2240            .set_language(&ts_language.into())
2241            .expect("Error loading grammar");
2242
2243        let tree = parser.parse(code, None)?;
2244        let root = tree.root_node();
2245
2246        fn find_node_recursive(node: tree_sitter::Node<'_>, kind: &str) -> Option<usize> {
2247            if node.kind() == kind {
2248                return Some(node.id());
2249            }
2250            for child in node.children(&mut node.walk()) {
2251                if let Some(id) = find_node_recursive(child, kind) {
2252                    return Some(id);
2253                }
2254            }
2255            None
2256        }
2257
2258        find_node_recursive(root, node_kind).map(|_| (tree, 0))
2259    }
2260
2261    // Helper to find node by kind in tree
2262    fn find_node_in_tree<'a>(
2263        node: tree_sitter::Node<'a>,
2264        kind: &str,
2265    ) -> Option<tree_sitter::Node<'a>> {
2266        if node.kind() == kind {
2267            return Some(node);
2268        }
2269        for child in node.children(&mut node.walk()) {
2270            if let Some(found) = find_node_in_tree(child, kind) {
2271                return Some(found);
2272            }
2273        }
2274        None
2275    }
2276
2277    #[test]
2278    fn test_extract_signature_python() {
2279        // Note: Python signature extraction stops at first ':' or '\n'
2280        // So type annotations in parameters are cut off at the first ':'
2281        let code = "def hello(name):\n    return f'Hello {name}'";
2282        let mut parser = tree_sitter::Parser::new();
2283        parser
2284            .set_language(&tree_sitter_python::LANGUAGE.into())
2285            .unwrap();
2286        let tree = parser.parse(code, None).unwrap();
2287        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2288
2289        let sig = extract_signature(func_node, code, Language::Python);
2290        assert!(sig.is_some());
2291        let sig = sig.unwrap();
2292        assert!(sig.contains("def hello"));
2293        assert!(sig.contains("name"));
2294    }
2295
2296    #[test]
2297    fn test_extract_signature_rust() {
2298        let code = "fn add(a: i32, b: i32) -> i32 { a + b }";
2299        let mut parser = tree_sitter::Parser::new();
2300        parser
2301            .set_language(&tree_sitter_rust::LANGUAGE.into())
2302            .unwrap();
2303        let tree = parser.parse(code, None).unwrap();
2304        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2305
2306        let sig = extract_signature(func_node, code, Language::Rust);
2307        assert!(sig.is_some());
2308        let sig = sig.unwrap();
2309        assert!(sig.contains("fn add"));
2310        assert!(sig.contains("i32"));
2311    }
2312
2313    #[test]
2314    fn test_extract_signature_javascript() {
2315        let code = "function greet(name) { return 'Hello ' + name; }";
2316        let mut parser = tree_sitter::Parser::new();
2317        parser
2318            .set_language(&tree_sitter_javascript::LANGUAGE.into())
2319            .unwrap();
2320        let tree = parser.parse(code, None).unwrap();
2321        let func_node = find_node_in_tree(tree.root_node(), "function_declaration").unwrap();
2322
2323        let sig = extract_signature(func_node, code, Language::JavaScript);
2324        assert!(sig.is_some());
2325        let sig = sig.unwrap();
2326        assert!(sig.contains("function greet"));
2327        assert!(sig.contains("name"));
2328    }
2329
2330    #[test]
2331    fn test_extract_visibility_python_public() {
2332        let code = "def public_func():\n    pass";
2333        let mut parser = tree_sitter::Parser::new();
2334        parser
2335            .set_language(&tree_sitter_python::LANGUAGE.into())
2336            .unwrap();
2337        let tree = parser.parse(code, None).unwrap();
2338        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2339
2340        let vis = extract_visibility(func_node, code, Language::Python);
2341        assert_eq!(vis, Visibility::Public);
2342    }
2343
2344    #[test]
2345    fn test_extract_visibility_python_private() {
2346        let code = "def __private_func():\n    pass";
2347        let mut parser = tree_sitter::Parser::new();
2348        parser
2349            .set_language(&tree_sitter_python::LANGUAGE.into())
2350            .unwrap();
2351        let tree = parser.parse(code, None).unwrap();
2352        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2353
2354        let vis = extract_visibility(func_node, code, Language::Python);
2355        assert_eq!(vis, Visibility::Private);
2356    }
2357
2358    #[test]
2359    fn test_extract_visibility_python_protected() {
2360        let code = "def _protected_func():\n    pass";
2361        let mut parser = tree_sitter::Parser::new();
2362        parser
2363            .set_language(&tree_sitter_python::LANGUAGE.into())
2364            .unwrap();
2365        let tree = parser.parse(code, None).unwrap();
2366        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2367
2368        let vis = extract_visibility(func_node, code, Language::Python);
2369        assert_eq!(vis, Visibility::Protected);
2370    }
2371
2372    #[test]
2373    fn test_extract_visibility_python_dunder() {
2374        // Note: Current implementation treats dunder methods as public because
2375        // the check for `starts_with("__") && !ends_with("__")` excludes them from Private,
2376        // and `starts_with('_')` is checked in an else-if, not reached for true dunders
2377        let code = "def __init__(self):\n    pass";
2378        let mut parser = tree_sitter::Parser::new();
2379        parser
2380            .set_language(&tree_sitter_python::LANGUAGE.into())
2381            .unwrap();
2382        let tree = parser.parse(code, None).unwrap();
2383        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2384
2385        let vis = extract_visibility(func_node, code, Language::Python);
2386        // __init__ starts with _ so hits the else-if branch, returning Protected
2387        // This is the actual behavior - dunder methods are treated as Protected
2388        assert_eq!(vis, Visibility::Protected);
2389    }
2390
2391    #[test]
2392    fn test_extract_visibility_rust_pub() {
2393        let code = "pub fn public_func() {}";
2394        let mut parser = tree_sitter::Parser::new();
2395        parser
2396            .set_language(&tree_sitter_rust::LANGUAGE.into())
2397            .unwrap();
2398        let tree = parser.parse(code, None).unwrap();
2399        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2400
2401        let vis = extract_visibility(func_node, code, Language::Rust);
2402        assert_eq!(vis, Visibility::Public);
2403    }
2404
2405    #[test]
2406    fn test_extract_visibility_rust_private() {
2407        let code = "fn private_func() {}";
2408        let mut parser = tree_sitter::Parser::new();
2409        parser
2410            .set_language(&tree_sitter_rust::LANGUAGE.into())
2411            .unwrap();
2412        let tree = parser.parse(code, None).unwrap();
2413        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2414
2415        let vis = extract_visibility(func_node, code, Language::Rust);
2416        assert_eq!(vis, Visibility::Private);
2417    }
2418
2419    #[test]
2420    fn test_extract_visibility_rust_pub_crate() {
2421        let code = "pub(crate) fn crate_func() {}";
2422        let mut parser = tree_sitter::Parser::new();
2423        parser
2424            .set_language(&tree_sitter_rust::LANGUAGE.into())
2425            .unwrap();
2426        let tree = parser.parse(code, None).unwrap();
2427        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2428
2429        let vis = extract_visibility(func_node, code, Language::Rust);
2430        assert_eq!(vis, Visibility::Internal);
2431    }
2432
2433    #[test]
2434    fn test_extract_visibility_go_exported() {
2435        let code = "func Exported() {}";
2436        let mut parser = tree_sitter::Parser::new();
2437        parser
2438            .set_language(&tree_sitter_go::LANGUAGE.into())
2439            .unwrap();
2440        let tree = parser.parse(code, None).unwrap();
2441        let func_node = find_node_in_tree(tree.root_node(), "function_declaration").unwrap();
2442
2443        let vis = extract_visibility(func_node, code, Language::Go);
2444        assert_eq!(vis, Visibility::Public);
2445    }
2446
2447    #[test]
2448    fn test_extract_visibility_go_unexported() {
2449        let code = "func unexported() {}";
2450        let mut parser = tree_sitter::Parser::new();
2451        parser
2452            .set_language(&tree_sitter_go::LANGUAGE.into())
2453            .unwrap();
2454        let tree = parser.parse(code, None).unwrap();
2455        let func_node = find_node_in_tree(tree.root_node(), "function_declaration").unwrap();
2456
2457        let vis = extract_visibility(func_node, code, Language::Go);
2458        assert_eq!(vis, Visibility::Private);
2459    }
2460
2461    #[test]
2462    fn test_extract_visibility_bash_always_public() {
2463        let code = "my_func() { echo hello; }";
2464        let mut parser = tree_sitter::Parser::new();
2465        parser
2466            .set_language(&tree_sitter_bash::LANGUAGE.into())
2467            .unwrap();
2468        let tree = parser.parse(code, None).unwrap();
2469        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2470
2471        let vis = extract_visibility(func_node, code, Language::Bash);
2472        assert_eq!(vis, Visibility::Public);
2473    }
2474
2475    #[test]
2476    fn test_find_body_node_python() {
2477        let code = "def foo():\n    x = 1\n    return x";
2478        let mut parser = tree_sitter::Parser::new();
2479        parser
2480            .set_language(&tree_sitter_python::LANGUAGE.into())
2481            .unwrap();
2482        let tree = parser.parse(code, None).unwrap();
2483        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2484
2485        let body = find_body_node(func_node, Language::Python);
2486        assert!(body.is_some());
2487        assert_eq!(body.unwrap().kind(), "block");
2488    }
2489
2490    #[test]
2491    fn test_find_body_node_rust() {
2492        let code = "fn foo() { let x = 1; x }";
2493        let mut parser = tree_sitter::Parser::new();
2494        parser
2495            .set_language(&tree_sitter_rust::LANGUAGE.into())
2496            .unwrap();
2497        let tree = parser.parse(code, None).unwrap();
2498        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2499
2500        let body = find_body_node(func_node, Language::Rust);
2501        assert!(body.is_some());
2502        assert_eq!(body.unwrap().kind(), "block");
2503    }
2504
2505    #[test]
2506    fn test_find_body_node_javascript() {
2507        let code = "function foo() { return 1; }";
2508        let mut parser = tree_sitter::Parser::new();
2509        parser
2510            .set_language(&tree_sitter_javascript::LANGUAGE.into())
2511            .unwrap();
2512        let tree = parser.parse(code, None).unwrap();
2513        let func_node = find_node_in_tree(tree.root_node(), "function_declaration").unwrap();
2514
2515        let body = find_body_node(func_node, Language::JavaScript);
2516        assert!(body.is_some());
2517        assert_eq!(body.unwrap().kind(), "statement_block");
2518    }
2519
2520    #[test]
2521    fn test_extract_calls_python() {
2522        let code = "def foo():\n    bar()\n    custom_func(1, 2)";
2523        let mut parser = tree_sitter::Parser::new();
2524        parser
2525            .set_language(&tree_sitter_python::LANGUAGE.into())
2526            .unwrap();
2527        let tree = parser.parse(code, None).unwrap();
2528        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2529
2530        let calls = extract_calls(func_node, code, Language::Python);
2531        assert!(calls.contains(&"bar".to_string()));
2532        assert!(calls.contains(&"custom_func".to_string()));
2533    }
2534
2535    #[test]
2536    fn test_extract_calls_python_filters_builtins() {
2537        let code = "def foo():\n    print('hello')\n    len([1,2,3])";
2538        let mut parser = tree_sitter::Parser::new();
2539        parser
2540            .set_language(&tree_sitter_python::LANGUAGE.into())
2541            .unwrap();
2542        let tree = parser.parse(code, None).unwrap();
2543        let func_node = find_node_in_tree(tree.root_node(), "function_definition").unwrap();
2544
2545        let calls = extract_calls(func_node, code, Language::Python);
2546        // Built-ins should be filtered out
2547        assert!(!calls.contains(&"print".to_string()));
2548        assert!(!calls.contains(&"len".to_string()));
2549    }
2550
2551    #[test]
2552    fn test_extract_calls_rust() {
2553        let code = "fn foo() { bar(); baz(1); }";
2554        let mut parser = tree_sitter::Parser::new();
2555        parser
2556            .set_language(&tree_sitter_rust::LANGUAGE.into())
2557            .unwrap();
2558        let tree = parser.parse(code, None).unwrap();
2559        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2560
2561        let calls = extract_calls(func_node, code, Language::Rust);
2562        assert!(calls.contains(&"bar".to_string()));
2563        assert!(calls.contains(&"baz".to_string()));
2564    }
2565
2566    #[test]
2567    fn test_extract_docstring_rust() {
2568        let code = "/// This is a doc comment\nfn foo() {}";
2569        let mut parser = tree_sitter::Parser::new();
2570        parser
2571            .set_language(&tree_sitter_rust::LANGUAGE.into())
2572            .unwrap();
2573        let tree = parser.parse(code, None).unwrap();
2574        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2575
2576        let docstring = extract_docstring(func_node, code, Language::Rust);
2577        assert!(docstring.is_some());
2578        assert!(docstring.unwrap().contains("This is a doc comment"));
2579    }
2580
2581    #[test]
2582    fn test_extract_docstring_rust_multiline() {
2583        let code = "/// Line 1\n/// Line 2\nfn foo() {}";
2584        let mut parser = tree_sitter::Parser::new();
2585        parser
2586            .set_language(&tree_sitter_rust::LANGUAGE.into())
2587            .unwrap();
2588        let tree = parser.parse(code, None).unwrap();
2589        let func_node = find_node_in_tree(tree.root_node(), "function_item").unwrap();
2590
2591        let docstring = extract_docstring(func_node, code, Language::Rust);
2592        assert!(docstring.is_some());
2593        let doc = docstring.unwrap();
2594        assert!(doc.contains("Line 1"));
2595        assert!(doc.contains("Line 2"));
2596    }
2597}