next_plaid_cli/parser/
mod.rs

1pub mod types;
2
3pub use types::{CodeUnit, Language, UnitType};
4
5use std::path::Path;
6use tree_sitter::{Language as TsLanguage, Node, Parser};
7
8/// Detect language from file extension or filename
9pub fn detect_language(path: &Path) -> Option<Language> {
10    // Check filename first for special cases
11    if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
12        let filename_lower = filename.to_lowercase();
13        match filename_lower.as_str() {
14            "dockerfile" => return Some(Language::Dockerfile),
15            "makefile" | "gnumakefile" => return Some(Language::Makefile),
16            _ => {}
17        }
18    }
19
20    // Then check extension
21    match path.extension()?.to_str()?.to_lowercase().as_str() {
22        // Original languages
23        "py" => Some(Language::Python),
24        "ts" | "tsx" => Some(Language::TypeScript),
25        "js" | "jsx" | "mjs" => Some(Language::JavaScript),
26        "go" => Some(Language::Go),
27        "rs" => Some(Language::Rust),
28        "java" => Some(Language::Java),
29        "c" | "h" => Some(Language::C),
30        "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Some(Language::Cpp),
31        "rb" => Some(Language::Ruby),
32        "cs" => Some(Language::CSharp),
33        // Additional languages
34        "kt" | "kts" => Some(Language::Kotlin),
35        "swift" => Some(Language::Swift),
36        "scala" | "sc" => Some(Language::Scala),
37        "php" => Some(Language::Php),
38        "lua" => Some(Language::Lua),
39        "ex" | "exs" => Some(Language::Elixir),
40        "hs" => Some(Language::Haskell),
41        "ml" | "mli" => Some(Language::Ocaml),
42        // Text/documentation formats
43        "md" | "markdown" => Some(Language::Markdown),
44        "txt" | "text" | "rst" => Some(Language::Text),
45        "adoc" | "asciidoc" => Some(Language::AsciiDoc),
46        "org" => Some(Language::Org),
47        // Config formats
48        "yaml" | "yml" => Some(Language::Yaml),
49        "toml" => Some(Language::Toml),
50        "json" => Some(Language::Json),
51        // Shell scripts
52        "sh" | "bash" | "zsh" => Some(Language::Shell),
53        "ps1" => Some(Language::Powershell),
54        _ => None,
55    }
56}
57
58/// Check if a language is a text/config format (not code parsed with tree-sitter)
59pub fn is_text_format(lang: Language) -> bool {
60    matches!(
61        lang,
62        Language::Markdown
63            | Language::Text
64            | Language::Yaml
65            | Language::Toml
66            | Language::Json
67            | Language::Dockerfile
68            | Language::Makefile
69            | Language::Shell
70            | Language::Powershell
71            | Language::AsciiDoc
72            | Language::Org
73    )
74}
75
76/// Get tree-sitter language for a Language enum
77fn get_tree_sitter_language(lang: Language) -> TsLanguage {
78    match lang {
79        // Original languages
80        Language::Python => tree_sitter_python::LANGUAGE.into(),
81        Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
82        Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
83        Language::Go => tree_sitter_go::LANGUAGE.into(),
84        Language::Rust => tree_sitter_rust::LANGUAGE.into(),
85        Language::Java => tree_sitter_java::LANGUAGE.into(),
86        Language::C => tree_sitter_c::LANGUAGE.into(),
87        Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
88        Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
89        Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
90        // Additional languages
91        Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
92        Language::Swift => tree_sitter_swift::LANGUAGE.into(),
93        Language::Scala => tree_sitter_scala::LANGUAGE.into(),
94        Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
95        Language::Lua => tree_sitter_lua::LANGUAGE.into(),
96        Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
97        Language::Haskell => tree_sitter_haskell::LANGUAGE.into(),
98        Language::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
99        // Text/config formats don't use tree-sitter - this should never be called
100        Language::Markdown
101        | Language::Text
102        | Language::Yaml
103        | Language::Toml
104        | Language::Json
105        | Language::Dockerfile
106        | Language::Makefile
107        | Language::Shell
108        | Language::Powershell
109        | Language::AsciiDoc
110        | Language::Org => unreachable!("Text/config formats don't use tree-sitter"),
111    }
112}
113
114/// Extract all code units from a file with 5-layer analysis
115pub fn extract_units(path: &Path, source: &str, lang: Language) -> Vec<CodeUnit> {
116    // Handle text formats separately (no tree-sitter parsing)
117    if is_text_format(lang) {
118        return extract_text_units(path, source, lang);
119    }
120
121    let mut parser = Parser::new();
122    if parser
123        .set_language(&get_tree_sitter_language(lang))
124        .is_err()
125    {
126        return Vec::new();
127    }
128
129    let tree = match parser.parse(source, None) {
130        Some(t) => t,
131        None => return Vec::new(),
132    };
133
134    let lines: Vec<&str> = source.lines().collect();
135    let bytes = source.as_bytes();
136    let file_imports = extract_file_imports(tree.root_node(), bytes, lang);
137
138    let mut units = Vec::new();
139    extract_from_node(
140        tree.root_node(),
141        path,
142        &lines,
143        bytes,
144        lang,
145        &mut units,
146        None,
147        &file_imports,
148    );
149
150    units
151}
152
153/// Extract units from text files (markdown, txt, rst, config files, etc.)
154fn extract_text_units(path: &Path, source: &str, lang: Language) -> Vec<CodeUnit> {
155    let lines: Vec<&str> = source.lines().collect();
156
157    match lang {
158        Language::Markdown => extract_markdown_units(path, source, &lines),
159        // All other text formats: treat as plain text documents
160        _ => extract_plain_text_units(path, source, &lines, lang),
161    }
162}
163
164/// Extract units from markdown files - one document per file
165fn extract_markdown_units(path: &Path, _source: &str, lines: &[&str]) -> Vec<CodeUnit> {
166    if lines.is_empty() || lines.iter().all(|l| l.trim().is_empty()) {
167        return Vec::new();
168    }
169
170    let title = path
171        .file_stem()
172        .and_then(|s| s.to_str())
173        .unwrap_or("document")
174        .to_string();
175
176    let unit = create_text_unit(
177        path,
178        &title,
179        1,
180        Language::Markdown,
181        UnitType::Document,
182        lines,
183    );
184
185    vec![unit]
186}
187
188/// Extract units from plain text files - one unit per file
189fn extract_plain_text_units(
190    path: &Path,
191    _source: &str,
192    lines: &[&str],
193    lang: Language,
194) -> Vec<CodeUnit> {
195    if lines.is_empty() || lines.iter().all(|l| l.trim().is_empty()) {
196        return Vec::new();
197    }
198
199    let title = path
200        .file_stem()
201        .and_then(|s| s.to_str())
202        .unwrap_or("document")
203        .to_string();
204
205    let unit = create_text_unit(path, &title, 1, lang, UnitType::Document, lines);
206
207    vec![unit]
208}
209
210/// Create a CodeUnit for text content
211fn create_text_unit(
212    path: &Path,
213    name: &str,
214    line: usize,
215    lang: Language,
216    unit_type: UnitType,
217    content_lines: &[&str],
218) -> CodeUnit {
219    let qualified_name = format!("{}::{}", path.display(), name);
220
221    // First non-empty line as signature
222    let signature = content_lines
223        .iter()
224        .find(|l| !l.trim().is_empty())
225        .map(|l| l.trim().to_string())
226        .unwrap_or_default();
227
228    // First paragraph as docstring (up to first empty line)
229    let docstring: Option<String> = {
230        let para: Vec<&str> = content_lines
231            .iter()
232            .take_while(|l| !l.trim().is_empty())
233            .map(|l| l.trim())
234            .filter(|l| !l.is_empty())
235            .take(5) // Limit to 5 lines
236            .collect();
237        if para.is_empty() {
238            None
239        } else {
240            Some(para.join(" "))
241        }
242    };
243
244    // Code preview - first 20 lines
245    let preview_lines: Vec<&str> = content_lines.iter().take(20).cloned().collect();
246    let code_preview = preview_lines.join("\n");
247
248    CodeUnit {
249        name: name.to_string(),
250        qualified_name,
251        file: path.to_path_buf(),
252        line,
253        language: lang,
254        unit_type,
255        signature,
256        docstring,
257        parameters: Vec::new(),
258        return_type: None,
259        calls: Vec::new(),
260        called_by: Vec::new(),
261        complexity: 1,
262        has_loops: false,
263        has_branches: false,
264        has_error_handling: false,
265        variables: Vec::new(),
266        imports: Vec::new(),
267        code_preview,
268    }
269}
270
271/// Recursively extract code units from AST nodes
272#[allow(clippy::too_many_arguments)]
273fn extract_from_node(
274    node: Node,
275    path: &Path,
276    lines: &[&str],
277    bytes: &[u8],
278    lang: Language,
279    units: &mut Vec<CodeUnit>,
280    parent_class: Option<&str>,
281    file_imports: &[String],
282) {
283    let kind = node.kind();
284
285    // Check if this is a function/method definition
286    if is_function_node(kind, lang) {
287        if let Some(unit) =
288            extract_function(node, path, lines, bytes, lang, parent_class, file_imports)
289        {
290            units.push(unit);
291        }
292    }
293    // Check if this is a class definition
294    else if is_class_node(kind, lang) {
295        if let Some(class_name) = get_node_name(node, bytes, lang) {
296            // Extract class itself
297            if let Some(unit) = extract_class(node, path, lines, bytes, lang, file_imports) {
298                units.push(unit);
299            }
300
301            // Recurse into class body to find methods
302            if let Some(body) = find_class_body(node, lang) {
303                for child in body.children(&mut body.walk()) {
304                    extract_from_node(
305                        child,
306                        path,
307                        lines,
308                        bytes,
309                        lang,
310                        units,
311                        Some(&class_name),
312                        file_imports,
313                    );
314                }
315            }
316            return; // Don't recurse again for class nodes
317        }
318    }
319
320    // Recurse into children
321    for child in node.children(&mut node.walk()) {
322        extract_from_node(
323            child,
324            path,
325            lines,
326            bytes,
327            lang,
328            units,
329            parent_class,
330            file_imports,
331        );
332    }
333}
334
335fn is_function_node(kind: &str, lang: Language) -> bool {
336    match lang {
337        Language::Python => kind == "function_definition",
338        Language::Rust => kind == "function_item",
339        Language::TypeScript | Language::JavaScript => {
340            matches!(
341                kind,
342                "function_declaration" | "method_definition" | "arrow_function"
343            )
344        }
345        Language::Go => kind == "function_declaration" || kind == "method_declaration",
346        Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
347        Language::C | Language::Cpp => kind == "function_definition",
348        Language::Ruby => kind == "method" || kind == "singleton_method",
349        Language::CSharp => kind == "method_declaration" || kind == "constructor_declaration",
350        // Additional languages
351        Language::Kotlin => matches!(kind, "function_declaration" | "anonymous_function"),
352        Language::Swift => matches!(kind, "function_declaration" | "init_declaration"),
353        Language::Scala => matches!(kind, "function_definition" | "function_declaration"),
354        Language::Php => matches!(kind, "function_definition" | "method_declaration"),
355        Language::Lua => kind == "function_declaration",
356        Language::Elixir => matches!(kind, "call" | "anonymous_function"), // def/defp are calls in elixir
357        Language::Haskell => kind == "function",
358        Language::Ocaml => matches!(kind, "let_binding" | "value_definition"),
359        // Text/config formats - handled separately
360        _ => false,
361    }
362}
363
364fn is_class_node(kind: &str, lang: Language) -> bool {
365    match lang {
366        Language::Python => kind == "class_definition",
367        Language::Rust => kind == "impl_item" || kind == "struct_item",
368        Language::TypeScript | Language::JavaScript => kind == "class_declaration",
369        Language::Go => kind == "type_declaration",
370        Language::Java => kind == "class_declaration" || kind == "interface_declaration",
371        Language::Cpp => kind == "class_specifier" || kind == "struct_specifier",
372        Language::Ruby => kind == "class" || kind == "module",
373        Language::CSharp => kind == "class_declaration" || kind == "interface_declaration",
374        // Additional languages
375        Language::Kotlin => matches!(kind, "class_declaration" | "object_declaration"),
376        Language::Swift => matches!(
377            kind,
378            "class_declaration" | "struct_declaration" | "protocol_declaration"
379        ),
380        Language::Scala => matches!(
381            kind,
382            "class_definition" | "object_definition" | "trait_definition"
383        ),
384        Language::Php => kind == "class_declaration",
385        Language::Lua => false,             // Lua doesn't have classes
386        Language::Elixir => kind == "call", // defmodule is a call
387        Language::Haskell => matches!(kind, "type_alias" | "newtype" | "adt"),
388        Language::Ocaml => matches!(kind, "type_definition" | "module_definition"),
389        // C and text/config formats
390        _ => false,
391    }
392}
393
394fn find_class_body(node: Node, lang: Language) -> Option<Node> {
395    match lang {
396        Language::Python => node.child_by_field_name("body"),
397        Language::Rust => node.child_by_field_name("body"),
398        Language::TypeScript | Language::JavaScript => node.child_by_field_name("body"),
399        Language::Java | Language::CSharp => node.child_by_field_name("body"),
400        Language::Go => node.child_by_field_name("type"),
401        Language::Cpp => {
402            // Look for field_declaration_list in class_specifier
403            for child in node.children(&mut node.walk()) {
404                if child.kind() == "field_declaration_list" {
405                    return Some(child);
406                }
407            }
408            None
409        }
410        Language::Ruby => node.child_by_field_name("body"),
411        // Additional languages
412        Language::Kotlin | Language::Swift | Language::Scala | Language::Php => {
413            node.child_by_field_name("body")
414        }
415        Language::Elixir => node.child_by_field_name("body"),
416        Language::Haskell | Language::Ocaml => node.child_by_field_name("body"),
417        // C, Lua, and text/config formats
418        _ => None,
419    }
420}
421
422fn get_node_name(node: Node, bytes: &[u8], lang: Language) -> Option<String> {
423    let name_node = match lang {
424        Language::Python
425        | Language::Rust
426        | Language::Go
427        | Language::Java
428        | Language::Ruby
429        | Language::CSharp => node.child_by_field_name("name"),
430        Language::TypeScript | Language::JavaScript => node
431            .child_by_field_name("name")
432            .or_else(|| node.child_by_field_name("property")),
433        Language::C | Language::Cpp => {
434            node.child_by_field_name("declarator").and_then(|d| {
435                // Handle function declarator
436                if d.kind() == "function_declarator" {
437                    d.child_by_field_name("declarator")
438                } else {
439                    Some(d)
440                }
441            })
442        }
443        // Additional languages
444        Language::Kotlin
445        | Language::Swift
446        | Language::Scala
447        | Language::Php
448        | Language::Lua
449        | Language::Haskell => node.child_by_field_name("name"),
450        Language::Elixir => {
451            // For def/defp calls, get the function name from arguments
452            node.child_by_field_name("target")
453                .or_else(|| node.child_by_field_name("name"))
454        }
455        Language::Ocaml => node
456            .child_by_field_name("name")
457            .or_else(|| node.child_by_field_name("pattern")),
458        // Text/config formats
459        _ => None,
460    };
461
462    name_node.and_then(|n| {
463        let text = n.utf8_text(bytes).ok()?;
464        if text.is_empty() {
465            None
466        } else {
467            Some(text.to_string())
468        }
469    })
470}
471
472fn extract_function(
473    node: Node,
474    path: &Path,
475    lines: &[&str],
476    bytes: &[u8],
477    lang: Language,
478    parent_class: Option<&str>,
479    file_imports: &[String],
480) -> Option<CodeUnit> {
481    let name = get_node_name(node, bytes, lang)?;
482    let start_line = node.start_position().row;
483    let end_line = node.end_position().row;
484
485    let unit_type = if parent_class.is_some() {
486        UnitType::Method
487    } else {
488        UnitType::Function
489    };
490
491    let mut unit = CodeUnit::new(
492        name,
493        path.to_path_buf(),
494        start_line + 1,
495        lang,
496        unit_type,
497        parent_class,
498    );
499
500    // Layer 1: AST
501    unit.signature = lines
502        .get(start_line)
503        .map(|s| s.trim().to_string())
504        .unwrap_or_default();
505    unit.docstring = extract_docstring(node, lines, lang);
506    unit.parameters = extract_parameters(node, bytes, lang);
507    unit.return_type = extract_return_type(node, bytes, lang);
508
509    // Layer 2: Call Graph
510    unit.calls = extract_function_calls(node, bytes, lang);
511    // called_by is filled later during index build
512
513    // Layer 3: Control Flow
514    let (complexity, has_loops, has_branches, has_error_handling) =
515        extract_control_flow(node, lang);
516    unit.complexity = complexity;
517    unit.has_loops = has_loops;
518    unit.has_branches = has_branches;
519    unit.has_error_handling = has_error_handling;
520
521    // Layer 4: Data Flow
522    unit.variables = extract_variables(node, bytes, lang);
523
524    // Layer 5: Dependencies
525    unit.imports = filter_used_imports(&unit.calls, file_imports);
526
527    // Code Preview (first ~20 lines)
528    let preview_end = (start_line + 20).min(end_line + 1).min(lines.len());
529    unit.code_preview = lines[start_line..preview_end].join("\n");
530
531    Some(unit)
532}
533
534fn extract_class(
535    node: Node,
536    path: &Path,
537    lines: &[&str],
538    bytes: &[u8],
539    lang: Language,
540    file_imports: &[String],
541) -> Option<CodeUnit> {
542    let name = get_node_name(node, bytes, lang)?;
543    let start_line = node.start_position().row;
544    let end_line = node.end_position().row;
545
546    let mut unit = CodeUnit::new(
547        name,
548        path.to_path_buf(),
549        start_line + 1,
550        lang,
551        UnitType::Class,
552        None,
553    );
554
555    // Layer 1: AST
556    unit.signature = lines
557        .get(start_line)
558        .map(|s| s.trim().to_string())
559        .unwrap_or_default();
560    unit.docstring = extract_docstring(node, lines, lang);
561
562    // Layer 5: Dependencies (classes can have imports)
563    unit.imports = file_imports.to_vec();
564
565    // Code Preview (first ~5 lines for classes)
566    let preview_end = (start_line + 5).min(end_line + 1).min(lines.len());
567    unit.code_preview = lines[start_line..preview_end].join("\n");
568
569    Some(unit)
570}
571
572fn extract_docstring(node: Node, lines: &[&str], lang: Language) -> Option<String> {
573    match lang {
574        Language::Python => {
575            // Look for string expression as first statement in body
576            let body = node.child_by_field_name("body")?;
577            let first_child = body.child(0)?;
578            if first_child.kind() == "expression_statement" {
579                let expr = first_child.child(0)?;
580                if expr.kind() == "string" {
581                    let start = expr.start_position().row;
582                    let end = expr.end_position().row;
583                    let doc_lines: Vec<&str> = lines[start..=end.min(lines.len() - 1)].to_vec();
584                    let doc = doc_lines.join("\n");
585                    // Clean up triple quotes
586                    return Some(
587                        doc.trim_matches(|c| c == '"' || c == '\'')
588                            .trim()
589                            .to_string(),
590                    );
591                }
592            }
593            None
594        }
595        Language::Rust => {
596            // Look for doc comments above the function
597            let mut doc_lines = Vec::new();
598            let start_row = node.start_position().row;
599            if start_row > 0 {
600                for i in (0..start_row).rev() {
601                    let line = lines.get(i)?.trim();
602                    if line.starts_with("///") {
603                        doc_lines.insert(0, line.trim_start_matches("///").trim());
604                    } else if line.starts_with("//!") || line.starts_with("#[") || line.is_empty() {
605                        continue;
606                    } else {
607                        break;
608                    }
609                }
610            }
611            if doc_lines.is_empty() {
612                None
613            } else {
614                Some(doc_lines.join(" "))
615            }
616        }
617        Language::JavaScript
618        | Language::TypeScript
619        | Language::Java
620        | Language::CSharp
621        | Language::Kotlin
622        | Language::Swift
623        | Language::Scala
624        | Language::Php => {
625            // Look for JSDoc or similar comment above
626            let start_row = node.start_position().row;
627            if start_row > 0 {
628                let prev_line = lines.get(start_row - 1)?.trim();
629                if prev_line.ends_with("*/") {
630                    // Find the start of the block comment
631                    for i in (0..start_row).rev() {
632                        let line = lines.get(i)?.trim();
633                        if line.starts_with("/**") || line.starts_with("/*") {
634                            let doc: String = lines[i..start_row]
635                                .iter()
636                                .map(|l| {
637                                    l.trim()
638                                        .trim_start_matches("/**")
639                                        .trim_start_matches("/*")
640                                        .trim_start_matches('*')
641                                        .trim_end_matches("*/")
642                                        .trim()
643                                })
644                                .filter(|l| !l.is_empty())
645                                .collect::<Vec<_>>()
646                                .join(" ");
647                            return Some(doc);
648                        }
649                    }
650                }
651            }
652            None
653        }
654        Language::Haskell => {
655            // Look for Haddock comments (-- |)
656            let mut doc_lines = Vec::new();
657            let start_row = node.start_position().row;
658            if start_row > 0 {
659                for i in (0..start_row).rev() {
660                    let line = lines.get(i)?.trim();
661                    if line.starts_with("-- |") || line.starts_with("-- ^") {
662                        doc_lines.insert(
663                            0,
664                            line.trim_start_matches("-- |")
665                                .trim_start_matches("-- ^")
666                                .trim(),
667                        );
668                    } else if line.starts_with("--") && !doc_lines.is_empty() {
669                        doc_lines.insert(0, line.trim_start_matches("--").trim());
670                    } else if !line.is_empty() {
671                        break;
672                    }
673                }
674            }
675            if doc_lines.is_empty() {
676                None
677            } else {
678                Some(doc_lines.join(" "))
679            }
680        }
681        Language::Elixir => {
682            // Look for @doc or @moduledoc
683            let start_row = node.start_position().row;
684            if start_row > 0 {
685                for i in (0..start_row).rev() {
686                    let line = lines.get(i)?.trim();
687                    if line.starts_with("@doc") || line.starts_with("@moduledoc") {
688                        // Simple extraction - get the string content
689                        if let Some(start) = line.find('"') {
690                            return Some(line[start..].trim_matches('"').to_string());
691                        }
692                    } else if !line.is_empty() && !line.starts_with("#") && !line.starts_with("@") {
693                        break;
694                    }
695                }
696            }
697            None
698        }
699        _ => None,
700    }
701}
702
703fn extract_parameters(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
704    let params_node = match lang {
705        Language::Python | Language::Rust | Language::Go | Language::Java | Language::CSharp => {
706            node.child_by_field_name("parameters")
707        }
708        Language::TypeScript | Language::JavaScript => node
709            .child_by_field_name("parameters")
710            .or_else(|| node.child_by_field_name("formal_parameters")),
711        Language::C | Language::Cpp => node
712            .child_by_field_name("declarator")
713            .and_then(|d| d.child_by_field_name("parameters")),
714        Language::Ruby => node.child_by_field_name("parameters"),
715        // Additional languages
716        Language::Kotlin
717        | Language::Swift
718        | Language::Scala
719        | Language::Php
720        | Language::Lua
721        | Language::Elixir
722        | Language::Haskell
723        | Language::Ocaml => node.child_by_field_name("parameters"),
724        // Text/config formats
725        _ => None,
726    };
727
728    let Some(params) = params_node else {
729        return Vec::new();
730    };
731
732    let mut result = Vec::new();
733    for child in params.children(&mut params.walk()) {
734        // Look for parameter nodes
735        let kind = child.kind();
736        if kind.contains("parameter") || kind == "identifier" {
737            if let Some(name) = child.child_by_field_name("name").or_else(|| {
738                if child.kind() == "identifier" {
739                    Some(child)
740                } else {
741                    None
742                }
743            }) {
744                if let Ok(text) = name.utf8_text(bytes) {
745                    if !text.is_empty() && text != "self" && text != "this" && text != "cls" {
746                        result.push(text.to_string());
747                    }
748                }
749            }
750        }
751    }
752    result
753}
754
755fn extract_return_type(node: Node, bytes: &[u8], lang: Language) -> Option<String> {
756    let ret_node = match lang {
757        Language::Python => node.child_by_field_name("return_type"),
758        Language::Rust => node.child_by_field_name("return_type"),
759        Language::TypeScript => node.child_by_field_name("return_type"),
760        Language::Go => node.child_by_field_name("result"),
761        Language::Java | Language::CSharp => node.child_by_field_name("type"),
762        Language::Cpp | Language::C => node.child_by_field_name("type"),
763        _ => None,
764    };
765
766    ret_node.and_then(|n| n.utf8_text(bytes).ok().map(|s| s.to_string()))
767}
768
769fn extract_function_calls(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
770    let mut calls = Vec::new();
771    let call_types: &[&str] = match lang {
772        Language::Python => &["call"],
773        Language::Rust => &["call_expression", "macro_invocation"],
774        Language::TypeScript | Language::JavaScript => &["call_expression"],
775        Language::Go => &["call_expression"],
776        Language::Java | Language::CSharp => &["method_invocation", "object_creation_expression"],
777        Language::C | Language::Cpp => &["call_expression"],
778        Language::Ruby => &["call", "method_call"],
779        // Additional languages
780        Language::Kotlin => &["call_expression", "navigation_expression"],
781        Language::Swift => &["call_expression"],
782        Language::Scala => &["call_expression"],
783        Language::Php => &["function_call_expression", "method_call_expression"],
784        Language::Lua => &["function_call"],
785        Language::Elixir => &["call"],
786        Language::Haskell => &["function_application"],
787        Language::Ocaml => &["application"],
788        // Text/config formats
789        _ => return calls,
790    };
791
792    fn visit(node: Node, bytes: &[u8], call_types: &[&str], calls: &mut Vec<String>) {
793        if call_types.contains(&node.kind()) {
794            if let Some(name_node) = node
795                .child_by_field_name("function")
796                .or_else(|| node.child_by_field_name("name"))
797                .or_else(|| node.child_by_field_name("method"))
798                .or_else(|| node.child(0))
799            {
800                if let Ok(text) = name_node.utf8_text(bytes) {
801                    // Extract just the function name (last component)
802                    #[allow(clippy::double_ended_iterator_last)]
803                    let name = text.split('.').last().unwrap_or(text);
804                    #[allow(clippy::double_ended_iterator_last)]
805                    let name = name.split("::").last().unwrap_or(name);
806                    let name = name.trim_end_matches('!'); // Rust macros
807                    if !name.is_empty()
808                        && name
809                            .chars()
810                            .next()
811                            .map(|c| c.is_alphabetic())
812                            .unwrap_or(false)
813                    {
814                        calls.push(name.to_string());
815                    }
816                }
817            }
818        }
819        for child in node.children(&mut node.walk()) {
820            visit(child, bytes, call_types, calls);
821        }
822    }
823
824    visit(node, bytes, call_types, &mut calls);
825    calls.sort();
826    calls.dedup();
827    calls
828}
829
830fn extract_control_flow(node: Node, _lang: Language) -> (usize, bool, bool, bool) {
831    let mut complexity = 1;
832    let mut has_loops = false;
833    let mut has_branches = false;
834    let mut has_error_handling = false;
835
836    fn visit(
837        node: Node,
838        complexity: &mut usize,
839        loops: &mut bool,
840        branches: &mut bool,
841        errors: &mut bool,
842    ) {
843        match node.kind() {
844            // Branches
845            "if_statement"
846            | "if_expression"
847            | "match_expression"
848            | "match_statement"
849            | "switch_statement"
850            | "case_statement"
851            | "conditional_expression"
852            | "ternary_expression"
853            | "if"
854            | "unless"
855            | "when" => {
856                *complexity += 1;
857                *branches = true;
858            }
859            // Loops
860            "for_statement" | "for_expression" | "while_statement" | "while_expression"
861            | "loop_expression" | "for_in_statement" | "foreach_statement" | "do_statement"
862            | "for" | "while" | "until" => {
863                *complexity += 1;
864                *loops = true;
865            }
866            // Error handling
867            "try_statement" | "try_expression" | "catch_clause" | "rescue" | "except_clause"
868            | "try" => {
869                *errors = true;
870            }
871            // Rust-specific error handling patterns
872            "?" | "try_operator" => {
873                *errors = true;
874            }
875            _ => {}
876        }
877        for child in node.children(&mut node.walk()) {
878            visit(child, complexity, loops, branches, errors);
879        }
880    }
881
882    visit(
883        node,
884        &mut complexity,
885        &mut has_loops,
886        &mut has_branches,
887        &mut has_error_handling,
888    );
889    (complexity, has_loops, has_branches, has_error_handling)
890}
891
892fn extract_variables(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
893    let mut vars = Vec::new();
894    let var_types: &[&str] = match lang {
895        Language::Python => &["assignment", "named_expression", "augmented_assignment"],
896        Language::Rust => &["let_declaration"],
897        Language::TypeScript | Language::JavaScript => {
898            &["variable_declarator", "lexical_declaration"]
899        }
900        Language::Go => &["short_var_declaration", "var_declaration"],
901        Language::Java | Language::CSharp => &["variable_declarator", "local_variable_declaration"],
902        Language::C | Language::Cpp => &["declaration", "init_declarator"],
903        Language::Ruby => &["assignment"],
904        // Additional languages
905        Language::Kotlin => &["property_declaration", "variable_declaration"],
906        Language::Swift => &["property_declaration", "constant_declaration"],
907        Language::Scala => &["val_definition", "var_definition"],
908        Language::Php => &["simple_variable"],
909        Language::Lua => &["variable_declaration", "local_variable_declaration"],
910        Language::Elixir => &["match"],
911        Language::Haskell => &["function_binding"],
912        Language::Ocaml => &["let_binding"],
913        // Text/config formats
914        _ => return vars,
915    };
916
917    fn visit(node: Node, bytes: &[u8], var_types: &[&str], vars: &mut Vec<String>) {
918        if var_types.contains(&node.kind()) {
919            if let Some(name_node) = node
920                .child_by_field_name("left")
921                .or_else(|| node.child_by_field_name("name"))
922                .or_else(|| node.child_by_field_name("pattern"))
923                .or_else(|| node.child(0))
924            {
925                if let Ok(text) = name_node.utf8_text(bytes) {
926                    let name = text.trim();
927                    if !name.is_empty()
928                        && name.len() < 50
929                        && name
930                            .chars()
931                            .next()
932                            .map(|c| c.is_alphabetic() || c == '_')
933                            .unwrap_or(false)
934                    {
935                        vars.push(name.to_string());
936                    }
937                }
938            }
939        }
940        for child in node.children(&mut node.walk()) {
941            visit(child, bytes, var_types, vars);
942        }
943    }
944
945    visit(node, bytes, var_types, &mut vars);
946    vars.sort();
947    vars.dedup();
948    vars
949}
950
951fn extract_file_imports(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
952    let mut imports = Vec::new();
953    let import_types: &[&str] = match lang {
954        Language::Python => &["import_statement", "import_from_statement"],
955        Language::Rust => &["use_declaration"],
956        Language::TypeScript | Language::JavaScript => &["import_statement"],
957        Language::Go => &["import_declaration"],
958        Language::Java => &["import_declaration"],
959        Language::CSharp => &["using_directive"],
960        Language::C | Language::Cpp => &["preproc_include"],
961        Language::Ruby => &["call"], // require/require_relative
962        // Additional languages
963        Language::Kotlin => &["import_header"],
964        Language::Swift => &["import_declaration"],
965        Language::Scala => &["import_declaration"],
966        Language::Php => &["namespace_use_declaration"],
967        Language::Lua => &["call"],    // require
968        Language::Elixir => &["call"], // import/require/use
969        Language::Haskell => &["import"],
970        Language::Ocaml => &["open_statement"],
971        // Text/config formats
972        _ => return imports,
973    };
974
975    fn visit(
976        node: Node,
977        bytes: &[u8],
978        import_types: &[&str],
979        imports: &mut Vec<String>,
980        lang: Language,
981    ) {
982        if import_types.contains(&node.kind()) {
983            // For Ruby, check if it's actually a require call
984            if lang == Language::Ruby {
985                if let Some(name) = node.child_by_field_name("method") {
986                    if let Ok(text) = name.utf8_text(bytes) {
987                        if text != "require" && text != "require_relative" {
988                            return;
989                        }
990                    }
991                }
992            }
993
994            if let Ok(text) = node.utf8_text(bytes) {
995                // Extract module name from import statement
996                let text = text.trim();
997                // Simple extraction - get the main module name
998                let module = text
999                    .split_whitespace()
1000                    .find(|s| {
1001                        !s.starts_with("import")
1002                            && !s.starts_with("from")
1003                            && !s.starts_with("use")
1004                            && !s.starts_with("using")
1005                    })
1006                    .unwrap_or(text)
1007                    .trim_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '.')
1008                    .split("::")
1009                    .next()
1010                    .unwrap_or("")
1011                    .split('.')
1012                    .next()
1013                    .unwrap_or("");
1014
1015                if !module.is_empty() {
1016                    imports.push(module.to_string());
1017                }
1018            }
1019        }
1020        for child in node.children(&mut node.walk()) {
1021            visit(child, bytes, import_types, imports, lang);
1022        }
1023    }
1024
1025    visit(node, bytes, import_types, &mut imports, lang);
1026    imports.sort();
1027    imports.dedup();
1028    imports
1029}
1030
/// Returns the imports from `file_imports` that plausibly relate to `calls`.
///
/// This is a heuristic: an import is kept when, ignoring case, its name contains
/// one of the call names or one of the call names contains it. The relative order
/// of `file_imports` is preserved and the kept entries are cloned unchanged.
fn filter_used_imports(calls: &[String], file_imports: &[String]) -> Vec<String> {
    // Lowercase each call once up front rather than once per (import, call) pair.
    let lowered_calls: Vec<String> = calls.iter().map(|call| call.to_lowercase()).collect();

    file_imports
        .iter()
        .filter(|import| {
            let import = import.to_lowercase();
            lowered_calls
                .iter()
                .any(|call| call.contains(&import) || import.contains(call.as_str()))
        })
        .cloned()
        .collect()
}
1045
/// Reports whether `lang` is a text/config format, i.e. not a code language
/// parsed with tree-sitter.
///
/// This is a thin wrapper that forwards to `is_text_format`; it is exposed
/// publicly so the check can be used from tests.
pub fn is_text_format_check(lang: Language) -> bool {
    is_text_format(lang)
}
1050
1051/// Build call graph and populate called_by for all units
1052pub fn build_call_graph(units: &mut [CodeUnit]) {
1053    use std::collections::HashMap;
1054
1055    // Build index: function_name -> indices of units with that name
1056    let mut name_to_indices: HashMap<String, Vec<usize>> = HashMap::new();
1057    for (i, unit) in units.iter().enumerate() {
1058        name_to_indices
1059            .entry(unit.name.clone())
1060            .or_default()
1061            .push(i);
1062    }
1063
1064    // Collect all calls first to avoid borrow issues
1065    let calls_map: Vec<(usize, Vec<String>)> = units
1066        .iter()
1067        .enumerate()
1068        .map(|(i, u)| (i, u.calls.clone()))
1069        .collect();
1070
1071    // For each unit, find what calls it
1072    for (caller_idx, calls) in calls_map {
1073        let caller_name = units[caller_idx].name.clone();
1074        for callee_name in calls {
1075            if let Some(indices) = name_to_indices.get(&callee_name) {
1076                for &callee_idx in indices {
1077                    if !units[callee_idx].called_by.contains(&caller_name) {
1078                        units[callee_idx].called_by.push(caller_name.clone());
1079                    }
1080                }
1081            }
1082        }
1083    }
1084}
1085
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // ==================== shared helpers ====================

    /// Asserts that `detect_language` maps `path` to `expected`.
    #[track_caller]
    fn assert_detects(path: &str, expected: Language) {
        assert_eq!(detect_language(Path::new(path)), Some(expected));
    }

    /// Asserts that `detect_language` does not recognise `path`.
    #[track_caller]
    fn assert_undetected(path: &str) {
        assert_eq!(detect_language(Path::new(path)), None);
    }

    /// True when `units` holds an entry with the given name and unit type.
    fn has_unit(units: &[CodeUnit], name: &str, unit_type: UnitType) -> bool {
        units
            .iter()
            .any(|u| u.name == name && u.unit_type == unit_type)
    }

    /// Returns the first unit called `name`, panicking when there is none.
    #[track_caller]
    fn unit_named<'a>(units: &'a [CodeUnit], name: &str) -> &'a CodeUnit {
        units.iter().find(|u| u.name == name).unwrap()
    }

    // ==================== detect_language tests ====================

    #[test]
    fn test_detect_language_python() {
        assert_detects("main.py", Language::Python);
        assert_detects("src/utils/helper.py", Language::Python);
    }

    #[test]
    fn test_detect_language_rust() {
        assert_detects("main.rs", Language::Rust);
        assert_detects("src/lib.rs", Language::Rust);
    }

    #[test]
    fn test_detect_language_typescript() {
        assert_detects("app.ts", Language::TypeScript);
        assert_detects("Component.tsx", Language::TypeScript);
    }

    #[test]
    fn test_detect_language_javascript() {
        assert_detects("app.js", Language::JavaScript);
        assert_detects("Component.jsx", Language::JavaScript);
        assert_detects("module.mjs", Language::JavaScript);
    }

    #[test]
    fn test_detect_language_go() {
        assert_detects("main.go", Language::Go);
    }

    #[test]
    fn test_detect_language_java() {
        assert_detects("Main.java", Language::Java);
    }

    #[test]
    fn test_detect_language_c() {
        assert_detects("main.c", Language::C);
        assert_detects("header.h", Language::C);
    }

    #[test]
    fn test_detect_language_cpp() {
        assert_detects("main.cpp", Language::Cpp);
        assert_detects("main.cc", Language::Cpp);
        assert_detects("main.cxx", Language::Cpp);
        assert_detects("header.hpp", Language::Cpp);
        assert_detects("header.hxx", Language::Cpp);
    }

    #[test]
    fn test_detect_language_ruby() {
        assert_detects("main.rb", Language::Ruby);
    }

    #[test]
    fn test_detect_language_csharp() {
        assert_detects("Program.cs", Language::CSharp);
    }

    #[test]
    fn test_detect_language_kotlin() {
        assert_detects("Main.kt", Language::Kotlin);
        assert_detects("build.gradle.kts", Language::Kotlin);
    }

    #[test]
    fn test_detect_language_swift() {
        assert_detects("App.swift", Language::Swift);
    }

    #[test]
    fn test_detect_language_scala() {
        assert_detects("Main.scala", Language::Scala);
        assert_detects("script.sc", Language::Scala);
    }

    #[test]
    fn test_detect_language_php() {
        assert_detects("index.php", Language::Php);
    }

    #[test]
    fn test_detect_language_lua() {
        assert_detects("init.lua", Language::Lua);
    }

    #[test]
    fn test_detect_language_elixir() {
        assert_detects("app.ex", Language::Elixir);
        assert_detects("test.exs", Language::Elixir);
    }

    #[test]
    fn test_detect_language_haskell() {
        assert_detects("Main.hs", Language::Haskell);
    }

    #[test]
    fn test_detect_language_ocaml() {
        assert_detects("main.ml", Language::Ocaml);
        assert_detects("main.mli", Language::Ocaml);
    }

    #[test]
    fn test_detect_language_markdown() {
        assert_detects("README.md", Language::Markdown);
        assert_detects("docs.markdown", Language::Markdown);
    }

    #[test]
    fn test_detect_language_text() {
        assert_detects("notes.txt", Language::Text);
        assert_detects("doc.text", Language::Text);
        assert_detects("readme.rst", Language::Text);
    }

    #[test]
    fn test_detect_language_yaml() {
        assert_detects("config.yaml", Language::Yaml);
        assert_detects("config.yml", Language::Yaml);
    }

    #[test]
    fn test_detect_language_toml() {
        assert_detects("Cargo.toml", Language::Toml);
    }

    #[test]
    fn test_detect_language_json() {
        assert_detects("package.json", Language::Json);
    }

    #[test]
    fn test_detect_language_shell() {
        assert_detects("script.sh", Language::Shell);
        assert_detects("script.bash", Language::Shell);
        assert_detects("script.zsh", Language::Shell);
    }

    #[test]
    fn test_detect_language_powershell() {
        assert_detects("script.ps1", Language::Powershell);
    }

    #[test]
    fn test_detect_language_dockerfile() {
        assert_detects("Dockerfile", Language::Dockerfile);
        assert_detects("dockerfile", Language::Dockerfile);
    }

    #[test]
    fn test_detect_language_makefile() {
        assert_detects("Makefile", Language::Makefile);
        assert_detects("makefile", Language::Makefile);
        assert_detects("GNUmakefile", Language::Makefile);
    }

    #[test]
    fn test_detect_language_asciidoc() {
        assert_detects("doc.adoc", Language::AsciiDoc);
        assert_detects("doc.asciidoc", Language::AsciiDoc);
    }

    #[test]
    fn test_detect_language_org() {
        assert_detects("notes.org", Language::Org);
    }

    #[test]
    fn test_detect_language_unknown() {
        assert_undetected("file.xyz");
        assert_undetected("file.unknown");
        assert_undetected("no_extension");
    }

    #[test]
    fn test_detect_language_case_insensitive() {
        assert_detects("main.PY", Language::Python);
        assert_detects("Main.RS", Language::Rust);
        assert_detects("app.TS", Language::TypeScript);
    }

    // ==================== is_text_format tests ====================

    #[test]
    fn test_is_text_format_true() {
        let text_formats = [
            Language::Markdown,
            Language::Text,
            Language::Yaml,
            Language::Toml,
            Language::Json,
            Language::Dockerfile,
            Language::Makefile,
            Language::Shell,
            Language::Powershell,
            Language::AsciiDoc,
            Language::Org,
        ];
        for lang in text_formats {
            assert!(is_text_format(lang), "{:?} should be a text format", lang);
        }
    }

    #[test]
    fn test_is_text_format_false() {
        let code_languages = [
            Language::Python,
            Language::Rust,
            Language::TypeScript,
            Language::JavaScript,
            Language::Go,
            Language::Java,
            Language::C,
            Language::Cpp,
            Language::Ruby,
            Language::CSharp,
            Language::Kotlin,
            Language::Swift,
            Language::Scala,
            Language::Php,
            Language::Lua,
            Language::Elixir,
            Language::Haskell,
            Language::Ocaml,
        ];
        for lang in code_languages {
            assert!(!is_text_format(lang), "{:?} should not be a text format", lang);
        }
    }

    // ==================== extract_units tests ====================

    #[test]
    fn test_extract_python_function() {
        let source = r#"
def hello(name: str) -> str:
    """Say hello to someone."""
    return f"Hello, {name}!"
"#;
        let units = extract_units(Path::new("test.py"), source, Language::Python);
        assert_eq!(units.len(), 1);

        let hello = &units[0];
        assert_eq!(hello.name, "hello");
        assert_eq!(hello.unit_type, UnitType::Function);
        // Parameter extraction depends on the tree-sitter AST structure, so only
        // the presence of the docstring is checked here.
        assert!(hello.docstring.is_some());
    }

    #[test]
    fn test_extract_python_class() {
        let source = r#"
class Person:
    """A person class."""
    def __init__(self, name):
        self.name = name

    def greet(self):
        return f"Hello, I'm {self.name}"
"#;
        let units = extract_units(Path::new("test.py"), source, Language::Python);
        assert!(has_unit(&units, "Person", UnitType::Class));
        assert!(has_unit(&units, "__init__", UnitType::Method));
        assert!(has_unit(&units, "greet", UnitType::Method));
    }

    #[test]
    fn test_extract_rust_function() {
        let source = r#"
/// Adds two numbers together.
fn add(a: i32, b: i32) -> i32 {
    a + b
}
"#;
        let units = extract_units(Path::new("test.rs"), source, Language::Rust);
        assert_eq!(units.len(), 1);

        let add = &units[0];
        assert_eq!(add.name, "add");
        assert_eq!(add.unit_type, UnitType::Function);
        assert!(add.docstring.is_some());
        let doc = add.docstring.as_ref().unwrap();
        assert!(doc.contains("Adds two numbers"));
    }

    #[test]
    fn test_extract_rust_impl() {
        let source = r#"
struct Point {
    x: i32,
    y: i32,
}

impl Point {
    fn new(x: i32, y: i32) -> Self {
        Self { x, y }
    }
}
"#;
        let units = extract_units(Path::new("test.rs"), source, Language::Rust);
        // The struct is expected to surface as a Class unit.
        assert!(has_unit(&units, "Point", UnitType::Class));
        // The function inside the impl block must be present; whether it is a
        // Method or a Function depends on parsing, so only the name is checked.
        assert!(units.iter().any(|u| u.name == "new"));
    }

    #[test]
    fn test_extract_javascript_function() {
        let source = r#"
function greet(name) {
    return `Hello, ${name}!`;
}
"#;
        let units = extract_units(Path::new("test.js"), source, Language::JavaScript);
        assert_eq!(units.len(), 1);

        let greet = &units[0];
        assert_eq!(greet.name, "greet");
        assert_eq!(greet.unit_type, UnitType::Function);
    }

    #[test]
    fn test_extract_typescript_class() {
        let source = r#"
class Calculator {
    add(a: number, b: number): number {
        return a + b;
    }
}
"#;
        let units = extract_units(Path::new("test.ts"), source, Language::TypeScript);
        assert!(has_unit(&units, "Calculator", UnitType::Class));
        assert!(has_unit(&units, "add", UnitType::Method));
    }

    #[test]
    fn test_extract_go_function() {
        let source = r#"
package main

func Add(a, b int) int {
    return a + b
}
"#;
        let units = extract_units(Path::new("test.go"), source, Language::Go);
        assert_eq!(units.len(), 1);

        let add = &units[0];
        assert_eq!(add.name, "Add");
        assert_eq!(add.unit_type, UnitType::Function);
    }

    #[test]
    fn test_extract_java_class() {
        let source = r#"
public class Calculator {
    public int add(int a, int b) {
        return a + b;
    }
}
"#;
        let units = extract_units(Path::new("Test.java"), source, Language::Java);
        assert!(has_unit(&units, "Calculator", UnitType::Class));
        assert!(has_unit(&units, "add", UnitType::Method));
    }

    #[test]
    fn test_extract_markdown_document() {
        let source = r#"# My Document

This is a paragraph.

## Section 1

Some content here.
"#;
        let units = extract_units(Path::new("README.md"), source, Language::Markdown);
        assert_eq!(units.len(), 1);

        let document = &units[0];
        assert_eq!(document.name, "README");
        assert_eq!(document.unit_type, UnitType::Document);
    }

    #[test]
    fn test_extract_empty_source() {
        let units = extract_units(Path::new("test.py"), "", Language::Python);
        assert!(units.is_empty());
    }

    #[test]
    fn test_extract_empty_markdown() {
        let units = extract_units(Path::new("empty.md"), "", Language::Markdown);
        assert!(units.is_empty());
    }

    #[test]
    fn test_extract_whitespace_only_markdown() {
        let blank = "   \n\n   \n";
        let units = extract_units(Path::new("whitespace.md"), blank, Language::Markdown);
        assert!(units.is_empty());
    }

    // ==================== build_call_graph tests ====================

    #[test]
    fn test_build_call_graph_simple() {
        let source = r#"
def caller():
    callee()

def callee():
    pass
"#;
        let mut units = extract_units(Path::new("test.py"), source, Language::Python);
        build_call_graph(&mut units);

        let caller = unit_named(&units, "caller");
        let callee = unit_named(&units, "callee");

        assert!(caller.calls.contains(&String::from("callee")));
        assert!(callee.called_by.contains(&String::from("caller")));
    }

    #[test]
    fn test_build_call_graph_multiple_callers() {
        let source = r#"
def helper():
    pass

def caller1():
    helper()

def caller2():
    helper()
"#;
        let mut units = extract_units(Path::new("test.py"), source, Language::Python);
        build_call_graph(&mut units);

        let helper = unit_named(&units, "helper");
        for expected_caller in ["caller1", "caller2"] {
            assert!(helper.called_by.contains(&expected_caller.to_string()));
        }
    }

    // ==================== control flow tests ====================

    #[test]
    fn test_extract_control_flow_loops() {
        let source = r#"
def process_items(items):
    for item in items:
        print(item)
"#;
        let units = extract_units(Path::new("test.py"), source, Language::Python);
        assert_eq!(units.len(), 1);
        assert!(units[0].has_loops);
    }

    #[test]
    fn test_extract_control_flow_branches() {
        let source = r#"
def check_value(x):
    if x > 0:
        return "positive"
    else:
        return "non-positive"
"#;
        let units = extract_units(Path::new("test.py"), source, Language::Python);
        assert_eq!(units.len(), 1);
        assert!(units[0].has_branches);
    }

    #[test]
    fn test_extract_control_flow_error_handling() {
        let source = r#"
def safe_divide(a, b):
    try:
        return a / b
    except ZeroDivisionError:
        return None
"#;
        let units = extract_units(Path::new("test.py"), source, Language::Python);
        assert_eq!(units.len(), 1);
        assert!(units[0].has_error_handling);
    }

    #[test]
    fn test_extract_complexity() {
        let source = r#"
def complex_function(x, y):
    if x > 0:
        if y > 0:
            return "both positive"
    return "not both positive"
"#;
        let units = extract_units(Path::new("test.py"), source, Language::Python);
        assert_eq!(units.len(), 1);
        // Base complexity (1) plus two `if` statements gives at least 3.
        assert!(units[0].complexity >= 3);
    }

    // ==================== Language::from_str tests ====================

    #[test]
    fn test_language_from_str() {
        use std::str::FromStr;

        let aliases = [
            ("python", Language::Python),
            ("py", Language::Python),
            ("PYTHON", Language::Python),
            ("rust", Language::Rust),
            ("rs", Language::Rust),
            ("typescript", Language::TypeScript),
            ("ts", Language::TypeScript),
            ("javascript", Language::JavaScript),
            ("js", Language::JavaScript),
            ("go", Language::Go),
            ("java", Language::Java),
            ("c", Language::C),
            ("cpp", Language::Cpp),
            ("c++", Language::Cpp),
            ("csharp", Language::CSharp),
            ("c#", Language::CSharp),
            ("cs", Language::CSharp),
            ("ruby", Language::Ruby),
            ("rb", Language::Ruby),
        ];
        for (alias, expected) in aliases {
            assert_eq!(Language::from_str(alias), Ok(expected), "alias {:?}", alias);
        }

        assert_eq!(
            Language::from_str("unknown"),
            Err("Unknown language: unknown".to_string())
        );
    }
}