Skip to main content

tldr_cli/commands/remaining/
explain.rs

1//! Explain Command - Comprehensive Function Analysis
2//!
3//! The explain command provides a complete analysis of a function including:
4//! - Signature extraction (params, return type, decorators, docstring)
5//! - Purity analysis (pure/impure/unknown with effects)
6//! - Complexity metrics (cyclomatic, blocks, edges, loops)
7//! - Call relationships (callers and callees)
8//!
9//! # Example
10//!
11//! ```bash
12//! # Analyze a function
13//! tldr explain src/utils.py calculate_total
14//!
15//! # With call graph depth
16//! tldr explain src/utils.py calculate_total --depth 3
17//!
18//! # Text output
19//! tldr explain src/utils.py calculate_total --format text
20//! ```
21
22use std::collections::HashSet;
23use std::path::PathBuf;
24
25use anyhow::Result;
26use clap::Args;
27use tree_sitter::{Node, Parser};
28
29use super::error::RemainingError;
30use super::types::{CallInfo, ComplexityInfo, ExplainReport, ParamInfo, PurityInfo, SignatureInfo};
31
32use crate::output::{OutputFormat, OutputWriter};
33use tldr_core::types::Language;
34
35// =============================================================================
36// CLI Arguments
37// =============================================================================
38
39/// Provide comprehensive function analysis.
40#[derive(Debug, Clone, Args)]
41pub struct ExplainArgs {
42    /// Source file to analyze
43    pub file: PathBuf,
44
45    /// Function name to explain
46    pub function: String,
47
48    /// Call graph depth for callers/callees
49    #[arg(long, default_value = "2")]
50    pub depth: u32,
51
52    /// Output file (stdout if not specified)
53    #[arg(long, short = 'o')]
54    pub output: Option<PathBuf>,
55}
56
57// =============================================================================
58// Constants
59// =============================================================================
60
61/// Known I/O operations that make a function impure
62const IO_OPERATIONS: &[&str] = &[
63    "print",
64    "open",
65    "read",
66    "write",
67    "readline",
68    "readlines",
69    "writelines",
70    "input",
71    "system",
72    "popen",
73    "exec",
74    "eval",
75    "request",
76    "fetch",
77    "urlopen",
78    "execute",
79    "executemany",
80    "fetchone",
81    "fetchall",
82];
83
84/// Known impure calls (non-deterministic or side-effecting)
85const IMPURE_CALLS: &[&str] = &[
86    "random",
87    "randint",
88    "choice",
89    "shuffle",
90    "sample",
91    "uniform",
92    "random.random",
93    "random.randint",
94    "random.choice",
95    "random.shuffle",
96    "time",
97    "time.time",
98    "datetime.now",
99    "datetime.datetime.now",
100    "uuid4",
101    "uuid1",
102    "uuid.uuid4",
103    "uuid.uuid1",
104    "logging.info",
105    "logging.debug",
106    "logging.warning",
107    "logging.error",
108    "os.system",
109    "os.popen",
110    "os.getenv",
111    "os.environ",
112    "os.mkdir",
113    "os.remove",
114    "requests.get",
115    "requests.post",
116    "requests.put",
117    "requests.delete",
118    "subprocess.run",
119    "subprocess.call",
120    "subprocess.Popen",
121];
122
123/// Collection mutation methods
124const COLLECTION_MUTATIONS: &[&str] = &[
125    "append",
126    "extend",
127    "insert",
128    "remove",
129    "pop",
130    "clear",
131    "update",
132    "add",
133    "discard",
134    "setdefault",
135    "sort",
136    "reverse",
137];
138
139/// Known pure builtins
140const PURE_BUILTINS: &[&str] = &[
141    "len",
142    "range",
143    "int",
144    "float",
145    "str",
146    "bool",
147    "list",
148    "dict",
149    "set",
150    "tuple",
151    "sorted",
152    "reversed",
153    "enumerate",
154    "zip",
155    "map",
156    "filter",
157    "min",
158    "max",
159    "sum",
160    "abs",
161    "round",
162    "isinstance",
163    "issubclass",
164    "type",
165    "id",
166    "hash",
167    "repr",
168    "next",
169    "iter",
170    "all",
171    "any",
172    "chr",
173    "ord",
174    "hex",
175    "oct",
176    "bin",
177    "pow",
178    "divmod",
179    "super",
180    "property",
181    "staticmethod",
182    "classmethod",
183];
184
185// =============================================================================
186// Tree-sitter Multi-Language Parsing
187// =============================================================================
188
189/// Get function node kinds for a given language
190fn get_function_node_kinds(language: Language) -> &'static [&'static str] {
191    match language {
192        Language::Python => &["function_definition", "async_function_definition"],
193        Language::TypeScript | Language::JavaScript => &[
194            "function_declaration",
195            "arrow_function",
196            "method_definition",
197            "function",
198        ],
199        Language::Go => &["function_declaration", "method_declaration"],
200        Language::Rust => &["function_item"],
201        Language::Java => &["method_declaration", "constructor_declaration"],
202        Language::Kotlin => &["function_declaration"],
203        Language::CSharp => &["method_declaration", "constructor_declaration"],
204        Language::Ruby => &["method", "singleton_method"],
205        Language::Php => &["function_definition", "method_declaration"],
206        Language::Scala => &["function_definition"],
207        Language::Swift => &["function_declaration"],
208        Language::C | Language::Cpp => &["function_definition"],
209        Language::Lua | Language::Luau => &["function_declaration", "function_definition"],
210        Language::Elixir => &["call"], // Elixir def/defp are call nodes
211        Language::Ocaml => &["value_definition"],
212    }
213}
214
215/// Initialize tree-sitter parser for the detected language
216fn get_parser(language: Language) -> Result<Parser, RemainingError> {
217    let mut parser = Parser::new();
218
219    let ts_language = match language {
220        Language::Python => tree_sitter_python::LANGUAGE.into(),
221        Language::TypeScript => tree_sitter_typescript::LANGUAGE_TSX.into(),
222        Language::JavaScript => tree_sitter_typescript::LANGUAGE_TSX.into(),
223        Language::Go => tree_sitter_go::LANGUAGE.into(),
224        Language::Rust => tree_sitter_rust::LANGUAGE.into(),
225        Language::Java => tree_sitter_java::LANGUAGE.into(),
226        Language::C => tree_sitter_c::LANGUAGE.into(),
227        Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
228        Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
229        Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
230        Language::Scala => tree_sitter_scala::LANGUAGE.into(),
231        Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
232        Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
233        Language::Lua => tree_sitter_lua::LANGUAGE.into(),
234        Language::Luau => tree_sitter_luau::LANGUAGE.into(),
235        Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
236        Language::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
237        Language::Swift => tree_sitter_swift::LANGUAGE.into(),
238    };
239
240    parser.set_language(&ts_language).map_err(|e| {
241        RemainingError::parse_error(PathBuf::new(), format!("Failed to set language: {}", e))
242    })?;
243    Ok(parser)
244}
245
246/// Get text for a node from source
247fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
248    node.utf8_text(source).unwrap_or("")
249}
250
251/// Get the line number (1-indexed) for a node
252fn get_line_number(node: Node) -> u32 {
253    node.start_position().row as u32 + 1
254}
255
256/// Get the end line number (1-indexed) for a node
257fn get_end_line_number(node: Node) -> u32 {
258    node.end_position().row as u32 + 1
259}
260
261// =============================================================================
262// Function Finding
263// =============================================================================
264
265/// Find a function definition by name in the AST
266fn find_function_node<'a>(
267    root: Node<'a>,
268    source: &[u8],
269    function_name: &str,
270    func_kinds: &[&str],
271) -> Option<Node<'a>> {
272    find_function_recursive(root, source, function_name, func_kinds)
273}
274
275fn find_function_recursive<'a>(
276    node: Node<'a>,
277    source: &[u8],
278    function_name: &str,
279    func_kinds: &[&str],
280) -> Option<Node<'a>> {
281    if func_kinds.contains(&node.kind()) {
282        // Check if this function has the name we're looking for
283        // Try field name first (most reliable)
284        if let Some(name_node) = node.child_by_field_name("name") {
285            let name = node_text(name_node, source);
286            if name == function_name {
287                return Some(node);
288            }
289        }
290        // C/C++: function_definition -> declarator -> function_declarator -> identifier
291        if let Some(declarator) = node.child_by_field_name("declarator") {
292            if let Some(name) = extract_c_declarator_name_explain(declarator, source) {
293                if name == function_name {
294                    return Some(node);
295                }
296            }
297        }
298        // Fallback: search for identifier child (Python, etc.)
299        for child in node.children(&mut node.walk()) {
300            if child.kind() == "identifier" {
301                let name = node_text(child, source);
302                if name == function_name {
303                    return Some(node);
304                }
305                break;
306            }
307        }
308    }
309
310    // Check for arrow functions in variable declarations (TS/JS pattern):
311    // lexical_declaration / variable_declaration -> variable_declarator -> name + value(arrow_function)
312    if matches!(node.kind(), "lexical_declaration" | "variable_declaration") {
313        let mut cursor = node.walk();
314        for child in node.children(&mut cursor) {
315            if child.kind() == "variable_declarator" {
316                if let Some(name_node) = child.child_by_field_name("name") {
317                    let var_name = node_text(name_node, source);
318                    if var_name == function_name {
319                        if let Some(value_node) = child.child_by_field_name("value") {
320                            if matches!(
321                                value_node.kind(),
322                                "arrow_function"
323                                    | "function"
324                                    | "function_expression"
325                                    | "generator_function"
326                            ) {
327                                return Some(value_node);
328                            }
329                        }
330                    }
331                }
332            }
333        }
334    }
335
336    // Elixir: def/defp are `call` nodes where the first child identifier is "def"/"defp"
337    // and the function name is in the arguments
338    if node.kind() == "call" && func_kinds.contains(&"call") {
339        for child in node.children(&mut node.walk()) {
340            if child.kind() == "identifier" {
341                let text = node_text(child, source);
342                if text == "def" || text == "defp" {
343                    if let Some(args) = child.next_sibling() {
344                        if args.kind() == "arguments" || args.kind() == "call" {
345                            if let Some(name_node) = args.child(0) {
346                                let fname = if name_node.kind() == "call" {
347                                    name_node
348                                        .child(0)
349                                        .map(|n| node_text(n, source))
350                                        .unwrap_or("")
351                                } else {
352                                    node_text(name_node, source)
353                                };
354                                if fname == function_name {
355                                    return Some(node);
356                                }
357                            }
358                        }
359                    }
360                }
361            }
362        }
363    }
364
365    // OCaml: value_definition -> let_binding -> pattern field contains the function name
366    if node.kind() == "value_definition" {
367        for child in node.children(&mut node.walk()) {
368            if child.kind() == "let_binding" {
369                if let Some(pattern_node) = child.child_by_field_name("pattern") {
370                    let name = node_text(pattern_node, source);
371                    if name == function_name {
372                        return Some(node);
373                    }
374                }
375            }
376        }
377    }
378
379    // Recurse into children
380    for child in node.children(&mut node.walk()) {
381        if let Some(found) = find_function_recursive(child, source, function_name, func_kinds) {
382            return Some(found);
383        }
384    }
385
386    None
387}
388
389/// Recursively extract function name from C/C++ nested declarator chain
390fn extract_c_declarator_name_explain(declarator: Node, source: &[u8]) -> Option<String> {
391    match declarator.kind() {
392        "identifier" | "field_identifier" => {
393            let name = node_text(declarator, source).to_string();
394            if !name.is_empty() {
395                Some(name)
396            } else {
397                None
398            }
399        }
400        "function_declarator"
401        | "pointer_declarator"
402        | "reference_declarator"
403        | "parenthesized_declarator" => declarator
404            .child_by_field_name("declarator")
405            .and_then(|inner| extract_c_declarator_name_explain(inner, source)),
406        _ => None,
407    }
408}
409
410// =============================================================================
411// Signature Extraction
412// =============================================================================
413
414/// Extract signature information from a function node
415fn extract_signature(func_node: Node, source: &[u8], language: Language) -> SignatureInfo {
416    let mut sig = SignatureInfo::new();
417
418    // Check if async (language-specific)
419    sig.is_async = match language {
420        Language::Python => func_node.kind() == "async_function_definition",
421        Language::TypeScript | Language::JavaScript => {
422            // Check for async modifier
423            let mut is_async = false;
424            for child in func_node.children(&mut func_node.walk()) {
425                if child.kind() == "async" {
426                    is_async = true;
427                    break;
428                }
429            }
430            is_async
431        }
432        Language::Rust => {
433            // Check for async keyword
434            node_text(func_node, source).contains("async")
435        }
436        _ => false,
437    };
438
439    // Extract parameters
440    if let Some(params_node) = func_node.child_by_field_name("parameters") {
441        sig.params = extract_params(params_node, source);
442    }
443
444    // Extract return type
445    if let Some(return_node) = func_node.child_by_field_name("return_type") {
446        sig.return_type = Some(node_text(return_node, source).to_string());
447    }
448
449    // Extract decorators (look for decorated_definition parent or decorator children)
450    sig.decorators = extract_decorators(func_node, source);
451
452    // Extract docstring
453    sig.docstring = extract_docstring(func_node, source);
454
455    sig
456}
457
458/// Extract parameters from a parameters node
459fn extract_params(params_node: Node, source: &[u8]) -> Vec<ParamInfo> {
460    let mut params = Vec::new();
461
462    for child in params_node.children(&mut params_node.walk()) {
463        match child.kind() {
464            "identifier" => {
465                // Simple parameter without annotation
466                let name = node_text(child, source);
467                if name != "self" && name != "cls" {
468                    params.push(ParamInfo::new(name));
469                }
470            }
471            "typed_parameter" | "typed_default_parameter" => {
472                // Parameter with type annotation
473                let mut param = ParamInfo::new("");
474                for part in child.children(&mut child.walk()) {
475                    match part.kind() {
476                        "identifier" => {
477                            let name = node_text(part, source);
478                            if name != "self" && name != "cls" && param.name.is_empty() {
479                                param.name = name.to_string();
480                            }
481                        }
482                        "type" => {
483                            param.type_hint = Some(node_text(part, source).to_string());
484                        }
485                        _ => {}
486                    }
487                }
488                // Only add if we got a name
489                if !param.name.is_empty() {
490                    params.push(param);
491                }
492            }
493            "default_parameter" => {
494                // Parameter with default value
495                let mut param = ParamInfo::new("");
496                let mut got_name = false;
497                for part in child.children(&mut child.walk()) {
498                    if part.kind() == "identifier" && !got_name {
499                        let name = node_text(part, source);
500                        if name != "self" && name != "cls" {
501                            param.name = name.to_string();
502                            got_name = true;
503                        }
504                    } else if got_name && param.default.is_none() && part.kind() != "=" {
505                        param.default = Some(node_text(part, source).to_string());
506                    }
507                }
508                if !param.name.is_empty() {
509                    params.push(param);
510                }
511            }
512            _ => {}
513        }
514    }
515
516    params
517}
518
519/// Extract decorators
520fn extract_decorators(func_node: Node, source: &[u8]) -> Vec<String> {
521    let mut decorators = Vec::new();
522
523    // Check if parent is decorated_definition
524    if let Some(parent) = func_node.parent() {
525        if parent.kind() == "decorated_definition" {
526            for child in parent.children(&mut parent.walk()) {
527                if child.kind() == "decorator" {
528                    let text = node_text(child, source);
529                    decorators.push(text.trim_start_matches('@').to_string());
530                }
531            }
532        }
533    }
534
535    decorators
536}
537
538/// Extract docstring from function body
539fn extract_docstring(func_node: Node, source: &[u8]) -> Option<String> {
540    // Look for the function body (block)
541    if let Some(body) = func_node.child_by_field_name("body") {
542        // First statement in body might be a docstring
543        if let Some(first_stmt) = body.child(0) {
544            if first_stmt.kind() == "expression_statement" {
545                if let Some(expr) = first_stmt.child(0) {
546                    if expr.kind() == "string" {
547                        let text = node_text(expr, source);
548                        // Remove quotes
549                        let cleaned = text
550                            .trim_start_matches("\"\"\"")
551                            .trim_start_matches("'''")
552                            .trim_start_matches('"')
553                            .trim_start_matches('\'')
554                            .trim_end_matches("\"\"\"")
555                            .trim_end_matches("'''")
556                            .trim_end_matches('"')
557                            .trim_end_matches('\'')
558                            .trim();
559                        return Some(cleaned.to_string());
560                    }
561                }
562            }
563        }
564    }
565    None
566}
567
568// =============================================================================
569// Purity Analysis
570// =============================================================================
571
572/// Analyze purity of a function
573fn analyze_purity(func_node: Node, source: &[u8]) -> PurityInfo {
574    let mut effects = Vec::new();
575    let mut has_unknown_calls = false;
576    let mut has_any_calls = false;
577
578    analyze_purity_recursive(
579        func_node,
580        source,
581        &mut effects,
582        &mut has_unknown_calls,
583        &mut has_any_calls,
584    );
585
586    if !effects.is_empty() {
587        // Has side effects -> impure
588        PurityInfo::impure(effects)
589    } else if has_unknown_calls {
590        // No known side effects, but calls unknown functions -> unknown
591        PurityInfo::unknown().with_confidence("medium")
592    } else if has_any_calls {
593        // All calls resolved to known-pure builtins -> pure
594        PurityInfo::pure()
595    } else {
596        // No calls detected at all (empty body or pure computation like a+b).
597        // Absence of evidence is not evidence of purity — classify as unknown
598        // with low confidence since we have nothing to base a purity claim on.
599        PurityInfo::unknown().with_confidence("low")
600    }
601}
602
603fn analyze_purity_recursive(
604    node: Node,
605    source: &[u8],
606    effects: &mut Vec<String>,
607    has_unknown_calls: &mut bool,
608    has_any_calls: &mut bool,
609) {
610    match node.kind() {
611        "global_statement" | "nonlocal_statement" => {
612            if !effects.contains(&"global_write".to_string()) {
613                effects.push("global_write".to_string());
614            }
615        }
616        "assignment" | "augmented_assignment" => {
617            // Check for attribute writes (self.x = ...)
618            if let Some(left) = node.child_by_field_name("left") {
619                if left.kind() == "attribute" && !effects.contains(&"attribute_write".to_string()) {
620                    effects.push("attribute_write".to_string());
621                }
622            }
623        }
624        "call" => {
625            *has_any_calls = true;
626            let call_name = extract_call_name(node, source);
627            if let Some(name) = &call_name {
628                // Check for I/O operations
629                for &io_op in IO_OPERATIONS {
630                    if name == io_op || name.ends_with(&format!(".{}", io_op)) {
631                        if !effects.contains(&"io".to_string()) {
632                            effects.push("io".to_string());
633                        }
634                        return;
635                    }
636                }
637
638                // Check for impure calls
639                for &impure in IMPURE_CALLS {
640                    if name == impure || name.ends_with(impure) {
641                        if !effects.contains(&"io".to_string()) {
642                            effects.push("io".to_string());
643                        }
644                        return;
645                    }
646                }
647
648                // Check for collection mutations
649                let method_name = name.split('.').next_back().unwrap_or(name);
650                for &mutation in COLLECTION_MUTATIONS {
651                    if method_name == mutation {
652                        if !effects.contains(&"collection_modify".to_string()) {
653                            effects.push("collection_modify".to_string());
654                        }
655                        return;
656                    }
657                }
658
659                // Check if it's a known pure builtin
660                let base = name.split('.').next_back().unwrap_or(name);
661                if !PURE_BUILTINS.contains(&name.as_str()) && !PURE_BUILTINS.contains(&base) {
662                    *has_unknown_calls = true;
663                }
664            }
665        }
666        _ => {}
667    }
668
669    // Recurse into children
670    for child in node.children(&mut node.walk()) {
671        analyze_purity_recursive(child, source, effects, has_unknown_calls, has_any_calls);
672    }
673}
674
675/// Extract call name from a call node
676fn extract_call_name(node: Node, source: &[u8]) -> Option<String> {
677    if let Some(func) = node.child_by_field_name("function") {
678        return Some(extract_name_from_expr(func, source));
679    }
680
681    for child in node.children(&mut node.walk()) {
682        match child.kind() {
683            "identifier" => return Some(node_text(child, source).to_string()),
684            "attribute" => return Some(extract_name_from_expr(child, source)),
685            _ => continue,
686        }
687    }
688    None
689}
690
691/// Extract a dotted name from an expression
692fn extract_name_from_expr(node: Node, source: &[u8]) -> String {
693    match node.kind() {
694        "identifier" => node_text(node, source).to_string(),
695        "attribute" => {
696            let mut parts = Vec::new();
697            let mut current = node;
698
699            loop {
700                if let Some(attr) = current.child_by_field_name("attribute") {
701                    parts.push(node_text(attr, source).to_string());
702                }
703
704                if let Some(obj) = current.child_by_field_name("object") {
705                    if obj.kind() == "attribute" {
706                        current = obj;
707                    } else if obj.kind() == "identifier" {
708                        parts.push(node_text(obj, source).to_string());
709                        break;
710                    } else {
711                        break;
712                    }
713                } else {
714                    break;
715                }
716            }
717
718            parts.reverse();
719            parts.join(".")
720        }
721        _ => node_text(node, source).to_string(),
722    }
723}
724
725// =============================================================================
726// Complexity Analysis
727// =============================================================================
728
729/// Compute complexity metrics for a function
730fn compute_complexity(func_node: Node) -> ComplexityInfo {
731    let mut cyclomatic = 1; // Base complexity
732    let mut num_blocks = 1;
733    let mut num_edges = 0;
734    let mut has_loops = false;
735
736    count_complexity_recursive(
737        func_node,
738        &mut cyclomatic,
739        &mut num_blocks,
740        &mut num_edges,
741        &mut has_loops,
742    );
743
744    ComplexityInfo::new(cyclomatic, num_blocks, num_edges, has_loops)
745}
746
747fn count_complexity_recursive(
748    node: Node,
749    cyclomatic: &mut u32,
750    num_blocks: &mut u32,
751    num_edges: &mut u32,
752    has_loops: &mut bool,
753) {
754    match node.kind() {
755        "if_statement" | "elif_clause" => {
756            *cyclomatic += 1;
757            *num_blocks += 1;
758            *num_edges += 2;
759        }
760        "for_statement" | "while_statement" => {
761            *cyclomatic += 1;
762            *num_blocks += 1;
763            *num_edges += 2;
764            *has_loops = true;
765        }
766        "try_statement" => {
767            *cyclomatic += 1;
768            *num_blocks += 1;
769            *num_edges += 1;
770        }
771        "except_clause" => {
772            *cyclomatic += 1;
773            *num_blocks += 1;
774            *num_edges += 1;
775        }
776        "and_operator" | "or_operator" => {
777            *cyclomatic += 1;
778        }
779        "conditional_expression" => {
780            // Ternary: x if cond else y
781            *cyclomatic += 1;
782            *num_edges += 1;
783        }
784        "list_comprehension"
785        | "set_comprehension"
786        | "dictionary_comprehension"
787        | "generator_expression" => {
788            *cyclomatic += 1;
789            *has_loops = true;
790        }
791        _ => {}
792    }
793
794    for child in node.children(&mut node.walk()) {
795        count_complexity_recursive(child, cyclomatic, num_blocks, num_edges, has_loops);
796    }
797}
798
799// =============================================================================
800// Call Graph Analysis
801// =============================================================================
802
803/// Find callees (functions called by this function)
804fn find_callees(
805    func_node: Node,
806    source: &[u8],
807    file_path: &str,
808    local_functions: &HashSet<String>,
809) -> Vec<CallInfo> {
810    let mut callees = Vec::new();
811    find_callees_recursive(func_node, source, file_path, local_functions, &mut callees);
812    callees
813}
814
815fn find_callees_recursive(
816    node: Node,
817    source: &[u8],
818    file_path: &str,
819    local_functions: &HashSet<String>,
820    callees: &mut Vec<CallInfo>,
821) {
822    if node.kind() == "call" {
823        if let Some(name) = extract_call_name(node, source) {
824            // Get base name for local function check
825            let base_name = name.split('.').next().unwrap_or(&name);
826
827            // Add if it's a local function or a known call
828            let file = if local_functions.contains(base_name) {
829                file_path.to_string()
830            } else {
831                "<external>".to_string()
832            };
833
834            // Avoid duplicates
835            if !callees.iter().any(|c| c.name == name) {
836                callees.push(CallInfo::new(name, file, get_line_number(node)));
837            }
838        }
839    }
840
841    for child in node.children(&mut node.walk()) {
842        find_callees_recursive(child, source, file_path, local_functions, callees);
843    }
844}
845
846/// Find callers (functions that call this function) - searches the entire file
847fn find_callers(
848    root: Node,
849    source: &[u8],
850    target_function: &str,
851    file_path: &str,
852    func_kinds: &[&str],
853) -> Vec<CallInfo> {
854    let mut callers = Vec::new();
855    find_callers_in_file(
856        root,
857        source,
858        target_function,
859        file_path,
860        &mut callers,
861        None,
862        func_kinds,
863    );
864    callers
865}
866
867fn find_callers_in_file(
868    node: Node,
869    source: &[u8],
870    target_function: &str,
871    file_path: &str,
872    callers: &mut Vec<CallInfo>,
873    current_function: Option<&str>,
874    func_kinds: &[&str],
875) {
876    if func_kinds.contains(&node.kind()) {
877        // Get this function's name
878        let mut func_name = None;
879
880        // Try field name first
881        if let Some(name_node) = node.child_by_field_name("name") {
882            func_name = Some(node_text(name_node, source));
883        } else {
884            // Fallback: search for identifier child
885            for child in node.children(&mut node.walk()) {
886                if child.kind() == "identifier" {
887                    func_name = Some(node_text(child, source));
888                    break;
889                }
890            }
891        }
892
893        // Recurse with this function as current
894        for child in node.children(&mut node.walk()) {
895            find_callers_in_file(
896                child,
897                source,
898                target_function,
899                file_path,
900                callers,
901                func_name,
902                func_kinds,
903            );
904        }
905        return;
906    } else if node.kind() == "call" {
907        if let Some(name) = extract_call_name(node, source) {
908            // Check if this call is to our target function
909            let base = name.split('.').next_back().unwrap_or(&name);
910            if base == target_function || name == target_function {
911                if let Some(caller_name) = current_function {
912                    // Avoid duplicates and self-references
913                    if caller_name != target_function
914                        && !callers.iter().any(|c| c.name == caller_name)
915                    {
916                        callers.push(CallInfo::new(caller_name, file_path, get_line_number(node)));
917                    }
918                }
919            }
920        }
921    }
922
923    for child in node.children(&mut node.walk()) {
924        find_callers_in_file(
925            child,
926            source,
927            target_function,
928            file_path,
929            callers,
930            current_function,
931            func_kinds,
932        );
933    }
934}
935
936/// Collect all function names in a file
937fn collect_function_names(root: Node, source: &[u8], func_kinds: &[&str]) -> HashSet<String> {
938    let mut names = HashSet::new();
939    collect_function_names_recursive(root, source, &mut names, func_kinds);
940    names
941}
942
943fn collect_function_names_recursive(
944    node: Node,
945    source: &[u8],
946    names: &mut HashSet<String>,
947    func_kinds: &[&str],
948) {
949    if func_kinds.contains(&node.kind()) {
950        // Try field name first
951        if let Some(name_node) = node.child_by_field_name("name") {
952            names.insert(node_text(name_node, source).to_string());
953        } else {
954            // Fallback: search for identifier child
955            for child in node.children(&mut node.walk()) {
956                if child.kind() == "identifier" {
957                    names.insert(node_text(child, source).to_string());
958                    break;
959                }
960            }
961        }
962    }
963
964    for child in node.children(&mut node.walk()) {
965        collect_function_names_recursive(child, source, names, func_kinds);
966    }
967}
968
969// =============================================================================
970// Text Formatting
971// =============================================================================
972
973/// Format an ExplainReport as human-readable text
974fn format_explain_text(report: &ExplainReport) -> String {
975    let mut lines = Vec::new();
976
977    lines.push(format!("Function: {}", report.function_name));
978    lines.push(format!("File: {}", report.file));
979    lines.push(format!("Lines: {}-{}", report.line_start, report.line_end));
980    lines.push(format!("Language: {}", report.language));
981    lines.push(String::new());
982
983    // Signature
984    lines.push("Signature:".to_string());
985    if report.signature.is_async {
986        lines.push("  async: yes".to_string());
987    }
988    lines.push(format!("  Parameters: {}", report.signature.params.len()));
989    for param in &report.signature.params {
990        let type_str = param.type_hint.as_deref().unwrap_or("untyped");
991        lines.push(format!("    - {}: {}", param.name, type_str));
992    }
993    if let Some(ref ret) = report.signature.return_type {
994        lines.push(format!("  Returns: {}", ret));
995    }
996    if !report.signature.decorators.is_empty() {
997        lines.push(format!(
998            "  Decorators: {}",
999            report.signature.decorators.join(", ")
1000        ));
1001    }
1002    if let Some(ref doc) = report.signature.docstring {
1003        let preview = if doc.len() > 100 {
1004            format!("{}...", &doc[..100])
1005        } else {
1006            doc.clone()
1007        };
1008        lines.push(format!("  Docstring: {}", preview));
1009    }
1010    lines.push(String::new());
1011
1012    // Purity
1013    lines.push("Purity:".to_string());
1014    lines.push(format!(
1015        "  Classification: {}",
1016        report.purity.classification
1017    ));
1018    lines.push(format!("  Confidence: {}", report.purity.confidence));
1019    if !report.purity.effects.is_empty() {
1020        lines.push(format!("  Effects: {}", report.purity.effects.join(", ")));
1021    }
1022    lines.push(String::new());
1023
1024    // Complexity
1025    if let Some(ref cx) = report.complexity {
1026        lines.push("Complexity:".to_string());
1027        lines.push(format!("  Cyclomatic: {}", cx.cyclomatic));
1028        lines.push(format!("  Blocks: {}", cx.num_blocks));
1029        lines.push(format!("  Edges: {}", cx.num_edges));
1030        lines.push(format!("  Has loops: {}", cx.has_loops));
1031        lines.push(String::new());
1032    }
1033
1034    // Callers
1035    if !report.callers.is_empty() {
1036        lines.push(format!("Callers ({}):", report.callers.len()));
1037        for caller in &report.callers {
1038            lines.push(format!(
1039                "  - {} ({}:{})",
1040                caller.name, caller.file, caller.line
1041            ));
1042        }
1043        lines.push(String::new());
1044    }
1045
1046    // Callees
1047    if !report.callees.is_empty() {
1048        lines.push(format!("Callees ({}):", report.callees.len()));
1049        for callee in &report.callees {
1050            lines.push(format!(
1051                "  - {} ({}:{})",
1052                callee.name, callee.file, callee.line
1053            ));
1054        }
1055    }
1056
1057    lines.join("\n")
1058}
1059
1060// =============================================================================
1061// Entry Point
1062// =============================================================================
1063
1064impl ExplainArgs {
1065    /// Run the explain command
1066    pub fn run(&self, format: OutputFormat, quiet: bool) -> Result<()> {
1067        let writer = OutputWriter::new(format, quiet);
1068
1069        writer.progress(&format!(
1070            "Analyzing function {} in {}...",
1071            self.function,
1072            self.file.display()
1073        ));
1074
1075        // Check file exists
1076        if !self.file.exists() {
1077            return Err(RemainingError::file_not_found(&self.file).into());
1078        }
1079
1080        // Detect language from file extension
1081        let language = Language::from_path(&self.file)
1082            .ok_or_else(|| RemainingError::parse_error(&self.file, "Unsupported language"))?;
1083
1084        // Get function node kinds for this language
1085        let func_kinds = get_function_node_kinds(language);
1086
1087        // Read source
1088        let source = std::fs::read_to_string(&self.file)
1089            .map_err(|e| RemainingError::parse_error(&self.file, e.to_string()))?;
1090        let source_bytes = source.as_bytes();
1091
1092        // Parse with tree-sitter
1093        let mut parser = get_parser(language)?;
1094        let tree = parser
1095            .parse(&source, None)
1096            .ok_or_else(|| RemainingError::parse_error(&self.file, "Failed to parse file"))?;
1097
1098        let root = tree.root_node();
1099
1100        // Find the function
1101        let func_node = find_function_node(root, source_bytes, &self.function, func_kinds)
1102            .ok_or_else(|| RemainingError::symbol_not_found(&self.function, &self.file))?;
1103
1104        // Get file path string
1105        let file_path = self.file.to_string_lossy().to_string();
1106
1107        // Get language name for report
1108        let language_name = match language {
1109            Language::Python => "python",
1110            Language::TypeScript => "typescript",
1111            Language::JavaScript => "javascript",
1112            Language::Go => "go",
1113            Language::Rust => "rust",
1114            Language::Java => "java",
1115            Language::C => "c",
1116            Language::Cpp => "cpp",
1117            Language::CSharp => "csharp",
1118            Language::Kotlin => "kotlin",
1119            Language::Scala => "scala",
1120            Language::Php => "php",
1121            Language::Ruby => "ruby",
1122            Language::Lua => "lua",
1123            Language::Luau => "luau",
1124            Language::Elixir => "elixir",
1125            Language::Ocaml => "ocaml",
1126            Language::Swift => "swift",
1127        };
1128
1129        // Build report
1130        let mut report = ExplainReport::new(
1131            &self.function,
1132            &file_path,
1133            get_line_number(func_node),
1134            get_end_line_number(func_node),
1135            language_name,
1136        );
1137
1138        // Extract signature
1139        report.signature = extract_signature(func_node, source_bytes, language);
1140
1141        // Analyze purity
1142        report.purity = analyze_purity(func_node, source_bytes);
1143
1144        // Compute complexity
1145        report.complexity = Some(compute_complexity(func_node));
1146
1147        // Collect local function names for call graph analysis
1148        let local_functions = collect_function_names(root, source_bytes, func_kinds);
1149
1150        // Find callees
1151        report.callees = find_callees(func_node, source_bytes, &file_path, &local_functions);
1152
1153        // Find callers
1154        report.callers = find_callers(root, source_bytes, &self.function, &file_path, func_kinds);
1155
1156        // Output based on format
1157        if writer.is_text() {
1158            let text = format_explain_text(&report);
1159            writer.write_text(&text)?;
1160        } else {
1161            writer.write(&report)?;
1162        }
1163
1164        // Write to output file if specified
1165        if let Some(ref output_path) = self.output {
1166            let output_str = if format == OutputFormat::Text {
1167                format_explain_text(&report)
1168            } else {
1169                serde_json::to_string_pretty(&report)?
1170            };
1171            std::fs::write(output_path, &output_str)?;
1172        }
1173
1174        Ok(())
1175    }
1176}
1177
1178// =============================================================================
1179// Tests
1180// =============================================================================
1181
1182#[cfg(test)]
1183mod tests {
1184    use super::*;
1185
1186    const SAMPLE_CODE: &str = r#"
1187def calculate_total(items: list[dict], tax_rate: float = 0.1) -> float:
1188    """Calculate total price with tax.
1189
1190    Args:
1191        items: List of items with 'price' key
1192        tax_rate: Tax rate as decimal (default 10%)
1193
1194    Returns:
1195        Total price including tax
1196    """
1197    subtotal = sum(item['price'] for item in items)
1198    return subtotal * (1 + tax_rate)
1199
1200def helper_function(x):
1201    return x * 2
1202
1203def main():
1204    items = [{'price': 10}, {'price': 20}]
1205    total = calculate_total(items)
1206    doubled = helper_function(total)
1207    print(doubled)
1208"#;
1209
1210    #[test]
1211    fn test_find_function() {
1212        let language = Language::Python;
1213        let func_kinds = get_function_node_kinds(language);
1214        let mut parser = get_parser(language).unwrap();
1215        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1216        let root = tree.root_node();
1217
1218        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "calculate_total", func_kinds);
1219        assert!(func.is_some());
1220
1221        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "nonexistent", func_kinds);
1222        assert!(func.is_none());
1223    }
1224
1225    #[test]
1226    fn test_extract_signature() {
1227        let language = Language::Python;
1228        let func_kinds = get_function_node_kinds(language);
1229        let mut parser = get_parser(language).unwrap();
1230        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1231        let root = tree.root_node();
1232
1233        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "calculate_total", func_kinds)
1234            .unwrap();
1235        let sig = extract_signature(func, SAMPLE_CODE.as_bytes(), language);
1236
1237        assert_eq!(sig.params.len(), 2);
1238        assert_eq!(sig.params[0].name, "items");
1239        assert_eq!(sig.params[1].name, "tax_rate");
1240        assert!(sig.return_type.is_some());
1241        assert!(sig.docstring.is_some());
1242    }
1243
1244    #[test]
1245    fn test_purity_analysis() {
1246        let language = Language::Python;
1247        let func_kinds = get_function_node_kinds(language);
1248        let mut parser = get_parser(language).unwrap();
1249        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1250        let root = tree.root_node();
1251
1252        // calculate_total should be pure
1253        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "calculate_total", func_kinds)
1254            .unwrap();
1255        let purity = analyze_purity(func, SAMPLE_CODE.as_bytes());
1256        assert_eq!(purity.classification, "pure");
1257
1258        // main calls print, so impure
1259        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "main", func_kinds).unwrap();
1260        let purity = analyze_purity(func, SAMPLE_CODE.as_bytes());
1261        assert_eq!(purity.classification, "impure");
1262        assert!(purity.effects.contains(&"io".to_string()));
1263    }
1264
1265    #[test]
1266    fn test_complexity_analysis() {
1267        let code = r#"
1268def complex_func(x, y):
1269    if x > 0:
1270        if y > 0:
1271            return x + y
1272        else:
1273            return x
1274    else:
1275        for i in range(10):
1276            x += i
1277        return x
1278"#;
1279        let language = Language::Python;
1280        let func_kinds = get_function_node_kinds(language);
1281        let mut parser = get_parser(language).unwrap();
1282        let tree = parser.parse(code, None).unwrap();
1283        let root = tree.root_node();
1284
1285        let func = find_function_node(root, code.as_bytes(), "complex_func", func_kinds).unwrap();
1286        let cx = compute_complexity(func);
1287
1288        assert!(cx.cyclomatic > 1);
1289        assert!(cx.has_loops);
1290    }
1291
1292    #[test]
1293    fn test_find_callees() {
1294        let language = Language::Python;
1295        let func_kinds = get_function_node_kinds(language);
1296        let mut parser = get_parser(language).unwrap();
1297        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1298        let root = tree.root_node();
1299
1300        let local_funcs = collect_function_names(root, SAMPLE_CODE.as_bytes(), func_kinds);
1301        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "main", func_kinds).unwrap();
1302        let callees = find_callees(func, SAMPLE_CODE.as_bytes(), "test.py", &local_funcs);
1303
1304        assert!(callees.iter().any(|c| c.name == "calculate_total"));
1305        assert!(callees.iter().any(|c| c.name == "helper_function"));
1306    }
1307
1308    #[test]
1309    fn test_find_callers() {
1310        let language = Language::Python;
1311        let func_kinds = get_function_node_kinds(language);
1312        let mut parser = get_parser(language).unwrap();
1313        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1314        let root = tree.root_node();
1315
1316        let callers = find_callers(
1317            root,
1318            SAMPLE_CODE.as_bytes(),
1319            "calculate_total",
1320            "test.py",
1321            func_kinds,
1322        );
1323        assert!(callers.iter().any(|c| c.name == "main"));
1324    }
1325
1326    #[test]
1327    fn test_find_ts_arrow_function() {
1328        let ts_source = r#"
1329const getDuration = (start: Date, end: Date): number => {
1330    return end.getTime() - start.getTime();
1331};
1332
1333function regularFunc(x: number): number {
1334    return x * 2;
1335}
1336
1337export const processItems = (items: string[]) => {
1338    return items.map(i => i.trim());
1339};
1340"#;
1341        let language = Language::TypeScript;
1342        let func_kinds = get_function_node_kinds(language);
1343        let mut parser = get_parser(language).unwrap();
1344        let tree = parser.parse(ts_source, None).unwrap();
1345        let root = tree.root_node();
1346
1347        // Regular function should always work
1348        let regular = find_function_node(root, ts_source.as_bytes(), "regularFunc", func_kinds);
1349        assert!(regular.is_some(), "Should find regular TS function");
1350
1351        // Arrow function assigned to const should also work
1352        let arrow = find_function_node(root, ts_source.as_bytes(), "getDuration", func_kinds);
1353        assert!(
1354            arrow.is_some(),
1355            "Should find TS arrow function 'getDuration'"
1356        );
1357
1358        // Exported arrow function should also work
1359        let exported = find_function_node(root, ts_source.as_bytes(), "processItems", func_kinds);
1360        assert!(
1361            exported.is_some(),
1362            "Should find exported TS arrow function 'processItems'"
1363        );
1364    }
1365
1366    // =========================================================================
1367    // Bug: analyze_purity returns "pure" when it should return "unknown"
1368    // =========================================================================
1369
1370    /// A function with no function body content (empty/pass) should classify
1371    /// as "unknown", not "pure". We have no evidence of purity -- the analysis
1372    /// simply found nothing.
1373    #[test]
1374    fn test_empty_function_is_unknown_not_pure() {
1375        let source = r#"
1376def empty_func():
1377    pass
1378"#;
1379        let language = Language::Python;
1380        let func_kinds = get_function_node_kinds(language);
1381        let mut parser = get_parser(language).unwrap();
1382        let tree = parser.parse(source, None).unwrap();
1383        let root = tree.root_node();
1384
1385        let func_node = find_function_node(root, source.as_bytes(), "empty_func", func_kinds);
1386        assert!(func_node.is_some(), "Should find empty_func");
1387
1388        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1389
1390        // The buggy code returns "pure" because no effects and no unknown calls.
1391        // But "pass" means we found nothing -- not that we proved purity.
1392        // A truly empty function (just `pass`) has no evidence to support "pure".
1393        assert_ne!(
1394            purity.classification, "pure",
1395            "A function with only `pass` (no calls, no computation) should NOT be classified as \
1396             'pure' with high confidence. We have no evidence to support a purity claim. \
1397             Got classification='{}', confidence='{}'. Expected 'unknown'.",
1398            purity.classification, purity.confidence
1399        );
1400    }
1401
1402    /// A function that calls other user-defined functions (not builtins, not IO)
1403    /// where those calls are unresolved should classify as "unknown", not "pure".
1404    ///
1405    /// The bug: when a call doesn't match IO_OPERATIONS, IMPURE_CALLS,
1406    /// COLLECTION_MUTATIONS, or PURE_BUILTINS, it sets has_unknown_calls=true.
1407    /// This case is actually handled correctly for unknown calls, BUT if the
1408    /// call name happens to match a PURE_BUILTIN substring, it incorrectly
1409    /// passes as pure. This test verifies the general "unknown calls" path works.
1410    #[test]
1411    fn test_function_with_unknown_calls_is_unknown() {
1412        let source = r#"
1413def my_func(x):
1414    result = compute_something(x)
1415    return transform_result(result)
1416"#;
1417        let language = Language::Python;
1418        let func_kinds = get_function_node_kinds(language);
1419        let mut parser = get_parser(language).unwrap();
1420        let tree = parser.parse(source, None).unwrap();
1421        let root = tree.root_node();
1422
1423        let func_node = find_function_node(root, source.as_bytes(), "my_func", func_kinds);
1424        assert!(func_node.is_some(), "Should find my_func");
1425
1426        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1427
1428        // compute_something and transform_result are NOT in PURE_BUILTINS,
1429        // so has_unknown_calls should be true -> classification = "unknown"
1430        assert_eq!(
1431            purity.classification, "unknown",
1432            "Function calling unknown user functions should be 'unknown', got '{}'",
1433            purity.classification
1434        );
1435        assert_ne!(
1436            purity.confidence, "high",
1437            "Unknown classification should not have high confidence, got '{}'",
1438            purity.confidence
1439        );
1440    }
1441
1442    /// A function that ONLY calls known-pure builtins should classify as "pure".
1443    /// This is the legitimate pure case.
1444    #[test]
1445    fn test_only_pure_builtins_is_pure() {
1446        let source = r#"
1447def pure_func(items):
1448    return len(items) + sum(items)
1449"#;
1450        let language = Language::Python;
1451        let func_kinds = get_function_node_kinds(language);
1452        let mut parser = get_parser(language).unwrap();
1453        let tree = parser.parse(source, None).unwrap();
1454        let root = tree.root_node();
1455
1456        let func_node = find_function_node(root, source.as_bytes(), "pure_func", func_kinds);
1457        assert!(func_node.is_some(), "Should find pure_func");
1458
1459        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1460
1461        assert_eq!(
1462            purity.classification, "pure",
1463            "Function calling only pure builtins (len, sum) should be 'pure', got '{}'",
1464            purity.classification
1465        );
1466        assert_eq!(
1467            purity.confidence, "high",
1468            "Pure classification should have high confidence"
1469        );
1470    }
1471
1472    /// A function with IO operations should classify as "impure".
1473    #[test]
1474    fn test_io_operations_is_impure() {
1475        let source = r#"
1476def impure_func(msg):
1477    print(msg)
1478    return True
1479"#;
1480        let language = Language::Python;
1481        let func_kinds = get_function_node_kinds(language);
1482        let mut parser = get_parser(language).unwrap();
1483        let tree = parser.parse(source, None).unwrap();
1484        let root = tree.root_node();
1485
1486        let func_node = find_function_node(root, source.as_bytes(), "impure_func", func_kinds);
1487        assert!(func_node.is_some(), "Should find impure_func");
1488
1489        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1490
1491        assert_eq!(
1492            purity.classification, "impure",
1493            "Function with print() should be 'impure', got '{}'",
1494            purity.classification
1495        );
1496        assert_eq!(
1497            purity.confidence, "high",
1498            "Impure classification should have high confidence"
1499        );
1500        assert!(
1501            purity.effects.contains(&"io".to_string()),
1502            "Effects should contain 'io', got {:?}",
1503            purity.effects
1504        );
1505    }
1506
1507    /// A function with only arithmetic (no calls at all) should be "unknown"
1508    /// because we have no positive evidence of purity -- the analysis simply
1509    /// didn't find any calls to classify.
1510    #[test]
1511    fn test_no_calls_arithmetic_only_is_unknown() {
1512        let source = r#"
1513def add(a, b):
1514    return a + b
1515"#;
1516        let language = Language::Python;
1517        let func_kinds = get_function_node_kinds(language);
1518        let mut parser = get_parser(language).unwrap();
1519        let tree = parser.parse(source, None).unwrap();
1520        let root = tree.root_node();
1521
1522        let func_node = find_function_node(root, source.as_bytes(), "add", func_kinds);
1523        assert!(func_node.is_some(), "Should find add");
1524
1525        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1526
1527        // The bug: analyze_purity returns "pure" because no effects and
1528        // no unknown calls. But we have no positive evidence -- we just
1529        // didn't find any calls. The correct answer is "unknown" with
1530        // low confidence, or at minimum not "pure/high".
1531        assert_ne!(
1532            purity.classification, "pure",
1533            "A simple arithmetic function with no calls should NOT confidently be 'pure'. \
1534             The analysis found no calls to evaluate -- absence of evidence is not evidence \
1535             of purity. Got classification='{}', confidence='{}'. \
1536             Expected 'unknown' since no calls were analyzed.",
1537            purity.classification, purity.confidence
1538        );
1539    }
1540}