Skip to main content

tldr_cli/commands/remaining/
explain.rs

1//! Explain Command - Comprehensive Function Analysis
2//!
3//! The explain command provides a complete analysis of a function including:
4//! - Signature extraction (params, return type, decorators, docstring)
5//! - Purity analysis (pure/impure/unknown with effects)
6//! - Complexity metrics (cyclomatic, blocks, edges, loops)
7//! - Call relationships (callers and callees)
8//!
9//! # Example
10//!
11//! ```bash
12//! # Analyze a function
13//! tldr explain src/utils.py calculate_total
14//!
15//! # With call graph depth
16//! tldr explain src/utils.py calculate_total --depth 3
17//!
18//! # Text output
19//! tldr explain src/utils.py calculate_total --format text
20//! ```
21
22use std::collections::HashSet;
23use std::path::PathBuf;
24
25use anyhow::Result;
26use clap::Args;
27use tree_sitter::{Node, Parser};
28
29use super::error::RemainingError;
30use super::types::{CallInfo, ComplexityInfo, ExplainReport, ParamInfo, PurityInfo, SignatureInfo};
31
32use crate::output::{OutputFormat, OutputWriter};
33use tldr_core::types::Language;
34
35// =============================================================================
36// CLI Arguments
37// =============================================================================
38
39/// Provide comprehensive function analysis.
40#[derive(Debug, Clone, Args)]
41pub struct ExplainArgs {
42    /// Source file to analyze
43    pub file: PathBuf,
44
45    /// Function name to explain
46    pub function: String,
47
48    /// Call graph depth for callers/callees
49    #[arg(long, default_value = "2")]
50    pub depth: u32,
51
52    /// Output file (stdout if not specified)
53    #[arg(long, short = 'o')]
54    pub output: Option<PathBuf>,
55}
56
57// =============================================================================
58// Constants
59// =============================================================================
60
61/// Known I/O operations that make a function impure
62const IO_OPERATIONS: &[&str] = &[
63    "print",
64    "open",
65    "read",
66    "write",
67    "readline",
68    "readlines",
69    "writelines",
70    "input",
71    "system",
72    "popen",
73    "exec",
74    "eval",
75    "request",
76    "fetch",
77    "urlopen",
78    "execute",
79    "executemany",
80    "fetchone",
81    "fetchall",
82];
83
84/// Known impure calls (non-deterministic or side-effecting)
85const IMPURE_CALLS: &[&str] = &[
86    "random",
87    "randint",
88    "choice",
89    "shuffle",
90    "sample",
91    "uniform",
92    "random.random",
93    "random.randint",
94    "random.choice",
95    "random.shuffle",
96    "time",
97    "time.time",
98    "datetime.now",
99    "datetime.datetime.now",
100    "uuid4",
101    "uuid1",
102    "uuid.uuid4",
103    "uuid.uuid1",
104    "logging.info",
105    "logging.debug",
106    "logging.warning",
107    "logging.error",
108    "os.system",
109    "os.popen",
110    "os.getenv",
111    "os.environ",
112    "os.mkdir",
113    "os.remove",
114    "requests.get",
115    "requests.post",
116    "requests.put",
117    "requests.delete",
118    "subprocess.run",
119    "subprocess.call",
120    "subprocess.Popen",
121];
122
123/// Collection mutation methods
124const COLLECTION_MUTATIONS: &[&str] = &[
125    "append",
126    "extend",
127    "insert",
128    "remove",
129    "pop",
130    "clear",
131    "update",
132    "add",
133    "discard",
134    "setdefault",
135    "sort",
136    "reverse",
137];
138
139/// Known pure builtins
140const PURE_BUILTINS: &[&str] = &[
141    "len",
142    "range",
143    "int",
144    "float",
145    "str",
146    "bool",
147    "list",
148    "dict",
149    "set",
150    "tuple",
151    "sorted",
152    "reversed",
153    "enumerate",
154    "zip",
155    "map",
156    "filter",
157    "min",
158    "max",
159    "sum",
160    "abs",
161    "round",
162    "isinstance",
163    "issubclass",
164    "type",
165    "id",
166    "hash",
167    "repr",
168    "next",
169    "iter",
170    "all",
171    "any",
172    "chr",
173    "ord",
174    "hex",
175    "oct",
176    "bin",
177    "pow",
178    "divmod",
179    "super",
180    "property",
181    "staticmethod",
182    "classmethod",
183];
184
185// =============================================================================
186// Tree-sitter Multi-Language Parsing
187// =============================================================================
188
189/// Get function node kinds for a given language
190fn get_function_node_kinds(language: Language) -> &'static [&'static str] {
191    match language {
192        Language::Python => &["function_definition", "async_function_definition"],
193        Language::TypeScript | Language::JavaScript => &[
194            "function_declaration",
195            "arrow_function",
196            "method_definition",
197            "function",
198        ],
199        Language::Go => &["function_declaration", "method_declaration"],
200        Language::Rust => &["function_item"],
201        Language::Java => &["method_declaration", "constructor_declaration"],
202        Language::Kotlin => &["function_declaration"],
203        Language::CSharp => &["method_declaration", "constructor_declaration"],
204        Language::Ruby => &["method", "singleton_method"],
205        Language::Php => &["function_definition", "method_declaration"],
206        Language::Scala => &["function_definition"],
207        Language::Swift => &["function_declaration"],
208        Language::C | Language::Cpp => &["function_definition"],
209        Language::Lua | Language::Luau => &["function_declaration", "function_definition"],
210        Language::Elixir => &["call"], // Elixir def/defp are call nodes
211        Language::Ocaml => &["value_definition"],
212    }
213}
214
215/// Initialize tree-sitter parser for the detected language
216fn get_parser(language: Language) -> Result<Parser, RemainingError> {
217    let mut parser = Parser::new();
218
219    let ts_language = match language {
220        Language::Python => tree_sitter_python::LANGUAGE.into(),
221        Language::TypeScript => tree_sitter_typescript::LANGUAGE_TSX.into(),
222        Language::JavaScript => tree_sitter_typescript::LANGUAGE_TSX.into(),
223        Language::Go => tree_sitter_go::LANGUAGE.into(),
224        Language::Rust => tree_sitter_rust::LANGUAGE.into(),
225        Language::Java => tree_sitter_java::LANGUAGE.into(),
226        Language::C => tree_sitter_c::LANGUAGE.into(),
227        Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
228        Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
229        Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
230        Language::Scala => tree_sitter_scala::LANGUAGE.into(),
231        Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
232        Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
233        Language::Lua => tree_sitter_lua::LANGUAGE.into(),
234        Language::Luau => tree_sitter_luau::LANGUAGE.into(),
235        Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
236        Language::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
237        Language::Swift => tree_sitter_swift::LANGUAGE.into(),
238    };
239
240    parser.set_language(&ts_language).map_err(|e| {
241        RemainingError::parse_error(PathBuf::new(), format!("Failed to set language: {}", e))
242    })?;
243    Ok(parser)
244}
245
246/// Get text for a node from source
247fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
248    node.utf8_text(source).unwrap_or("")
249}
250
251/// Get the line number (1-indexed) for a node
252fn get_line_number(node: Node) -> u32 {
253    node.start_position().row as u32 + 1
254}
255
256/// Get the end line number (1-indexed) for a node
257fn get_end_line_number(node: Node) -> u32 {
258    node.end_position().row as u32 + 1
259}
260
261// =============================================================================
262// Function Finding
263// =============================================================================
264
265/// Find a function definition by name in the AST
266fn find_function_node<'a>(
267    root: Node<'a>,
268    source: &[u8],
269    function_name: &str,
270    func_kinds: &[&str],
271) -> Option<Node<'a>> {
272    find_function_recursive(root, source, function_name, func_kinds)
273}
274
275fn find_function_recursive<'a>(
276    node: Node<'a>,
277    source: &[u8],
278    function_name: &str,
279    func_kinds: &[&str],
280) -> Option<Node<'a>> {
281    if func_kinds.contains(&node.kind()) {
282        // Check if this function has the name we're looking for
283        // Try field name first (most reliable)
284        if let Some(name_node) = node.child_by_field_name("name") {
285            let name = node_text(name_node, source);
286            if name == function_name {
287                return Some(node);
288            }
289        }
290        // C/C++: function_definition -> declarator -> function_declarator -> identifier
291        if let Some(declarator) = node.child_by_field_name("declarator") {
292            if let Some(name) = extract_c_declarator_name_explain(declarator, source) {
293                if name == function_name {
294                    return Some(node);
295                }
296            }
297        }
298        // Fallback: search for identifier child (Python, etc.)
299        for child in node.children(&mut node.walk()) {
300            if child.kind() == "identifier" {
301                let name = node_text(child, source);
302                if name == function_name {
303                    return Some(node);
304                }
305                break;
306            }
307        }
308    }
309
310    // Check for arrow functions in variable declarations (TS/JS pattern):
311    // lexical_declaration / variable_declaration -> variable_declarator -> name + value(arrow_function)
312    if matches!(node.kind(), "lexical_declaration" | "variable_declaration") {
313        let mut cursor = node.walk();
314        for child in node.children(&mut cursor) {
315            if child.kind() == "variable_declarator" {
316                if let Some(name_node) = child.child_by_field_name("name") {
317                    let var_name = node_text(name_node, source);
318                    if var_name == function_name {
319                        if let Some(value_node) = child.child_by_field_name("value") {
320                            if matches!(
321                                value_node.kind(),
322                                "arrow_function"
323                                    | "function"
324                                    | "function_expression"
325                                    | "generator_function"
326                            ) {
327                                return Some(value_node);
328                            }
329                        }
330                    }
331                }
332            }
333        }
334    }
335
336    // Elixir: def/defp are `call` nodes where the first child identifier is "def"/"defp"
337    // and the function name is in the arguments
338    if node.kind() == "call" && func_kinds.contains(&"call") {
339        for child in node.children(&mut node.walk()) {
340            if child.kind() == "identifier" {
341                let text = node_text(child, source);
342                if text == "def" || text == "defp" {
343                    if let Some(args) = child.next_sibling() {
344                        if args.kind() == "arguments" || args.kind() == "call" {
345                            if let Some(name_node) = args.child(0) {
346                                let fname = if name_node.kind() == "call" {
347                                    name_node
348                                        .child(0)
349                                        .map(|n| node_text(n, source))
350                                        .unwrap_or("")
351                                } else {
352                                    node_text(name_node, source)
353                                };
354                                if fname == function_name {
355                                    return Some(node);
356                                }
357                            }
358                        }
359                    }
360                }
361            }
362        }
363    }
364
365    // OCaml: value_definition -> let_binding -> pattern field contains the function name
366    if node.kind() == "value_definition" {
367        for child in node.children(&mut node.walk()) {
368            if child.kind() == "let_binding" {
369                if let Some(pattern_node) = child.child_by_field_name("pattern") {
370                    let name = node_text(pattern_node, source);
371                    if name == function_name {
372                        return Some(node);
373                    }
374                }
375            }
376        }
377    }
378
379    // Recurse into children
380    for child in node.children(&mut node.walk()) {
381        if let Some(found) = find_function_recursive(child, source, function_name, func_kinds) {
382            return Some(found);
383        }
384    }
385
386    None
387}
388
389/// Recursively extract function name from C/C++ nested declarator chain
390fn extract_c_declarator_name_explain(declarator: Node, source: &[u8]) -> Option<String> {
391    match declarator.kind() {
392        "identifier" | "field_identifier" => {
393            let name = node_text(declarator, source).to_string();
394            if !name.is_empty() {
395                Some(name)
396            } else {
397                None
398            }
399        }
400        "function_declarator"
401        | "pointer_declarator"
402        | "reference_declarator"
403        | "parenthesized_declarator" => declarator
404            .child_by_field_name("declarator")
405            .and_then(|inner| extract_c_declarator_name_explain(inner, source)),
406        _ => None,
407    }
408}
409
410// =============================================================================
411// Signature Extraction
412// =============================================================================
413
414/// Extract signature information from a function node
415fn extract_signature(func_node: Node, source: &[u8], language: Language) -> SignatureInfo {
416    let mut sig = SignatureInfo::new();
417
418    // Check if async (language-specific)
419    sig.is_async = match language {
420        Language::Python => func_node.kind() == "async_function_definition",
421        Language::TypeScript | Language::JavaScript => {
422            // Check for async modifier
423            let mut is_async = false;
424            for child in func_node.children(&mut func_node.walk()) {
425                if child.kind() == "async" {
426                    is_async = true;
427                    break;
428                }
429            }
430            is_async
431        }
432        Language::Rust => {
433            // Check for async keyword
434            node_text(func_node, source).contains("async")
435        }
436        _ => false,
437    };
438
439    // Extract parameters
440    if let Some(params_node) = func_node.child_by_field_name("parameters") {
441        sig.params = extract_params(params_node, source);
442    }
443
444    // Extract return type
445    if let Some(return_node) = func_node.child_by_field_name("return_type") {
446        sig.return_type = Some(node_text(return_node, source).to_string());
447    }
448
449    // Extract decorators (look for decorated_definition parent or decorator children)
450    sig.decorators = extract_decorators(func_node, source);
451
452    // Extract docstring
453    sig.docstring = extract_docstring(func_node, source);
454
455    sig
456}
457
458/// Extract parameters from a parameters node
459fn extract_params(params_node: Node, source: &[u8]) -> Vec<ParamInfo> {
460    let mut params = Vec::new();
461
462    for child in params_node.children(&mut params_node.walk()) {
463        match child.kind() {
464            "identifier" => {
465                // Simple parameter without annotation
466                let name = node_text(child, source);
467                if name != "self" && name != "cls" {
468                    params.push(ParamInfo::new(name));
469                }
470            }
471            "typed_parameter" | "typed_default_parameter" => {
472                // Parameter with type annotation
473                let mut param = ParamInfo::new("");
474                for part in child.children(&mut child.walk()) {
475                    match part.kind() {
476                        "identifier" => {
477                            let name = node_text(part, source);
478                            if name != "self" && name != "cls" && param.name.is_empty() {
479                                param.name = name.to_string();
480                            }
481                        }
482                        "type" => {
483                            param.type_hint = Some(node_text(part, source).to_string());
484                        }
485                        _ => {}
486                    }
487                }
488                // Only add if we got a name
489                if !param.name.is_empty() {
490                    params.push(param);
491                }
492            }
493            "default_parameter" => {
494                // Parameter with default value
495                let mut param = ParamInfo::new("");
496                let mut got_name = false;
497                for part in child.children(&mut child.walk()) {
498                    if part.kind() == "identifier" && !got_name {
499                        let name = node_text(part, source);
500                        if name != "self" && name != "cls" {
501                            param.name = name.to_string();
502                            got_name = true;
503                        }
504                    } else if got_name && param.default.is_none() && part.kind() != "=" {
505                        param.default = Some(node_text(part, source).to_string());
506                    }
507                }
508                if !param.name.is_empty() {
509                    params.push(param);
510                }
511            }
512            _ => {}
513        }
514    }
515
516    params
517}
518
519/// Extract decorators
520fn extract_decorators(func_node: Node, source: &[u8]) -> Vec<String> {
521    let mut decorators = Vec::new();
522
523    // Check if parent is decorated_definition
524    if let Some(parent) = func_node.parent() {
525        if parent.kind() == "decorated_definition" {
526            for child in parent.children(&mut parent.walk()) {
527                if child.kind() == "decorator" {
528                    let text = node_text(child, source);
529                    decorators.push(text.trim_start_matches('@').to_string());
530                }
531            }
532        }
533    }
534
535    decorators
536}
537
538/// Extract docstring from function body
539fn extract_docstring(func_node: Node, source: &[u8]) -> Option<String> {
540    // Look for the function body (block)
541    if let Some(body) = func_node.child_by_field_name("body") {
542        // First statement in body might be a docstring
543        if let Some(first_stmt) = body.child(0) {
544            if first_stmt.kind() == "expression_statement" {
545                if let Some(expr) = first_stmt.child(0) {
546                    if expr.kind() == "string" {
547                        let text = node_text(expr, source);
548                        // Remove quotes
549                        let cleaned = text
550                            .trim_start_matches("\"\"\"")
551                            .trim_start_matches("'''")
552                            .trim_start_matches('"')
553                            .trim_start_matches('\'')
554                            .trim_end_matches("\"\"\"")
555                            .trim_end_matches("'''")
556                            .trim_end_matches('"')
557                            .trim_end_matches('\'')
558                            .trim();
559                        return Some(cleaned.to_string());
560                    }
561                }
562            }
563        }
564    }
565    None
566}
567
568// =============================================================================
569// Purity Analysis
570// =============================================================================
571
572/// Analyze purity of a function
573fn analyze_purity(func_node: Node, source: &[u8]) -> PurityInfo {
574    let mut effects = Vec::new();
575    let mut has_unknown_calls = false;
576    let mut has_any_calls = false;
577
578    analyze_purity_recursive(
579        func_node,
580        source,
581        &mut effects,
582        &mut has_unknown_calls,
583        &mut has_any_calls,
584    );
585
586    if !effects.is_empty() {
587        // Has side effects -> impure
588        PurityInfo::impure(effects)
589    } else if has_unknown_calls {
590        // No known side effects, but calls unknown functions -> unknown
591        PurityInfo::unknown().with_confidence("medium")
592    } else if has_any_calls {
593        // All calls resolved to known-pure builtins -> pure
594        PurityInfo::pure()
595    } else {
596        // No calls detected at all (empty body or pure computation like a+b).
597        // Absence of evidence is not evidence of purity — classify as unknown
598        // with low confidence since we have nothing to base a purity claim on.
599        PurityInfo::unknown().with_confidence("low")
600    }
601}
602
603fn analyze_purity_recursive(
604    node: Node,
605    source: &[u8],
606    effects: &mut Vec<String>,
607    has_unknown_calls: &mut bool,
608    has_any_calls: &mut bool,
609) {
610    match node.kind() {
611        "global_statement" | "nonlocal_statement" => {
612            if !effects.contains(&"global_write".to_string()) {
613                effects.push("global_write".to_string());
614            }
615        }
616        "assignment" | "augmented_assignment" => {
617            // Check for attribute writes (self.x = ...)
618            if let Some(left) = node.child_by_field_name("left") {
619                if left.kind() == "attribute"
620                    && !effects.contains(&"attribute_write".to_string())
621                {
622                    effects.push("attribute_write".to_string());
623                }
624            }
625        }
626        "call" => {
627            *has_any_calls = true;
628            let call_name = extract_call_name(node, source);
629            if let Some(name) = &call_name {
630                // Check for I/O operations
631                for &io_op in IO_OPERATIONS {
632                    if name == io_op || name.ends_with(&format!(".{}", io_op)) {
633                        if !effects.contains(&"io".to_string()) {
634                            effects.push("io".to_string());
635                        }
636                        return;
637                    }
638                }
639
640                // Check for impure calls
641                for &impure in IMPURE_CALLS {
642                    if name == impure || name.ends_with(impure) {
643                        if !effects.contains(&"io".to_string()) {
644                            effects.push("io".to_string());
645                        }
646                        return;
647                    }
648                }
649
650                // Check for collection mutations
651                let method_name = name.split('.').next_back().unwrap_or(name);
652                for &mutation in COLLECTION_MUTATIONS {
653                    if method_name == mutation {
654                        if !effects.contains(&"collection_modify".to_string()) {
655                            effects.push("collection_modify".to_string());
656                        }
657                        return;
658                    }
659                }
660
661                // Check if it's a known pure builtin
662                let base = name.split('.').next_back().unwrap_or(name);
663                if !PURE_BUILTINS.contains(&name.as_str()) && !PURE_BUILTINS.contains(&base) {
664                    *has_unknown_calls = true;
665                }
666            }
667        }
668        _ => {}
669    }
670
671    // Recurse into children
672    for child in node.children(&mut node.walk()) {
673        analyze_purity_recursive(child, source, effects, has_unknown_calls, has_any_calls);
674    }
675}
676
677/// Extract call name from a call node
678fn extract_call_name(node: Node, source: &[u8]) -> Option<String> {
679    if let Some(func) = node.child_by_field_name("function") {
680        return Some(extract_name_from_expr(func, source));
681    }
682
683    for child in node.children(&mut node.walk()) {
684        match child.kind() {
685            "identifier" => return Some(node_text(child, source).to_string()),
686            "attribute" => return Some(extract_name_from_expr(child, source)),
687            _ => continue,
688        }
689    }
690    None
691}
692
693/// Extract a dotted name from an expression
694fn extract_name_from_expr(node: Node, source: &[u8]) -> String {
695    match node.kind() {
696        "identifier" => node_text(node, source).to_string(),
697        "attribute" => {
698            let mut parts = Vec::new();
699            let mut current = node;
700
701            loop {
702                if let Some(attr) = current.child_by_field_name("attribute") {
703                    parts.push(node_text(attr, source).to_string());
704                }
705
706                if let Some(obj) = current.child_by_field_name("object") {
707                    if obj.kind() == "attribute" {
708                        current = obj;
709                    } else if obj.kind() == "identifier" {
710                        parts.push(node_text(obj, source).to_string());
711                        break;
712                    } else {
713                        break;
714                    }
715                } else {
716                    break;
717                }
718            }
719
720            parts.reverse();
721            parts.join(".")
722        }
723        _ => node_text(node, source).to_string(),
724    }
725}
726
727// =============================================================================
728// Complexity Analysis
729// =============================================================================
730
731/// Compute complexity metrics for a function
732fn compute_complexity(func_node: Node) -> ComplexityInfo {
733    let mut cyclomatic = 1; // Base complexity
734    let mut num_blocks = 1;
735    let mut num_edges = 0;
736    let mut has_loops = false;
737
738    count_complexity_recursive(
739        func_node,
740        &mut cyclomatic,
741        &mut num_blocks,
742        &mut num_edges,
743        &mut has_loops,
744    );
745
746    ComplexityInfo::new(cyclomatic, num_blocks, num_edges, has_loops)
747}
748
749fn count_complexity_recursive(
750    node: Node,
751    cyclomatic: &mut u32,
752    num_blocks: &mut u32,
753    num_edges: &mut u32,
754    has_loops: &mut bool,
755) {
756    match node.kind() {
757        "if_statement" | "elif_clause" => {
758            *cyclomatic += 1;
759            *num_blocks += 1;
760            *num_edges += 2;
761        }
762        "for_statement" | "while_statement" => {
763            *cyclomatic += 1;
764            *num_blocks += 1;
765            *num_edges += 2;
766            *has_loops = true;
767        }
768        "try_statement" => {
769            *cyclomatic += 1;
770            *num_blocks += 1;
771            *num_edges += 1;
772        }
773        "except_clause" => {
774            *cyclomatic += 1;
775            *num_blocks += 1;
776            *num_edges += 1;
777        }
778        "and_operator" | "or_operator" => {
779            *cyclomatic += 1;
780        }
781        "conditional_expression" => {
782            // Ternary: x if cond else y
783            *cyclomatic += 1;
784            *num_edges += 1;
785        }
786        "list_comprehension"
787        | "set_comprehension"
788        | "dictionary_comprehension"
789        | "generator_expression" => {
790            *cyclomatic += 1;
791            *has_loops = true;
792        }
793        _ => {}
794    }
795
796    for child in node.children(&mut node.walk()) {
797        count_complexity_recursive(child, cyclomatic, num_blocks, num_edges, has_loops);
798    }
799}
800
801// =============================================================================
802// Call Graph Analysis
803// =============================================================================
804
805/// Find callees (functions called by this function)
806fn find_callees(
807    func_node: Node,
808    source: &[u8],
809    file_path: &str,
810    local_functions: &HashSet<String>,
811) -> Vec<CallInfo> {
812    let mut callees = Vec::new();
813    find_callees_recursive(func_node, source, file_path, local_functions, &mut callees);
814    callees
815}
816
817fn find_callees_recursive(
818    node: Node,
819    source: &[u8],
820    file_path: &str,
821    local_functions: &HashSet<String>,
822    callees: &mut Vec<CallInfo>,
823) {
824    if node.kind() == "call" {
825        if let Some(name) = extract_call_name(node, source) {
826            // Get base name for local function check
827            let base_name = name.split('.').next().unwrap_or(&name);
828
829            // Add if it's a local function or a known call
830            let file = if local_functions.contains(base_name) {
831                file_path.to_string()
832            } else {
833                "<external>".to_string()
834            };
835
836            // Avoid duplicates
837            if !callees.iter().any(|c| c.name == name) {
838                callees.push(CallInfo::new(name, file, get_line_number(node)));
839            }
840        }
841    }
842
843    for child in node.children(&mut node.walk()) {
844        find_callees_recursive(child, source, file_path, local_functions, callees);
845    }
846}
847
848/// Find callers (functions that call this function) - searches the entire file
849fn find_callers(
850    root: Node,
851    source: &[u8],
852    target_function: &str,
853    file_path: &str,
854    func_kinds: &[&str],
855) -> Vec<CallInfo> {
856    let mut callers = Vec::new();
857    find_callers_in_file(
858        root,
859        source,
860        target_function,
861        file_path,
862        &mut callers,
863        None,
864        func_kinds,
865    );
866    callers
867}
868
869fn find_callers_in_file(
870    node: Node,
871    source: &[u8],
872    target_function: &str,
873    file_path: &str,
874    callers: &mut Vec<CallInfo>,
875    current_function: Option<&str>,
876    func_kinds: &[&str],
877) {
878    if func_kinds.contains(&node.kind()) {
879        // Get this function's name
880        let mut func_name = None;
881
882        // Try field name first
883        if let Some(name_node) = node.child_by_field_name("name") {
884            func_name = Some(node_text(name_node, source));
885        } else {
886            // Fallback: search for identifier child
887            for child in node.children(&mut node.walk()) {
888                if child.kind() == "identifier" {
889                    func_name = Some(node_text(child, source));
890                    break;
891                }
892            }
893        }
894
895        // Recurse with this function as current
896        for child in node.children(&mut node.walk()) {
897            find_callers_in_file(
898                child,
899                source,
900                target_function,
901                file_path,
902                callers,
903                func_name,
904                func_kinds,
905            );
906        }
907        return;
908    } else if node.kind() == "call" {
909        if let Some(name) = extract_call_name(node, source) {
910            // Check if this call is to our target function
911            let base = name.split('.').next_back().unwrap_or(&name);
912            if base == target_function || name == target_function {
913                if let Some(caller_name) = current_function {
914                    // Avoid duplicates and self-references
915                    if caller_name != target_function
916                        && !callers.iter().any(|c| c.name == caller_name)
917                    {
918                        callers.push(CallInfo::new(caller_name, file_path, get_line_number(node)));
919                    }
920                }
921            }
922        }
923    }
924
925    for child in node.children(&mut node.walk()) {
926        find_callers_in_file(
927            child,
928            source,
929            target_function,
930            file_path,
931            callers,
932            current_function,
933            func_kinds,
934        );
935    }
936}
937
938/// Collect all function names in a file
939fn collect_function_names(root: Node, source: &[u8], func_kinds: &[&str]) -> HashSet<String> {
940    let mut names = HashSet::new();
941    collect_function_names_recursive(root, source, &mut names, func_kinds);
942    names
943}
944
945fn collect_function_names_recursive(
946    node: Node,
947    source: &[u8],
948    names: &mut HashSet<String>,
949    func_kinds: &[&str],
950) {
951    if func_kinds.contains(&node.kind()) {
952        // Try field name first
953        if let Some(name_node) = node.child_by_field_name("name") {
954            names.insert(node_text(name_node, source).to_string());
955        } else {
956            // Fallback: search for identifier child
957            for child in node.children(&mut node.walk()) {
958                if child.kind() == "identifier" {
959                    names.insert(node_text(child, source).to_string());
960                    break;
961                }
962            }
963        }
964    }
965
966    for child in node.children(&mut node.walk()) {
967        collect_function_names_recursive(child, source, names, func_kinds);
968    }
969}
970
971// =============================================================================
972// Text Formatting
973// =============================================================================
974
975/// Format an ExplainReport as human-readable text
976fn format_explain_text(report: &ExplainReport) -> String {
977    let mut lines = Vec::new();
978
979    lines.push(format!("Function: {}", report.function_name));
980    lines.push(format!("File: {}", report.file));
981    lines.push(format!("Lines: {}-{}", report.line_start, report.line_end));
982    lines.push(format!("Language: {}", report.language));
983    lines.push(String::new());
984
985    // Signature
986    lines.push("Signature:".to_string());
987    if report.signature.is_async {
988        lines.push("  async: yes".to_string());
989    }
990    lines.push(format!("  Parameters: {}", report.signature.params.len()));
991    for param in &report.signature.params {
992        let type_str = param.type_hint.as_deref().unwrap_or("untyped");
993        lines.push(format!("    - {}: {}", param.name, type_str));
994    }
995    if let Some(ref ret) = report.signature.return_type {
996        lines.push(format!("  Returns: {}", ret));
997    }
998    if !report.signature.decorators.is_empty() {
999        lines.push(format!(
1000            "  Decorators: {}",
1001            report.signature.decorators.join(", ")
1002        ));
1003    }
1004    if let Some(ref doc) = report.signature.docstring {
1005        let preview = if doc.len() > 100 {
1006            format!("{}...", &doc[..100])
1007        } else {
1008            doc.clone()
1009        };
1010        lines.push(format!("  Docstring: {}", preview));
1011    }
1012    lines.push(String::new());
1013
1014    // Purity
1015    lines.push("Purity:".to_string());
1016    lines.push(format!(
1017        "  Classification: {}",
1018        report.purity.classification
1019    ));
1020    lines.push(format!("  Confidence: {}", report.purity.confidence));
1021    if !report.purity.effects.is_empty() {
1022        lines.push(format!("  Effects: {}", report.purity.effects.join(", ")));
1023    }
1024    lines.push(String::new());
1025
1026    // Complexity
1027    if let Some(ref cx) = report.complexity {
1028        lines.push("Complexity:".to_string());
1029        lines.push(format!("  Cyclomatic: {}", cx.cyclomatic));
1030        lines.push(format!("  Blocks: {}", cx.num_blocks));
1031        lines.push(format!("  Edges: {}", cx.num_edges));
1032        lines.push(format!("  Has loops: {}", cx.has_loops));
1033        lines.push(String::new());
1034    }
1035
1036    // Callers
1037    if !report.callers.is_empty() {
1038        lines.push(format!("Callers ({}):", report.callers.len()));
1039        for caller in &report.callers {
1040            lines.push(format!(
1041                "  - {} ({}:{})",
1042                caller.name, caller.file, caller.line
1043            ));
1044        }
1045        lines.push(String::new());
1046    }
1047
1048    // Callees
1049    if !report.callees.is_empty() {
1050        lines.push(format!("Callees ({}):", report.callees.len()));
1051        for callee in &report.callees {
1052            lines.push(format!(
1053                "  - {} ({}:{})",
1054                callee.name, callee.file, callee.line
1055            ));
1056        }
1057    }
1058
1059    lines.join("\n")
1060}
1061
1062// =============================================================================
1063// Entry Point
1064// =============================================================================
1065
1066impl ExplainArgs {
1067    /// Run the explain command
1068    pub fn run(&self, format: OutputFormat, quiet: bool) -> Result<()> {
1069        let writer = OutputWriter::new(format, quiet);
1070
1071        writer.progress(&format!(
1072            "Analyzing function {} in {}...",
1073            self.function,
1074            self.file.display()
1075        ));
1076
1077        // Check file exists
1078        if !self.file.exists() {
1079            return Err(RemainingError::file_not_found(&self.file).into());
1080        }
1081
1082        // Detect language from file extension
1083        let language = Language::from_path(&self.file)
1084            .ok_or_else(|| RemainingError::parse_error(&self.file, "Unsupported language"))?;
1085
1086        // Get function node kinds for this language
1087        let func_kinds = get_function_node_kinds(language);
1088
1089        // Read source
1090        let source = std::fs::read_to_string(&self.file)
1091            .map_err(|e| RemainingError::parse_error(&self.file, e.to_string()))?;
1092        let source_bytes = source.as_bytes();
1093
1094        // Parse with tree-sitter
1095        let mut parser = get_parser(language)?;
1096        let tree = parser
1097            .parse(&source, None)
1098            .ok_or_else(|| RemainingError::parse_error(&self.file, "Failed to parse file"))?;
1099
1100        let root = tree.root_node();
1101
1102        // Find the function
1103        let func_node = find_function_node(root, source_bytes, &self.function, func_kinds)
1104            .ok_or_else(|| RemainingError::symbol_not_found(&self.function, &self.file))?;
1105
1106        // Get file path string
1107        let file_path = self.file.to_string_lossy().to_string();
1108
1109        // Get language name for report
1110        let language_name = match language {
1111            Language::Python => "python",
1112            Language::TypeScript => "typescript",
1113            Language::JavaScript => "javascript",
1114            Language::Go => "go",
1115            Language::Rust => "rust",
1116            Language::Java => "java",
1117            Language::C => "c",
1118            Language::Cpp => "cpp",
1119            Language::CSharp => "csharp",
1120            Language::Kotlin => "kotlin",
1121            Language::Scala => "scala",
1122            Language::Php => "php",
1123            Language::Ruby => "ruby",
1124            Language::Lua => "lua",
1125            Language::Luau => "luau",
1126            Language::Elixir => "elixir",
1127            Language::Ocaml => "ocaml",
1128            Language::Swift => "swift",
1129        };
1130
1131        // Build report
1132        let mut report = ExplainReport::new(
1133            &self.function,
1134            &file_path,
1135            get_line_number(func_node),
1136            get_end_line_number(func_node),
1137            language_name,
1138        );
1139
1140        // Extract signature
1141        report.signature = extract_signature(func_node, source_bytes, language);
1142
1143        // Analyze purity
1144        report.purity = analyze_purity(func_node, source_bytes);
1145
1146        // Compute complexity
1147        report.complexity = Some(compute_complexity(func_node));
1148
1149        // Collect local function names for call graph analysis
1150        let local_functions = collect_function_names(root, source_bytes, func_kinds);
1151
1152        // Find callees
1153        report.callees = find_callees(func_node, source_bytes, &file_path, &local_functions);
1154
1155        // Find callers
1156        report.callers = find_callers(root, source_bytes, &self.function, &file_path, func_kinds);
1157
1158        // Output based on format
1159        if writer.is_text() {
1160            let text = format_explain_text(&report);
1161            writer.write_text(&text)?;
1162        } else {
1163            writer.write(&report)?;
1164        }
1165
1166        // Write to output file if specified
1167        if let Some(ref output_path) = self.output {
1168            let output_str = if format == OutputFormat::Text {
1169                format_explain_text(&report)
1170            } else {
1171                serde_json::to_string_pretty(&report)?
1172            };
1173            std::fs::write(output_path, &output_str)?;
1174        }
1175
1176        Ok(())
1177    }
1178}
1179
1180// =============================================================================
1181// Tests
1182// =============================================================================
1183
1184#[cfg(test)]
1185mod tests {
1186    use super::*;
1187    
1188
1189    const SAMPLE_CODE: &str = r#"
1190def calculate_total(items: list[dict], tax_rate: float = 0.1) -> float:
1191    """Calculate total price with tax.
1192
1193    Args:
1194        items: List of items with 'price' key
1195        tax_rate: Tax rate as decimal (default 10%)
1196
1197    Returns:
1198        Total price including tax
1199    """
1200    subtotal = sum(item['price'] for item in items)
1201    return subtotal * (1 + tax_rate)
1202
1203def helper_function(x):
1204    return x * 2
1205
1206def main():
1207    items = [{'price': 10}, {'price': 20}]
1208    total = calculate_total(items)
1209    doubled = helper_function(total)
1210    print(doubled)
1211"#;
1212
1213    #[test]
1214    fn test_find_function() {
1215        let language = Language::Python;
1216        let func_kinds = get_function_node_kinds(language);
1217        let mut parser = get_parser(language).unwrap();
1218        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1219        let root = tree.root_node();
1220
1221        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "calculate_total", func_kinds);
1222        assert!(func.is_some());
1223
1224        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "nonexistent", func_kinds);
1225        assert!(func.is_none());
1226    }
1227
1228    #[test]
1229    fn test_extract_signature() {
1230        let language = Language::Python;
1231        let func_kinds = get_function_node_kinds(language);
1232        let mut parser = get_parser(language).unwrap();
1233        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1234        let root = tree.root_node();
1235
1236        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "calculate_total", func_kinds)
1237            .unwrap();
1238        let sig = extract_signature(func, SAMPLE_CODE.as_bytes(), language);
1239
1240        assert_eq!(sig.params.len(), 2);
1241        assert_eq!(sig.params[0].name, "items");
1242        assert_eq!(sig.params[1].name, "tax_rate");
1243        assert!(sig.return_type.is_some());
1244        assert!(sig.docstring.is_some());
1245    }
1246
1247    #[test]
1248    fn test_purity_analysis() {
1249        let language = Language::Python;
1250        let func_kinds = get_function_node_kinds(language);
1251        let mut parser = get_parser(language).unwrap();
1252        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1253        let root = tree.root_node();
1254
1255        // calculate_total should be pure
1256        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "calculate_total", func_kinds)
1257            .unwrap();
1258        let purity = analyze_purity(func, SAMPLE_CODE.as_bytes());
1259        assert_eq!(purity.classification, "pure");
1260
1261        // main calls print, so impure
1262        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "main", func_kinds).unwrap();
1263        let purity = analyze_purity(func, SAMPLE_CODE.as_bytes());
1264        assert_eq!(purity.classification, "impure");
1265        assert!(purity.effects.contains(&"io".to_string()));
1266    }
1267
1268    #[test]
1269    fn test_complexity_analysis() {
1270        let code = r#"
1271def complex_func(x, y):
1272    if x > 0:
1273        if y > 0:
1274            return x + y
1275        else:
1276            return x
1277    else:
1278        for i in range(10):
1279            x += i
1280        return x
1281"#;
1282        let language = Language::Python;
1283        let func_kinds = get_function_node_kinds(language);
1284        let mut parser = get_parser(language).unwrap();
1285        let tree = parser.parse(code, None).unwrap();
1286        let root = tree.root_node();
1287
1288        let func = find_function_node(root, code.as_bytes(), "complex_func", func_kinds).unwrap();
1289        let cx = compute_complexity(func);
1290
1291        assert!(cx.cyclomatic > 1);
1292        assert!(cx.has_loops);
1293    }
1294
1295    #[test]
1296    fn test_find_callees() {
1297        let language = Language::Python;
1298        let func_kinds = get_function_node_kinds(language);
1299        let mut parser = get_parser(language).unwrap();
1300        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1301        let root = tree.root_node();
1302
1303        let local_funcs = collect_function_names(root, SAMPLE_CODE.as_bytes(), func_kinds);
1304        let func = find_function_node(root, SAMPLE_CODE.as_bytes(), "main", func_kinds).unwrap();
1305        let callees = find_callees(func, SAMPLE_CODE.as_bytes(), "test.py", &local_funcs);
1306
1307        assert!(callees.iter().any(|c| c.name == "calculate_total"));
1308        assert!(callees.iter().any(|c| c.name == "helper_function"));
1309    }
1310
1311    #[test]
1312    fn test_find_callers() {
1313        let language = Language::Python;
1314        let func_kinds = get_function_node_kinds(language);
1315        let mut parser = get_parser(language).unwrap();
1316        let tree = parser.parse(SAMPLE_CODE, None).unwrap();
1317        let root = tree.root_node();
1318
1319        let callers = find_callers(
1320            root,
1321            SAMPLE_CODE.as_bytes(),
1322            "calculate_total",
1323            "test.py",
1324            func_kinds,
1325        );
1326        assert!(callers.iter().any(|c| c.name == "main"));
1327    }
1328
1329    #[test]
1330    fn test_find_ts_arrow_function() {
1331        let ts_source = r#"
1332const getDuration = (start: Date, end: Date): number => {
1333    return end.getTime() - start.getTime();
1334};
1335
1336function regularFunc(x: number): number {
1337    return x * 2;
1338}
1339
1340export const processItems = (items: string[]) => {
1341    return items.map(i => i.trim());
1342};
1343"#;
1344        let language = Language::TypeScript;
1345        let func_kinds = get_function_node_kinds(language);
1346        let mut parser = get_parser(language).unwrap();
1347        let tree = parser.parse(ts_source, None).unwrap();
1348        let root = tree.root_node();
1349
1350        // Regular function should always work
1351        let regular = find_function_node(root, ts_source.as_bytes(), "regularFunc", func_kinds);
1352        assert!(regular.is_some(), "Should find regular TS function");
1353
1354        // Arrow function assigned to const should also work
1355        let arrow = find_function_node(root, ts_source.as_bytes(), "getDuration", func_kinds);
1356        assert!(
1357            arrow.is_some(),
1358            "Should find TS arrow function 'getDuration'"
1359        );
1360
1361        // Exported arrow function should also work
1362        let exported = find_function_node(root, ts_source.as_bytes(), "processItems", func_kinds);
1363        assert!(
1364            exported.is_some(),
1365            "Should find exported TS arrow function 'processItems'"
1366        );
1367    }
1368
1369    // =========================================================================
1370    // Bug: analyze_purity returns "pure" when it should return "unknown"
1371    // =========================================================================
1372
1373    /// A function with no function body content (empty/pass) should classify
1374    /// as "unknown", not "pure". We have no evidence of purity -- the analysis
1375    /// simply found nothing.
1376    #[test]
1377    fn test_empty_function_is_unknown_not_pure() {
1378        let source = r#"
1379def empty_func():
1380    pass
1381"#;
1382        let language = Language::Python;
1383        let func_kinds = get_function_node_kinds(language);
1384        let mut parser = get_parser(language).unwrap();
1385        let tree = parser.parse(source, None).unwrap();
1386        let root = tree.root_node();
1387
1388        let func_node = find_function_node(root, source.as_bytes(), "empty_func", func_kinds);
1389        assert!(func_node.is_some(), "Should find empty_func");
1390
1391        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1392
1393        // The buggy code returns "pure" because no effects and no unknown calls.
1394        // But "pass" means we found nothing -- not that we proved purity.
1395        // A truly empty function (just `pass`) has no evidence to support "pure".
1396        assert_ne!(
1397            purity.classification, "pure",
1398            "A function with only `pass` (no calls, no computation) should NOT be classified as \
1399             'pure' with high confidence. We have no evidence to support a purity claim. \
1400             Got classification='{}', confidence='{}'. Expected 'unknown'.",
1401            purity.classification, purity.confidence
1402        );
1403    }
1404
1405    /// A function that calls other user-defined functions (not builtins, not IO)
1406    /// where those calls are unresolved should classify as "unknown", not "pure".
1407    ///
1408    /// The bug: when a call doesn't match IO_OPERATIONS, IMPURE_CALLS,
1409    /// COLLECTION_MUTATIONS, or PURE_BUILTINS, it sets has_unknown_calls=true.
1410    /// This case is actually handled correctly for unknown calls, BUT if the
1411    /// call name happens to match a PURE_BUILTIN substring, it incorrectly
1412    /// passes as pure. This test verifies the general "unknown calls" path works.
1413    #[test]
1414    fn test_function_with_unknown_calls_is_unknown() {
1415        let source = r#"
1416def my_func(x):
1417    result = compute_something(x)
1418    return transform_result(result)
1419"#;
1420        let language = Language::Python;
1421        let func_kinds = get_function_node_kinds(language);
1422        let mut parser = get_parser(language).unwrap();
1423        let tree = parser.parse(source, None).unwrap();
1424        let root = tree.root_node();
1425
1426        let func_node = find_function_node(root, source.as_bytes(), "my_func", func_kinds);
1427        assert!(func_node.is_some(), "Should find my_func");
1428
1429        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1430
1431        // compute_something and transform_result are NOT in PURE_BUILTINS,
1432        // so has_unknown_calls should be true -> classification = "unknown"
1433        assert_eq!(
1434            purity.classification, "unknown",
1435            "Function calling unknown user functions should be 'unknown', got '{}'",
1436            purity.classification
1437        );
1438        assert_ne!(
1439            purity.confidence, "high",
1440            "Unknown classification should not have high confidence, got '{}'",
1441            purity.confidence
1442        );
1443    }
1444
1445    /// A function that ONLY calls known-pure builtins should classify as "pure".
1446    /// This is the legitimate pure case.
1447    #[test]
1448    fn test_only_pure_builtins_is_pure() {
1449        let source = r#"
1450def pure_func(items):
1451    return len(items) + sum(items)
1452"#;
1453        let language = Language::Python;
1454        let func_kinds = get_function_node_kinds(language);
1455        let mut parser = get_parser(language).unwrap();
1456        let tree = parser.parse(source, None).unwrap();
1457        let root = tree.root_node();
1458
1459        let func_node = find_function_node(root, source.as_bytes(), "pure_func", func_kinds);
1460        assert!(func_node.is_some(), "Should find pure_func");
1461
1462        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1463
1464        assert_eq!(
1465            purity.classification, "pure",
1466            "Function calling only pure builtins (len, sum) should be 'pure', got '{}'",
1467            purity.classification
1468        );
1469        assert_eq!(
1470            purity.confidence, "high",
1471            "Pure classification should have high confidence"
1472        );
1473    }
1474
1475    /// A function with IO operations should classify as "impure".
1476    #[test]
1477    fn test_io_operations_is_impure() {
1478        let source = r#"
1479def impure_func(msg):
1480    print(msg)
1481    return True
1482"#;
1483        let language = Language::Python;
1484        let func_kinds = get_function_node_kinds(language);
1485        let mut parser = get_parser(language).unwrap();
1486        let tree = parser.parse(source, None).unwrap();
1487        let root = tree.root_node();
1488
1489        let func_node = find_function_node(root, source.as_bytes(), "impure_func", func_kinds);
1490        assert!(func_node.is_some(), "Should find impure_func");
1491
1492        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1493
1494        assert_eq!(
1495            purity.classification, "impure",
1496            "Function with print() should be 'impure', got '{}'",
1497            purity.classification
1498        );
1499        assert_eq!(
1500            purity.confidence, "high",
1501            "Impure classification should have high confidence"
1502        );
1503        assert!(
1504            purity.effects.contains(&"io".to_string()),
1505            "Effects should contain 'io', got {:?}",
1506            purity.effects
1507        );
1508    }
1509
1510    /// A function with only arithmetic (no calls at all) should be "unknown"
1511    /// because we have no positive evidence of purity -- the analysis simply
1512    /// didn't find any calls to classify.
1513    #[test]
1514    fn test_no_calls_arithmetic_only_is_unknown() {
1515        let source = r#"
1516def add(a, b):
1517    return a + b
1518"#;
1519        let language = Language::Python;
1520        let func_kinds = get_function_node_kinds(language);
1521        let mut parser = get_parser(language).unwrap();
1522        let tree = parser.parse(source, None).unwrap();
1523        let root = tree.root_node();
1524
1525        let func_node = find_function_node(root, source.as_bytes(), "add", func_kinds);
1526        assert!(func_node.is_some(), "Should find add");
1527
1528        let purity = analyze_purity(func_node.unwrap(), source.as_bytes());
1529
1530        // The bug: analyze_purity returns "pure" because no effects and
1531        // no unknown calls. But we have no positive evidence -- we just
1532        // didn't find any calls. The correct answer is "unknown" with
1533        // low confidence, or at minimum not "pure/high".
1534        assert_ne!(
1535            purity.classification, "pure",
1536            "A simple arithmetic function with no calls should NOT confidently be 'pure'. \
1537             The analysis found no calls to evaluate -- absence of evidence is not evidence \
1538             of purity. Got classification='{}', confidence='{}'. \
1539             Expected 'unknown' since no calls were analyzed.",
1540            purity.classification, purity.confidence
1541        );
1542    }
1543}