Skip to main content

tldr_cli/commands/remaining/
explain.rs

1//! Explain Command - Comprehensive Function Analysis
2//!
3//! The explain command provides a complete analysis of a function including:
4//! - Signature extraction (params, return type, decorators, docstring)
5//! - Purity analysis (pure/impure/unknown with effects)
6//! - Complexity metrics (cyclomatic, blocks, edges, loops)
7//! - Call relationships (callers and callees)
8//!
9//! # Example
10//!
11//! ```bash
12//! # Analyze a function
13//! tldr explain src/utils.py calculate_total
14//!
15//! # With call graph depth
16//! tldr explain src/utils.py calculate_total --depth 3
17//!
18//! # Text output
19//! tldr explain src/utils.py calculate_total --format text
20//! ```
21
22use std::collections::HashSet;
23use std::path::PathBuf;
24
25use anyhow::Result;
26use clap::Args;
27use tree_sitter::{Node, Parser};
28
29use super::error::RemainingError;
30use super::types::{CallInfo, ComplexityInfo, ExplainReport, ParamInfo, PurityInfo, SignatureInfo};
31
32use crate::output::{OutputFormat, OutputWriter};
33use tldr_core::types::Language;
34use tldr_core::{
35    build_project_call_graph, find_references, impact_analysis_with_ast_fallback, names_match,
36    ReferenceKind, ReferencesOptions,
37};
38
39// =============================================================================
40// CLI Arguments
41// =============================================================================
42
/// Provide comprehensive function analysis.
// Command-line arguments for the `explain` command (clap `Args` derive).
//
// NOTE(review): clap's derive uses the `///` comments on this struct and its
// fields as user-facing help text, so treat them as runtime strings rather
// than ordinary documentation. Maintainer notes belong in `//` comments.
#[derive(Debug, Clone, Args)]
pub struct ExplainArgs {
    /// Source file to analyze
    // No `#[arg]` attribute: presumably a required positional argument.
    pub file: PathBuf,

    /// Function name to explain
    // Presumably forwarded to `find_function_node`, which accepts a bare
    // name or a qualified `Class.method` form — confirm against the caller.
    pub function: String,

    /// Call graph depth for callers/callees
    // Exposed as `--depth`; defaults to 2 when the flag is omitted.
    #[arg(long, default_value = "2")]
    pub depth: u32,

    /// Output file (stdout if not specified)
    // Exposed as `--output` / `-o`; `None` when the flag is omitted.
    #[arg(long, short = 'o')]
    pub output: Option<PathBuf>,
}
60
61// =============================================================================
62// Constants
63// =============================================================================
64
/// Known I/O operations that make a function impure.
///
/// `analyze_purity_recursive` compares the extracted call name against this
/// table: a call is flagged with the `io` effect when its name equals an
/// entry or ends with `.<entry>` (so `f.write` matches `write`).
///
/// Matching is purely textual — a user-defined function that happens to
/// share one of these names is flagged as well.
const IO_OPERATIONS: &[&str] = &[
    "print",
    "open",
    "read",
    "write",
    "readline",
    "readlines",
    "writelines",
    "input",
    "system",
    "popen",
    "exec",
    "eval",
    "request",
    "fetch",
    "urlopen",
    "execute",
    "executemany",
    "fetchone",
    "fetchall",
];
87
/// Known impure calls (non-deterministic or side-effecting).
///
/// `analyze_purity_recursive` checks this table after `IO_OPERATIONS`, using
/// an exact or suffix match on the extracted call name. A hit is reported
/// under the same `io` effect label as the I/O table; there is no separate
/// label for non-determinism.
///
/// Entries appear both bare (`randint`) and module-qualified
/// (`random.randint`).
const IMPURE_CALLS: &[&str] = &[
    // Randomness
    "random",
    "randint",
    "choice",
    "shuffle",
    "sample",
    "uniform",
    "random.random",
    "random.randint",
    "random.choice",
    "random.shuffle",
    // Clock reads
    "time",
    "time.time",
    "datetime.now",
    "datetime.datetime.now",
    // UUID generation
    "uuid4",
    "uuid1",
    "uuid.uuid4",
    "uuid.uuid1",
    // Logging
    "logging.info",
    "logging.debug",
    "logging.warning",
    "logging.error",
    // `os` module: processes, environment, filesystem
    "os.system",
    "os.popen",
    "os.getenv",
    "os.environ",
    "os.mkdir",
    "os.remove",
    // `requests` HTTP calls
    "requests.get",
    "requests.post",
    "requests.put",
    "requests.delete",
    // `subprocess` process spawning
    "subprocess.run",
    "subprocess.call",
    "subprocess.Popen",
];
126
/// Collection mutation methods.
///
/// `analyze_purity_recursive` compares only the last `.`-separated component
/// of the call name against this table and records the `collection_modify`
/// effect on a hit. The receiver is never inspected, so any call whose final
/// component is listed here (e.g. `anything.add(...)`) is flagged, whatever
/// the receiver's type.
const COLLECTION_MUTATIONS: &[&str] = &[
    "append",
    "extend",
    "insert",
    "remove",
    "pop",
    "clear",
    "update",
    "add",
    "discard",
    "setdefault",
    "sort",
    "reverse",
];
142
/// Known pure builtins.
///
/// Consulted last by `analyze_purity_recursive`: a call that matched none of
/// the effect tables sets the "unknown call" flag unless its full name or its
/// last `.`-separated component appears here. A function whose calls all
/// resolve to this table can therefore be reported as pure.
const PURE_BUILTINS: &[&str] = &[
    // Sizes, ranges and type constructors
    "len",
    "range",
    "int",
    "float",
    "str",
    "bool",
    "list",
    "dict",
    "set",
    "tuple",
    // Iteration helpers
    "sorted",
    "reversed",
    "enumerate",
    "zip",
    "map",
    "filter",
    // Numeric helpers
    "min",
    "max",
    "sum",
    "abs",
    "round",
    // Introspection
    "isinstance",
    "issubclass",
    "type",
    "id",
    "hash",
    "repr",
    // Iterator protocol and predicates
    "next",
    "iter",
    "all",
    "any",
    // Character / number formatting
    "chr",
    "ord",
    "hex",
    "oct",
    "bin",
    "pow",
    "divmod",
    // Class machinery
    "super",
    "property",
    "staticmethod",
    "classmethod",
];
188
189// =============================================================================
190// Tree-sitter Multi-Language Parsing
191// =============================================================================
192
193/// Get function node kinds for a given language
194fn get_function_node_kinds(language: Language) -> &'static [&'static str] {
195    match language {
196        Language::Python => &["function_definition", "async_function_definition"],
197        Language::TypeScript | Language::JavaScript => &[
198            "function_declaration",
199            "arrow_function",
200            "method_definition",
201            "function",
202        ],
203        Language::Go => &["function_declaration", "method_declaration"],
204        Language::Rust => &["function_item"],
205        Language::Java => &["method_declaration", "constructor_declaration"],
206        Language::Kotlin => &["function_declaration"],
207        Language::CSharp => &["method_declaration", "constructor_declaration"],
208        Language::Ruby => &["method", "singleton_method"],
209        Language::Php => &["function_definition", "method_declaration"],
210        Language::Scala => &["function_definition"],
211        Language::Swift => &["function_declaration"],
212        Language::C | Language::Cpp => &["function_definition"],
213        Language::Lua | Language::Luau => &["function_declaration", "function_definition"],
214        Language::Elixir => &["call"], // Elixir def/defp are call nodes
215        Language::Ocaml => &["value_definition"],
216    }
217}
218
219/// Initialize tree-sitter parser for the detected language
220fn get_parser(language: Language) -> Result<Parser, RemainingError> {
221    let mut parser = Parser::new();
222
223    let ts_language = match language {
224        Language::Python => tree_sitter_python::LANGUAGE.into(),
225        Language::TypeScript => tree_sitter_typescript::LANGUAGE_TSX.into(),
226        Language::JavaScript => tree_sitter_typescript::LANGUAGE_TSX.into(),
227        Language::Go => tree_sitter_go::LANGUAGE.into(),
228        Language::Rust => tree_sitter_rust::LANGUAGE.into(),
229        Language::Java => tree_sitter_java::LANGUAGE.into(),
230        Language::C => tree_sitter_c::LANGUAGE.into(),
231        Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
232        Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
233        Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
234        Language::Scala => tree_sitter_scala::LANGUAGE.into(),
235        Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
236        Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
237        Language::Lua => tree_sitter_lua::LANGUAGE.into(),
238        Language::Luau => tree_sitter_luau::LANGUAGE.into(),
239        Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
240        Language::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
241        Language::Swift => tree_sitter_swift::LANGUAGE.into(),
242    };
243
244    parser.set_language(&ts_language).map_err(|e| {
245        RemainingError::parse_error(PathBuf::new(), format!("Failed to set language: {}", e))
246    })?;
247    Ok(parser)
248}
249
250/// Get text for a node from source
251fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
252    node.utf8_text(source).unwrap_or("")
253}
254
255/// Get the line number (1-indexed) for a node
256fn get_line_number(node: Node) -> u32 {
257    node.start_position().row as u32 + 1
258}
259
260/// Get the end line number (1-indexed) for a node
261fn get_end_line_number(node: Node) -> u32 {
262    node.end_position().row as u32 + 1
263}
264
265// =============================================================================
266// Function Finding
267// =============================================================================
268
269/// Find a function definition by name in the AST.
270///
271/// Accepts either a bare function name (`run`) or a qualified
272/// `Class.method` form (`Flask.run`). When a qualified name is given:
273///   1. The class is located via [`find_class_node_explain`].
274///   2. The method is searched within the class subtree.
275///   3. If the class is not found OR the method is not found inside it,
276///      falls back to the LAST component as a bare name.
277fn find_function_node<'a>(
278    root: Node<'a>,
279    source: &[u8],
280    function_name: &str,
281    func_kinds: &[&str],
282) -> Option<Node<'a>> {
283    if function_name.contains('.') {
284        let parts: Vec<&str> = function_name.split('.').collect();
285        if parts.len() >= 2 {
286            let class_name = parts[0];
287            let remainder = parts[1..].join(".");
288            if let Some(class_node) = find_class_node_explain(root, class_name, source) {
289                let scope = class_node
290                    .child_by_field_name("body")
291                    .unwrap_or(class_node);
292                if let Some(found) =
293                    find_function_recursive(scope, source, &remainder, func_kinds)
294                {
295                    return Some(found);
296                }
297            }
298            // Fallback: try the LAST component as a bare name.
299            let last = *parts.last().unwrap();
300            return find_function_recursive(root, source, last, func_kinds);
301        }
302    }
303    find_function_recursive(root, source, function_name, func_kinds)
304}
305
306/// Locate a class/struct/trait/interface container by name. Used to
307/// scope `Class.method` lookups in [`find_function_node`]. The set of
308/// container kinds intentionally covers all major OO/struct grammars
309/// supported by tldr.
310fn find_class_node_explain<'a>(
311    root: Node<'a>,
312    class_name: &str,
313    source: &[u8],
314) -> Option<Node<'a>> {
315    const CLASS_KINDS: &[&str] = &[
316        // Python
317        "class_definition",
318        // TS/JS/Java/PHP/C#/Kotlin/Swift/Ruby
319        "class_declaration",
320        "class",
321        "interface_declaration",
322        // Rust
323        "struct_item",
324        "enum_item",
325        "trait_item",
326        "impl_item",
327        "union_item",
328        // C++
329        "class_specifier",
330        "struct_specifier",
331        "union_specifier",
332        // Java
333        "enum_declaration",
334        "record_declaration",
335        // PHP
336        "trait_declaration",
337        // C#
338        "struct_declaration",
339        // Kotlin / Scala
340        "object_declaration",
341        "class_definition",
342        "object_definition",
343        "trait_definition",
344        // Swift
345        "protocol_declaration",
346        "extension_declaration",
347        // Ruby
348        "module",
349    ];
350
351    let mut stack = vec![root];
352    while let Some(node) = stack.pop() {
353        if CLASS_KINDS.contains(&node.kind()) {
354            // Try the conventional "name" field first.
355            let name_match = node.child_by_field_name("name").is_some_and(|n| {
356                node_text(n, source) == class_name
357            });
358            if name_match {
359                return Some(node);
360            }
361            // Fallback: scan named children for an identifier-shaped name
362            // (Rust struct/enum/trait/impl, C++ class_specifier).
363            let mut cursor = node.walk();
364            for child in node.children(&mut cursor) {
365                if matches!(
366                    child.kind(),
367                    "identifier" | "type_identifier" | "constant"
368                ) {
369                    if node_text(child, source) == class_name {
370                        return Some(node);
371                    }
372                    break;
373                }
374            }
375        }
376        let mut cursor = node.walk();
377        let children: Vec<_> = node.children(&mut cursor).collect();
378        for child in children.into_iter().rev() {
379            stack.push(child);
380        }
381    }
382    None
383}
384
/// Depth-first, pre-order search for a function-like node named
/// `function_name` in the subtree rooted at `node`.
///
/// At each node the following shapes are tried in order, and the first match
/// in document order is returned:
///   1. A node whose kind is in `func_kinds` and whose name (via the `name`
///      field, a C/C++ declarator chain, or its first `identifier` child)
///      equals `function_name` — returns that node.
///   2. A `lexical_declaration` / `variable_declaration` whose declarator is
///      named `function_name` and holds a function-valued initializer —
///      returns the function VALUE node, not the declaration.
///   3. An `assignment_expression` whose target (identifier, or the property
///      of a member expression) is `function_name` and whose right-hand side
///      is a function — returns the right-hand side.
///   4. An object-literal `pair` whose key is `function_name` and whose value
///      is a function — returns the value.
///   5. An Elixir `def` / `defp` call (only when `func_kinds` contains
///      `"call"`) — returns the `call` node.
///   6. An OCaml `value_definition` whose `let_binding` pattern text equals
///      `function_name` — returns the `value_definition`.
///
/// If nothing matches at this node, each child is searched in turn. Returns
/// `None` when the whole subtree has no match.
fn find_function_recursive<'a>(
    node: Node<'a>,
    source: &[u8],
    function_name: &str,
    func_kinds: &[&str],
) -> Option<Node<'a>> {
    if func_kinds.contains(&node.kind()) {
        // Check if this function has the name we're looking for
        // Try field name first (most reliable)
        if let Some(name_node) = node.child_by_field_name("name") {
            let name = node_text(name_node, source);
            if name == function_name {
                return Some(node);
            }
        }
        // C/C++: function_definition -> declarator -> function_declarator -> identifier
        if let Some(declarator) = node.child_by_field_name("declarator") {
            if let Some(name) = extract_c_declarator_name_explain(declarator, source) {
                if name == function_name {
                    return Some(node);
                }
            }
        }
        // Fallback: search for identifier child (Python, etc.)
        // Only the first `identifier` child is compared; later ones are ignored.
        for child in node.children(&mut node.walk()) {
            if child.kind() == "identifier" {
                let name = node_text(child, source);
                if name == function_name {
                    return Some(node);
                }
                break;
            }
        }
    }

    // Check for arrow functions in variable declarations (TS/JS pattern):
    // lexical_declaration / variable_declaration -> variable_declarator -> name + value(arrow_function)
    if matches!(node.kind(), "lexical_declaration" | "variable_declaration") {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if child.kind() == "variable_declarator" {
                if let Some(name_node) = child.child_by_field_name("name") {
                    let var_name = node_text(name_node, source);
                    if var_name == function_name {
                        if let Some(value_node) = child.child_by_field_name("value") {
                            // A same-named variable bound to a non-function
                            // value is not a match; the search continues.
                            if matches!(
                                value_node.kind(),
                                "arrow_function"
                                    | "function"
                                    | "function_expression"
                                    | "generator_function"
                            ) {
                                return Some(value_node);
                            }
                        }
                    }
                }
            }
        }
    }

    // (js-extract-function-expressions-v1) JS/TS function-expression assignments:
    //   app.use = function() {}
    //   Foo.prototype.bar = function() {}
    //   handler = () => {}
    if node.kind() == "assignment_expression" {
        if let (Some(left), Some(right)) = (
            node.child_by_field_name("left"),
            node.child_by_field_name("right"),
        ) {
            // For member expressions only the final property is compared, so
            // `Foo.prototype.bar` is looked up as `bar`.
            let target_name = match left.kind() {
                "identifier" => Some(node_text(left, source).to_string()),
                "member_expression" => left
                    .child_by_field_name("property")
                    .map(|p| node_text(p, source).to_string()),
                _ => None,
            };
            if let Some(name) = target_name {
                if name == function_name
                    && matches!(
                        right.kind(),
                        "arrow_function"
                            | "function"
                            | "function_expression"
                            | "generator_function"
                    )
                {
                    return Some(right);
                }
            }
        }
    }

    // (js-extract-function-expressions-v1) Object literal pair:
    //   { foo: function() {} }  /  { foo: () => {} }
    if node.kind() == "pair" {
        if let (Some(key), Some(value)) = (
            node.child_by_field_name("key"),
            node.child_by_field_name("value"),
        ) {
            // String keys are compared with their surrounding quote characters
            // stripped; any other key kind yields an empty name.
            let key_name = match key.kind() {
                "property_identifier" | "identifier" => node_text(key, source).to_string(),
                "string" => node_text(key, source)
                    .trim_matches(|c| c == '"' || c == '\'' || c == '`')
                    .to_string(),
                _ => String::new(),
            };
            if key_name == function_name
                && matches!(
                    value.kind(),
                    "arrow_function"
                        | "function"
                        | "function_expression"
                        | "generator_function"
                )
            {
                return Some(value);
            }
        }
    }

    // Elixir: def/defp are `call` nodes where the first child identifier is "def"/"defp"
    // and the function name is in the arguments
    if node.kind() == "call" && func_kinds.contains(&"call") {
        for child in node.children(&mut node.walk()) {
            if child.kind() == "identifier" {
                let text = node_text(child, source);
                if text == "def" || text == "defp" {
                    if let Some(args) = child.next_sibling() {
                        if args.kind() == "arguments" || args.kind() == "call" {
                            if let Some(name_node) = args.child(0) {
                                // When the first argument is itself a call
                                // (a head with parameters), the name is that
                                // call's first child.
                                let fname = if name_node.kind() == "call" {
                                    name_node
                                        .child(0)
                                        .map(|n| node_text(n, source))
                                        .unwrap_or("")
                                } else {
                                    node_text(name_node, source)
                                };
                                if fname == function_name {
                                    return Some(node);
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // OCaml: value_definition -> let_binding -> pattern field contains the function name
    if node.kind() == "value_definition" {
        for child in node.children(&mut node.walk()) {
            if child.kind() == "let_binding" {
                if let Some(pattern_node) = child.child_by_field_name("pattern") {
                    let name = node_text(pattern_node, source);
                    if name == function_name {
                        return Some(node);
                    }
                }
            }
        }
    }

    // Recurse into children
    for child in node.children(&mut node.walk()) {
        if let Some(found) = find_function_recursive(child, source, function_name, func_kinds) {
            return Some(found);
        }
    }

    None
}
558
559/// Recursively extract function name from C/C++ nested declarator chain
560fn extract_c_declarator_name_explain(declarator: Node, source: &[u8]) -> Option<String> {
561    match declarator.kind() {
562        "identifier" | "field_identifier" => {
563            let name = node_text(declarator, source).to_string();
564            if !name.is_empty() {
565                Some(name)
566            } else {
567                None
568            }
569        }
570        "function_declarator"
571        | "pointer_declarator"
572        | "reference_declarator"
573        | "parenthesized_declarator" => declarator
574            .child_by_field_name("declarator")
575            .and_then(|inner| extract_c_declarator_name_explain(inner, source)),
576        _ => None,
577    }
578}
579
580// =============================================================================
581// Signature Extraction
582// =============================================================================
583
584/// Extract signature information from a function node
585fn extract_signature(func_node: Node, source: &[u8], language: Language) -> SignatureInfo {
586    let mut sig = SignatureInfo::new();
587
588    // Check if async (language-specific)
589    sig.is_async = match language {
590        Language::Python => func_node.kind() == "async_function_definition",
591        Language::TypeScript | Language::JavaScript => {
592            // Check for async modifier
593            let mut is_async = false;
594            for child in func_node.children(&mut func_node.walk()) {
595                if child.kind() == "async" {
596                    is_async = true;
597                    break;
598                }
599            }
600            is_async
601        }
602        Language::Rust => {
603            // Check for async keyword
604            node_text(func_node, source).contains("async")
605        }
606        _ => false,
607    };
608
609    // Extract parameters
610    if let Some(params_node) = func_node.child_by_field_name("parameters") {
611        sig.params = extract_params(params_node, source);
612    }
613
614    // Extract return type
615    if let Some(return_node) = func_node.child_by_field_name("return_type") {
616        sig.return_type = Some(node_text(return_node, source).to_string());
617    }
618
619    // Extract decorators (look for decorated_definition parent or decorator children)
620    sig.decorators = extract_decorators(func_node, source);
621
622    // Extract docstring
623    sig.docstring = extract_docstring(func_node, source);
624
625    sig
626}
627
628/// Extract parameters from a parameters node
629fn extract_params(params_node: Node, source: &[u8]) -> Vec<ParamInfo> {
630    let mut params = Vec::new();
631
632    for child in params_node.children(&mut params_node.walk()) {
633        match child.kind() {
634            "identifier" => {
635                // Simple parameter without annotation
636                let name = node_text(child, source);
637                if name != "self" && name != "cls" {
638                    params.push(ParamInfo::new(name));
639                }
640            }
641            "typed_parameter" | "typed_default_parameter" => {
642                // Parameter with type annotation
643                let mut param = ParamInfo::new("");
644                for part in child.children(&mut child.walk()) {
645                    match part.kind() {
646                        "identifier" => {
647                            let name = node_text(part, source);
648                            if name != "self" && name != "cls" && param.name.is_empty() {
649                                param.name = name.to_string();
650                            }
651                        }
652                        "type" => {
653                            param.type_hint = Some(node_text(part, source).to_string());
654                        }
655                        _ => {}
656                    }
657                }
658                // Only add if we got a name
659                if !param.name.is_empty() {
660                    params.push(param);
661                }
662            }
663            "default_parameter" => {
664                // Parameter with default value
665                let mut param = ParamInfo::new("");
666                let mut got_name = false;
667                for part in child.children(&mut child.walk()) {
668                    if part.kind() == "identifier" && !got_name {
669                        let name = node_text(part, source);
670                        if name != "self" && name != "cls" {
671                            param.name = name.to_string();
672                            got_name = true;
673                        }
674                    } else if got_name && param.default.is_none() && part.kind() != "=" {
675                        param.default = Some(node_text(part, source).to_string());
676                    }
677                }
678                if !param.name.is_empty() {
679                    params.push(param);
680                }
681            }
682            _ => {}
683        }
684    }
685
686    params
687}
688
689/// Extract decorators
690fn extract_decorators(func_node: Node, source: &[u8]) -> Vec<String> {
691    let mut decorators = Vec::new();
692
693    // Check if parent is decorated_definition
694    if let Some(parent) = func_node.parent() {
695        if parent.kind() == "decorated_definition" {
696            for child in parent.children(&mut parent.walk()) {
697                if child.kind() == "decorator" {
698                    let text = node_text(child, source);
699                    decorators.push(text.trim_start_matches('@').to_string());
700                }
701            }
702        }
703    }
704
705    decorators
706}
707
708/// Extract docstring from function body
709fn extract_docstring(func_node: Node, source: &[u8]) -> Option<String> {
710    // Look for the function body (block)
711    if let Some(body) = func_node.child_by_field_name("body") {
712        // First statement in body might be a docstring
713        if let Some(first_stmt) = body.child(0) {
714            if first_stmt.kind() == "expression_statement" {
715                if let Some(expr) = first_stmt.child(0) {
716                    if expr.kind() == "string" {
717                        let text = node_text(expr, source);
718                        // Remove quotes
719                        let cleaned = text
720                            .trim_start_matches("\"\"\"")
721                            .trim_start_matches("'''")
722                            .trim_start_matches('"')
723                            .trim_start_matches('\'')
724                            .trim_end_matches("\"\"\"")
725                            .trim_end_matches("'''")
726                            .trim_end_matches('"')
727                            .trim_end_matches('\'')
728                            .trim();
729                        return Some(cleaned.to_string());
730                    }
731                }
732            }
733        }
734    }
735    None
736}
737
738// =============================================================================
739// Purity Analysis
740// =============================================================================
741
742/// Analyze purity of a function
743fn analyze_purity(func_node: Node, source: &[u8]) -> PurityInfo {
744    let mut effects = Vec::new();
745    let mut has_unknown_calls = false;
746    let mut has_any_calls = false;
747
748    analyze_purity_recursive(
749        func_node,
750        source,
751        &mut effects,
752        &mut has_unknown_calls,
753        &mut has_any_calls,
754    );
755
756    if !effects.is_empty() {
757        // Has side effects -> impure
758        PurityInfo::impure(effects)
759    } else if has_unknown_calls {
760        // No known side effects, but calls unknown functions -> unknown
761        PurityInfo::unknown().with_confidence("medium")
762    } else if has_any_calls {
763        // All calls resolved to known-pure builtins -> pure
764        PurityInfo::pure()
765    } else {
766        // No calls detected at all (empty body or pure computation like a+b).
767        // Absence of evidence is not evidence of purity — classify as unknown
768        // with low confidence since we have nothing to base a purity claim on.
769        PurityInfo::unknown().with_confidence("low")
770    }
771}
772
773fn analyze_purity_recursive(
774    node: Node,
775    source: &[u8],
776    effects: &mut Vec<String>,
777    has_unknown_calls: &mut bool,
778    has_any_calls: &mut bool,
779) {
780    match node.kind() {
781        "global_statement" | "nonlocal_statement" => {
782            if !effects.contains(&"global_write".to_string()) {
783                effects.push("global_write".to_string());
784            }
785        }
786        "assignment" | "augmented_assignment" => {
787            // Check for attribute writes (self.x = ...)
788            if let Some(left) = node.child_by_field_name("left") {
789                if left.kind() == "attribute" && !effects.contains(&"attribute_write".to_string()) {
790                    effects.push("attribute_write".to_string());
791                }
792            }
793        }
794        "call" => {
795            *has_any_calls = true;
796            let call_name = extract_call_name(node, source);
797            if let Some(name) = &call_name {
798                // Check for I/O operations
799                for &io_op in IO_OPERATIONS {
800                    if name == io_op || name.ends_with(&format!(".{}", io_op)) {
801                        if !effects.contains(&"io".to_string()) {
802                            effects.push("io".to_string());
803                        }
804                        return;
805                    }
806                }
807
808                // Check for impure calls
809                for &impure in IMPURE_CALLS {
810                    if name == impure || name.ends_with(impure) {
811                        if !effects.contains(&"io".to_string()) {
812                            effects.push("io".to_string());
813                        }
814                        return;
815                    }
816                }
817
818                // Check for collection mutations
819                let method_name = name.split('.').next_back().unwrap_or(name);
820                for &mutation in COLLECTION_MUTATIONS {
821                    if method_name == mutation {
822                        if !effects.contains(&"collection_modify".to_string()) {
823                            effects.push("collection_modify".to_string());
824                        }
825                        return;
826                    }
827                }
828
829                // Check if it's a known pure builtin
830                let base = name.split('.').next_back().unwrap_or(name);
831                if !PURE_BUILTINS.contains(&name.as_str()) && !PURE_BUILTINS.contains(&base) {
832                    *has_unknown_calls = true;
833                }
834            }
835        }
836        _ => {}
837    }
838
839    // Recurse into children
840    for child in node.children(&mut node.walk()) {
841        analyze_purity_recursive(child, source, effects, has_unknown_calls, has_any_calls);
842    }
843}
844
845/// Extract call name from a call node
846fn extract_call_name(node: Node, source: &[u8]) -> Option<String> {
847    if let Some(func) = node.child_by_field_name("function") {
848        return Some(extract_name_from_expr(func, source));
849    }
850    // language-specific-bugs-v1 (P14.AGG14-16): Java / Kotlin / C# /
851    // Scala / Swift / TS expose the callsite name through different
852    // field names than Python's `function`. Try each in turn so the
853    // multi-language call-kind acceptance above can extract a usable
854    // name for `tldr explain`'s callees enumeration.
855    for field in &["name", "method", "callee"] {
856        if let Some(name_node) = node.child_by_field_name(field) {
857            return Some(extract_name_from_expr(name_node, source));
858        }
859    }
860
861    for child in node.children(&mut node.walk()) {
862        match child.kind() {
863            "identifier" | "simple_identifier" => {
864                return Some(node_text(child, source).to_string())
865            }
866            "attribute"
867            | "field_access"
868            | "member_access_expression"
869            | "navigation_expression"
870            | "scoped_identifier" => return Some(extract_name_from_expr(child, source)),
871            _ => continue,
872        }
873    }
874    None
875}
876
877/// Extract a dotted name from an expression.
878///
879/// non-judgment-call-bugs-v1 (P17.AGG17-2): the previous fallback
880/// (`_ => node_text(node, source).to_string()`) returned the *full
881/// source text* for any non-identifier, non-Python-attribute node.
882/// For TypeScript member-call chains like
883/// `arr.flatMap(...).concat(...)` the call-expression's `function`
884/// field is a `member_expression` whose `object` is itself a
885/// `call_expression` — emitting `node_text(member_expression)`
886/// produced multi-line strings (with embedded `\n` and full argument
887/// source) as `callees[].name`. P17 flagged 54/270 callees
888/// corrupted in `tldr explain emitter.ts emitWebIdl`.
889///
890/// Fix: explicitly handle the property-access node kinds emitted by
891/// every multi-language tree-sitter grammar we accept and extract
892/// just the rightmost property identifier (`property` /
893/// `field` / `name` field, or the last `identifier` child).
894/// `tldr context` already produced clean identifiers — this aligns
895/// `tldr explain` with the same canonicalisation.
896fn extract_name_from_expr(node: Node, source: &[u8]) -> String {
897    match node.kind() {
898        "identifier" | "simple_identifier" | "shorthand_property_identifier" => {
899            node_text(node, source).to_string()
900        }
901        "attribute" => {
902            let mut parts = Vec::new();
903            let mut current = node;
904
905            loop {
906                if let Some(attr) = current.child_by_field_name("attribute") {
907                    parts.push(node_text(attr, source).to_string());
908                }
909
910                if let Some(obj) = current.child_by_field_name("object") {
911                    if obj.kind() == "attribute" {
912                        current = obj;
913                    } else if obj.kind() == "identifier" {
914                        parts.push(node_text(obj, source).to_string());
915                        break;
916                    } else {
917                        break;
918                    }
919                } else {
920                    break;
921                }
922            }
923
924            parts.reverse();
925            parts.join(".")
926        }
927        // TypeScript / JavaScript: `obj.method` — emit just the
928        // property name. Chained calls (`a.b().c`) reach here with
929        // `object` itself a `call_expression`; we no longer emit the
930        // full source, only the trailing property.
931        "member_expression" => {
932            if let Some(prop) = node.child_by_field_name("property") {
933                return node_text(prop, source).to_string();
934            }
935            extract_trailing_identifier(node, source)
936        }
937        // Java / C# / PHP: `obj.method`
938        "field_access" | "member_access_expression" => {
939            for field in &["name", "field"] {
940                if let Some(prop) = node.child_by_field_name(field) {
941                    return node_text(prop, source).to_string();
942                }
943            }
944            extract_trailing_identifier(node, source)
945        }
946        // Kotlin / Swift: `obj.method` (member access on a navigation
947        // expression).
948        "navigation_expression" => {
949            if let Some(suffix) = node.child_by_field_name("suffix") {
950                return extract_trailing_identifier(suffix, source);
951            }
952            extract_trailing_identifier(node, source)
953        }
954        // Go: `pkg.Symbol`
955        "selector_expression" => {
956            if let Some(field) = node.child_by_field_name("field") {
957                return node_text(field, source).to_string();
958            }
959            extract_trailing_identifier(node, source)
960        }
961        // Rust: `mod::path::item` — emit just the trailing path segment
962        "scoped_identifier" | "scoped_call_expression" => {
963            if let Some(name) = node.child_by_field_name("name") {
964                return node_text(name, source).to_string();
965            }
966            extract_trailing_identifier(node, source)
967        }
968        // Rust: `recv.method`
969        "field_expression" => {
970            if let Some(field) = node.child_by_field_name("field") {
971                return node_text(field, source).to_string();
972            }
973            extract_trailing_identifier(node, source)
974        }
975        // Anything else: walk the subtree and grab the last identifier
976        // we find. This is still better than emitting the full source
977        // text and matches the behaviour of `tldr context` for
978        // unfamiliar grammar shapes.
979        _ => extract_trailing_identifier(node, source),
980    }
981}
982
983/// Walk a subtree and return the rightmost identifier-like leaf token,
984/// or — as an absolute last resort — the node's source text *up to the
985/// first whitespace or `(` character* so we never emit the multi-line
986/// argument list that produced AGG17-2.
987fn extract_trailing_identifier(node: Node, source: &[u8]) -> String {
988    fn walk<'a>(node: Node<'a>, found: &mut Option<Node<'a>>) {
989        for child in node.children(&mut node.walk()) {
990            match child.kind() {
991                "identifier"
992                | "simple_identifier"
993                | "shorthand_property_identifier"
994                | "property_identifier"
995                | "field_identifier"
996                | "type_identifier" => {
997                    *found = Some(child);
998                }
999                _ => walk(child, found),
1000            }
1001        }
1002    }
1003    let mut last_id: Option<Node> = None;
1004    walk(node, &mut last_id);
1005    if let Some(n) = last_id {
1006        return node_text(n, source).to_string();
1007    }
1008    // Fallback: clip the raw source so we never emit multi-line text.
1009    let raw = node_text(node, source);
1010    let cut = raw
1011        .find(|c: char| c.is_whitespace() || c == '(' || c == '<')
1012        .unwrap_or(raw.len());
1013    raw[..cut].to_string()
1014}
1015
1016// =============================================================================
1017// Complexity Analysis
1018// =============================================================================
1019
1020/// Compute complexity metrics for a function
1021fn compute_complexity(func_node: Node) -> ComplexityInfo {
1022    // cross-command-consistency-v3 (P5.BUG-N2): the local
1023    // `count_complexity_recursive` walker is preserved for `num_blocks`,
1024    // `num_edges`, and `has_loops` (fields that are unique to
1025    // `ComplexityInfo` and have no canonical equivalent). The cyclomatic
1026    // value is intentionally discarded here — the caller `ExplainArgs::run`
1027    // overwrites it with the canonical
1028    // `tldr_core::calculate_complexity` value so `tldr explain` and
1029    // `tldr complexity` always agree on cyclomatic for the same function.
1030    let mut cyclomatic = 1; // Base complexity (overwritten by caller)
1031    let mut num_blocks = 1;
1032    let mut num_edges = 0;
1033    let mut has_loops = false;
1034
1035    count_complexity_recursive(
1036        func_node,
1037        &mut cyclomatic,
1038        &mut num_blocks,
1039        &mut num_edges,
1040        &mut has_loops,
1041    );
1042
1043    ComplexityInfo::new(cyclomatic, num_blocks, num_edges, has_loops)
1044}
1045
1046fn count_complexity_recursive(
1047    node: Node,
1048    cyclomatic: &mut u32,
1049    num_blocks: &mut u32,
1050    num_edges: &mut u32,
1051    has_loops: &mut bool,
1052) {
1053    match node.kind() {
1054        "if_statement" | "elif_clause" => {
1055            *cyclomatic += 1;
1056            *num_blocks += 1;
1057            *num_edges += 2;
1058        }
1059        "for_statement" | "while_statement" => {
1060            *cyclomatic += 1;
1061            *num_blocks += 1;
1062            *num_edges += 2;
1063            *has_loops = true;
1064        }
1065        "try_statement" => {
1066            *cyclomatic += 1;
1067            *num_blocks += 1;
1068            *num_edges += 1;
1069        }
1070        "except_clause" => {
1071            *cyclomatic += 1;
1072            *num_blocks += 1;
1073            *num_edges += 1;
1074        }
1075        "and_operator" | "or_operator" => {
1076            *cyclomatic += 1;
1077        }
1078        "conditional_expression" => {
1079            // Ternary: x if cond else y
1080            *cyclomatic += 1;
1081            *num_edges += 1;
1082        }
1083        "list_comprehension"
1084        | "set_comprehension"
1085        | "dictionary_comprehension"
1086        | "generator_expression" => {
1087            *cyclomatic += 1;
1088            *has_loops = true;
1089        }
1090        _ => {}
1091    }
1092
1093    for child in node.children(&mut node.walk()) {
1094        count_complexity_recursive(child, cyclomatic, num_blocks, num_edges, has_loops);
1095    }
1096}
1097
1098// =============================================================================
1099// Call Graph Analysis
1100// =============================================================================
1101
1102/// Find callees (functions called by this function)
1103fn find_callees(
1104    func_node: Node,
1105    source: &[u8],
1106    file_path: &str,
1107    local_functions: &HashSet<String>,
1108) -> Vec<CallInfo> {
1109    let mut callees = Vec::new();
1110    find_callees_recursive(func_node, source, file_path, local_functions, &mut callees);
1111    callees
1112}
1113
1114fn find_callees_recursive(
1115    node: Node,
1116    source: &[u8],
1117    file_path: &str,
1118    local_functions: &HashSet<String>,
1119    callees: &mut Vec<CallInfo>,
1120) {
1121    // language-specific-bugs-v1 (P14.AGG14-16): Java / Kotlin / C# tree-sitter
1122    // grammars expose callsites as `method_invocation` /
1123    // `invocation_expression` rather than the Python-shaped `call` node.
1124    // The original `node.kind() == "call"` filter therefore returned
1125    // `callees=[]` for every Java function in `tldr explain`, even when
1126    // the same function call site was visible in `context` and reachable
1127    // via the project call graph. Match the multi-language call-shaped
1128    // nodes already accepted elsewhere in this crate (`looks_like_call`
1129    // in `specs.rs` enumerates the same set).
1130    let is_call = matches!(
1131        node.kind(),
1132        "call"
1133            | "call_expression"
1134            | "invocation_expression"
1135            | "method_invocation"
1136            | "function_call"
1137            | "function_call_statement"
1138            | "macro_invocation"
1139            | "member_call_expression"
1140            | "function_call_expression"
1141            | "scoped_call_expression"
1142            | "nullsafe_member_call_expression"
1143    );
1144    if is_call {
1145        if let Some(name) = extract_call_name(node, source) {
1146            // Get base name for local function check
1147            let base_name = name.split('.').next().unwrap_or(&name);
1148
1149            // Add if it's a local function or a known call
1150            let file = if local_functions.contains(base_name) {
1151                file_path.to_string()
1152            } else {
1153                "<external>".to_string()
1154            };
1155
1156            // Avoid duplicates.
1157            // cross-cutting-and-clear-fix-bugs-v1 (P18.Pattern-B): the
1158            // same call site can yield two emissions in some languages —
1159            // a bare-name (`trickleDownMin`) AND a qualified-name
1160            // (`Heap.trickleDownMin`) — when the call expression is
1161            // disambiguated by an outer enclosing-class scope. Dedup by
1162            // (line, last_segment) so the call is reported once. Falls
1163            // back to the legacy exact-name check when line numbers
1164            // cannot be resolved.
1165            let line = get_line_number(node);
1166            let last_seg = name.rsplit('.').next().unwrap_or(&name).to_string();
1167            if !callees.iter().any(|c| {
1168                c.name == name
1169                    || (c.line == line
1170                        && c.name.rsplit('.').next().unwrap_or(&c.name) == last_seg)
1171            }) {
1172                callees.push(CallInfo::new(name, file, line));
1173            }
1174        }
1175    }
1176
1177    for child in node.children(&mut node.walk()) {
1178        find_callees_recursive(child, source, file_path, local_functions, callees);
1179    }
1180}
1181
1182/// Find callers (functions that call this function) - searches the entire file
1183fn find_callers(
1184    root: Node,
1185    source: &[u8],
1186    target_function: &str,
1187    file_path: &str,
1188    func_kinds: &[&str],
1189) -> Vec<CallInfo> {
1190    let mut callers = Vec::new();
1191    find_callers_in_file(
1192        root,
1193        source,
1194        target_function,
1195        file_path,
1196        &mut callers,
1197        None,
1198        func_kinds,
1199    );
1200    callers
1201}
1202
1203fn find_callers_in_file(
1204    node: Node,
1205    source: &[u8],
1206    target_function: &str,
1207    file_path: &str,
1208    callers: &mut Vec<CallInfo>,
1209    current_function: Option<&str>,
1210    func_kinds: &[&str],
1211) {
1212    if func_kinds.contains(&node.kind()) {
1213        // Get this function's name
1214        let mut func_name = None;
1215
1216        // Try field name first
1217        if let Some(name_node) = node.child_by_field_name("name") {
1218            func_name = Some(node_text(name_node, source));
1219        } else {
1220            // Fallback: search for identifier child
1221            for child in node.children(&mut node.walk()) {
1222                if child.kind() == "identifier" {
1223                    func_name = Some(node_text(child, source));
1224                    break;
1225                }
1226            }
1227        }
1228
1229        // Recurse with this function as current
1230        for child in node.children(&mut node.walk()) {
1231            find_callers_in_file(
1232                child,
1233                source,
1234                target_function,
1235                file_path,
1236                callers,
1237                func_name,
1238                func_kinds,
1239            );
1240        }
1241        return;
1242    } else if node.kind() == "call" {
1243        if let Some(name) = extract_call_name(node, source) {
1244            // Check if this call is to our target function
1245            let base = name.split('.').next_back().unwrap_or(&name);
1246            if base == target_function || name == target_function {
1247                if let Some(caller_name) = current_function {
1248                    // Avoid duplicates and self-references
1249                    if caller_name != target_function
1250                        && !callers.iter().any(|c| c.name == caller_name)
1251                    {
1252                        callers.push(CallInfo::new(caller_name, file_path, get_line_number(node)));
1253                    }
1254                }
1255            }
1256        }
1257    }
1258
1259    for child in node.children(&mut node.walk()) {
1260        find_callers_in_file(
1261            child,
1262            source,
1263            target_function,
1264            file_path,
1265            callers,
1266            current_function,
1267            func_kinds,
1268        );
1269    }
1270}
1271
1272/// Collect all function names in a file
1273fn collect_function_names(root: Node, source: &[u8], func_kinds: &[&str]) -> HashSet<String> {
1274    let mut names = HashSet::new();
1275    collect_function_names_recursive(root, source, &mut names, func_kinds);
1276    names
1277}
1278
1279fn collect_function_names_recursive(
1280    node: Node,
1281    source: &[u8],
1282    names: &mut HashSet<String>,
1283    func_kinds: &[&str],
1284) {
1285    if func_kinds.contains(&node.kind()) {
1286        // Try field name first
1287        if let Some(name_node) = node.child_by_field_name("name") {
1288            names.insert(node_text(name_node, source).to_string());
1289        } else {
1290            // Fallback: search for identifier child
1291            for child in node.children(&mut node.walk()) {
1292                if child.kind() == "identifier" {
1293                    names.insert(node_text(child, source).to_string());
1294                    break;
1295                }
1296            }
1297        }
1298    }
1299
1300    for child in node.children(&mut node.walk()) {
1301        collect_function_names_recursive(child, source, names, func_kinds);
1302    }
1303}
1304
1305// =============================================================================
1306// Text Formatting
1307// =============================================================================
1308
1309/// Format an ExplainReport as human-readable text
1310fn format_explain_text(report: &ExplainReport) -> String {
1311    let mut lines = Vec::new();
1312
1313    lines.push(format!("Function: {}", report.function_name));
1314    lines.push(format!("File: {}", report.file));
1315    lines.push(format!("Lines: {}-{}", report.line_start, report.line_end));
1316    lines.push(format!("Language: {}", report.language));
1317    lines.push(String::new());
1318
1319    // Signature
1320    lines.push("Signature:".to_string());
1321    if report.signature.is_async {
1322        lines.push("  async: yes".to_string());
1323    }
1324    lines.push(format!("  Parameters: {}", report.signature.params.len()));
1325    for param in &report.signature.params {
1326        let type_str = param.type_hint.as_deref().unwrap_or("untyped");
1327        lines.push(format!("    - {}: {}", param.name, type_str));
1328    }
1329    if let Some(ref ret) = report.signature.return_type {
1330        lines.push(format!("  Returns: {}", ret));
1331    }
1332    if !report.signature.decorators.is_empty() {
1333        lines.push(format!(
1334            "  Decorators: {}",
1335            report.signature.decorators.join(", ")
1336        ));
1337    }
1338    if let Some(ref doc) = report.signature.docstring {
1339        let preview = if doc.len() > 100 {
1340            format!("{}...", &doc[..100])
1341        } else {
1342            doc.clone()
1343        };
1344        lines.push(format!("  Docstring: {}", preview));
1345    }
1346    lines.push(String::new());
1347
1348    // Purity
1349    lines.push("Purity:".to_string());
1350    lines.push(format!(
1351        "  Classification: {}",
1352        report.purity.classification
1353    ));
1354    lines.push(format!("  Confidence: {}", report.purity.confidence));
1355    if !report.purity.effects.is_empty() {
1356        lines.push(format!("  Effects: {}", report.purity.effects.join(", ")));
1357    }
1358    lines.push(String::new());
1359
1360    // Complexity
1361    if let Some(ref cx) = report.complexity {
1362        lines.push("Complexity:".to_string());
1363        lines.push(format!("  Cyclomatic: {}", cx.cyclomatic));
1364        lines.push(format!("  Blocks: {}", cx.num_blocks));
1365        lines.push(format!("  Edges: {}", cx.num_edges));
1366        lines.push(format!("  Has loops: {}", cx.has_loops));
1367        lines.push(String::new());
1368    }
1369
1370    // Callers
1371    if !report.callers.is_empty() {
1372        lines.push(format!("Callers ({}):", report.callers.len()));
1373        for caller in &report.callers {
1374            lines.push(format!(
1375                "  - {} ({}:{})",
1376                caller.name, caller.file, caller.line
1377            ));
1378        }
1379        lines.push(String::new());
1380    }
1381
1382    // Callees
1383    if !report.callees.is_empty() {
1384        lines.push(format!("Callees ({}):", report.callees.len()));
1385        for callee in &report.callees {
1386            lines.push(format!(
1387                "  - {} ({}:{})",
1388                callee.name, callee.file, callee.line
1389            ));
1390        }
1391    }
1392
1393    lines.join("\n")
1394}
1395
1396// =============================================================================
1397// Project-wide Call Graph Enrichment
1398// (explain-cross-command-consistency-v1: route callers/callees through the
1399// canonical project-wide call graph used by `impact`/`references`/`context`,
1400// matching the `cross-command-consistency-v3` pattern that aligned cyclomatic
1401// between `explain` and `complexity`.)
1402// =============================================================================
1403
/// Pick the project root to use when building the call graph for `file`.
///
/// Starting at the file's parent directory, each ancestor is checked for
/// a project marker (`Cargo.toml`, `package.json`, `go.mod`,
/// `pyproject.toml`, `setup.py`, `pom.xml`, `build.gradle`,
/// `build.gradle.kts`, `.git`). The nearest ancestor containing one is
/// returned. If none is found the file's parent directory is returned, so
/// the call graph still covers sibling files.
///
/// explain-callers-cross-lang-v1 (P15.AGG15-1): the input is made
/// absolute first — via `canonicalize`, or by joining the current
/// directory when canonicalization fails — so that every ancestor is a
/// real directory. A relative input such as `lib/application.js` would
/// otherwise end in an empty path component; joining a marker onto it
/// resolves against the CWD and can falsely "exist", yielding an empty
/// project root and therefore no callers. Empty components are skipped
/// for the same reason.
fn explain_project_root(file: &std::path::Path) -> std::path::PathBuf {
    const MARKERS: [&str; 9] = [
        "Cargo.toml",
        "package.json",
        "go.mod",
        "pyproject.toml",
        "setup.py",
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
        ".git",
    ];

    let absolute = match file.canonicalize() {
        Ok(real) => real,
        // The file may not exist on disk via this path: best-effort
        // absolute form instead.
        Err(_) if file.is_absolute() => file.to_path_buf(),
        Err(_) => match std::env::current_dir() {
            Ok(cwd) => cwd.join(file),
            Err(_) => file.to_path_buf(),
        },
    };

    let start = match absolute.parent() {
        Some(dir) => dir.to_path_buf(),
        None => std::path::PathBuf::from("."),
    };

    // `ancestors` yields `start` itself first, then each parent in turn.
    let root = start
        .ancestors()
        .filter(|dir| !dir.as_os_str().is_empty())
        .find(|dir| MARKERS.iter().any(|marker| dir.join(marker).exists()))
        .map(std::path::Path::to_path_buf);

    root.unwrap_or(start)
}
1469
/// Return true if `edge_path` and `target_file` refer to the same file.
///
/// Checks, in order:
/// 1. plain equality;
/// 2. equality of the canonicalized paths, when both canonicalize;
/// 3. a component-wise suffix match in either direction, which covers a
///    relative path from the call graph compared with an absolute target.
///
/// An empty path is only equivalent to another empty path.
/// `Path::ends_with` treats the empty path as a suffix of every path, so
/// without this guard an empty path would match any file.
fn paths_equivalent(edge_path: &std::path::Path, target_file: &std::path::Path) -> bool {
    if edge_path == target_file {
        return true;
    }
    // Unequal and one side empty: never the same file.
    if edge_path.as_os_str().is_empty() || target_file.as_os_str().is_empty() {
        return false;
    }
    if let (Ok(a), Ok(b)) = (edge_path.canonicalize(), target_file.canonicalize()) {
        if a == b {
            return true;
        }
    }
    // Fall back to suffix match in either direction (relative vs absolute).
    edge_path.ends_with(target_file) || target_file.ends_with(edge_path)
}
1491
/// Return the part of `qualified` after its last `.` or `::` separator,
/// whichever occurs later. A name without either separator is returned
/// unchanged; a name ending in a separator yields the empty string.
///
/// Mirrors `tldr_core::analysis::impact::last_segment` so the explain
/// merge applies the same matching rules `impact` uses.
fn explain_last_segment(qualified: &str) -> &str {
    // Byte offset just past each kind of separator, if present.
    let after_dot = qualified.rfind('.').map(|at| at + 1);
    let after_colons = qualified.rfind("::").map(|at| at + 2);

    // `None` orders below every `Some`, so `max` picks the later
    // separator and stays `None` only when neither occurs.
    match after_dot.max(after_colons) {
        Some(start) => &qualified[start..],
        None => qualified,
    }
}
1509
1510/// Two function names are equivalent when their last segments match, or
1511/// one is a qualified form of the other.
1512fn explain_names_match(candidate: &str, target: &str) -> bool {
1513    if candidate == target {
1514        return true;
1515    }
1516    if explain_last_segment(candidate) == target {
1517        return true;
1518    }
1519    let target_has_qualifier = target.contains('.') || target.contains("::");
1520    if target_has_qualifier {
1521        let target_tail = explain_last_segment(target);
1522        if candidate == target_tail {
1523            return true;
1524        }
1525        if explain_last_segment(candidate) == target_tail {
1526            return true;
1527        }
1528    }
1529    false
1530}
1531
1532/// Path-aware caller dedup: returns true if `report.callers` already
1533/// contains an entry equivalent to `(name, file)`. ux-and-explain-completeness-v1
1534/// (P12.AGG12-1): the previous string-equality check missed the relative-vs-
1535/// absolute path mismatch between the per-file walker (absolute) and the
1536/// project-graph (relative-to-root), causing duplicate `locate_app` callers
1537/// in `flask` (one with `line=0`, one with the real line number).
1538fn caller_already_present(
1539    callers: &[CallInfo],
1540    candidate_name: &str,
1541    candidate_file: &str,
1542) -> bool {
1543    let candidate_path = std::path::Path::new(candidate_file);
1544    callers.iter().any(|c| {
1545        if !names_match(&c.name, candidate_name) && !names_match(candidate_name, &c.name) {
1546            return false;
1547        }
1548        let existing_path = std::path::Path::new(&c.file);
1549        c.file == candidate_file || paths_equivalent(existing_path, candidate_path)
1550    })
1551}
1552
1553/// Path-aware callee dedup, mirroring `caller_already_present`.
1554fn callee_already_present(
1555    callees: &[CallInfo],
1556    candidate_name: &str,
1557    candidate_file: &str,
1558) -> bool {
1559    let candidate_path = std::path::Path::new(candidate_file);
1560    callees.iter().any(|c| {
1561        if !names_match(&c.name, candidate_name) && !names_match(candidate_name, &c.name) {
1562            return false;
1563        }
1564        if c.file == "<external>" {
1565            return true;
1566        }
1567        let existing_path = std::path::Path::new(&c.file);
1568        c.file == candidate_file || paths_equivalent(existing_path, candidate_path)
1569    })
1570}
1571
1572/// language-specific-bugs-v1 (P14.AGG14-16): given a caller file and the
1573/// caller-function name + target-function name, scan the file's source
1574/// looking for a call site to `target_function` inside the body of
1575/// `caller_function`. Returns the 1-indexed line of the first matching
1576/// call site, or `None` if no match is found.
1577///
1578/// Implementation: read file -> parse with the file's language ->
1579/// locate the function-shaped node whose name matches `caller_function`
1580/// -> walk its descendants for any call-shaped node whose callee
1581/// (extracted via `extract_call_name`) tail-matches `target_function`.
1582fn locate_call_in_caller_file(
1583    file: &std::path::Path,
1584    caller_function: &str,
1585    target_function: &str,
1586) -> Option<u32> {
1587    use std::fs;
1588    let language = Language::from_path(file)?;
1589    let source = fs::read_to_string(file).ok()?;
1590    let func_kinds = get_function_node_kinds(language);
1591    // Class node kinds — kept inline (a tiny static slice) to avoid
1592    // pulling in `interface.rs::class_node_kinds`, which is not pub.
1593    let class_kinds: &[&str] = &[
1594        "class_definition",
1595        "class_declaration",
1596        "interface_declaration",
1597        "struct_item",
1598        "enum_item",
1599        "trait_item",
1600        "impl_item",
1601        "class_specifier",
1602        "struct_specifier",
1603        "enum_declaration",
1604        "record_declaration",
1605        "object_declaration",
1606        "object_definition",
1607        "trait_definition",
1608        "protocol_declaration",
1609        "extension_declaration",
1610        "module",
1611    ];
1612
1613    let mut parser = get_parser(language).ok()?;
1614    let tree = parser.parse(&source, None)?;
1615    let source_bytes = source.as_bytes();
1616
1617    // Strip any class qualifier from `caller_function` for tail matching:
1618    // `OwnerController.processFindForm` -> `processFindForm`.
1619    let caller_tail = caller_function
1620        .rsplit('.')
1621        .next()
1622        .unwrap_or(caller_function);
1623    let target_tail = target_function
1624        .rsplit('.')
1625        .next()
1626        .unwrap_or(target_function);
1627
1628    fn descend<'a>(
1629        node: tree_sitter::Node<'a>,
1630        source: &[u8],
1631        func_kinds: &[&str],
1632        class_kinds: &[&str],
1633        caller_tail: &str,
1634        target_tail: &str,
1635        in_target_func: bool,
1636    ) -> Option<u32> {
1637        // When we enter a function node whose name matches caller_tail,
1638        // turn on `in_target_func` for the descent.
1639        let kind = node.kind();
1640        let is_func_decl = func_kinds.contains(&kind);
1641        let mut now_in = in_target_func;
1642        if is_func_decl {
1643            // Try to read this function's name. Reuse the same fallback
1644            // logic as `find_callers_in_file`: prefer the `name` field,
1645            // else the first identifier child.
1646            let mut name: Option<String> = None;
1647            if let Some(name_node) = node.child_by_field_name("name") {
1648                name = Some(node_text(name_node, source).to_string());
1649            } else {
1650                for child in node.children(&mut node.walk()) {
1651                    if matches!(child.kind(), "identifier" | "simple_identifier") {
1652                        name = Some(node_text(child, source).to_string());
1653                        break;
1654                    }
1655                }
1656            }
1657            if let Some(n) = name.as_deref() {
1658                if explain_names_match(n, caller_tail) || n == caller_tail {
1659                    now_in = true;
1660                }
1661            }
1662        }
1663
1664        // While inside the caller function, look for any call node whose
1665        // tail-name matches target_tail.
1666        if now_in {
1667            let is_call = matches!(
1668                kind,
1669                "call"
1670                    | "call_expression"
1671                    | "invocation_expression"
1672                    | "method_invocation"
1673                    | "function_call"
1674                    | "function_call_statement"
1675                    | "macro_invocation"
1676                    | "member_call_expression"
1677                    | "function_call_expression"
1678                    | "scoped_call_expression"
1679                    | "nullsafe_member_call_expression"
1680            );
1681            if is_call {
1682                if let Some(callee) = extract_call_name(node, source) {
1683                    let tail = callee.rsplit('.').next().unwrap_or(&callee);
1684                    if tail == target_tail {
1685                        return Some(node.start_position().row as u32 + 1);
1686                    }
1687                }
1688            }
1689        }
1690
1691        // Avoid descending into nested classes when we've already
1692        // matched the outer caller — but DO descend into nested
1693        // function definitions so closures/lambda bodies are searched.
1694        if class_kinds.contains(&kind) && now_in && !is_func_decl {
1695            // Don't descend into nested classes — they have their own
1696            // method scope.
1697            return None;
1698        }
1699
1700        for child in node.children(&mut node.walk()) {
1701            if let Some(line) = descend(
1702                child,
1703                source,
1704                func_kinds,
1705                class_kinds,
1706                caller_tail,
1707                target_tail,
1708                now_in,
1709            ) {
1710                return Some(line);
1711            }
1712        }
1713        None
1714    }
1715
1716    descend(
1717        tree.root_node(),
1718        source_bytes,
1719        func_kinds,
1720        class_kinds,
1721        caller_tail,
1722        target_tail,
1723        false,
1724    )
1725}
1726
1727/// Enrich `report.callers` and `report.callees` with cross-file results
1728/// derived from the project-wide call graph (`build_project_call_graph` /
1729/// `impact_analysis_with_ast_fallback`) — the same data source used by
1730/// `tldr impact`, `tldr references`, and `tldr context`. Same-file
1731/// results from the existing per-file walker are preserved; cross-file
1732/// callers/callees that the per-file walker cannot see by construction
1733/// are appended (deduplicated path-aware by `name+file`). Any failure
1734/// here is silently ignored so explain still returns its other fields
1735/// when the project graph cannot be built.
1736fn enrich_with_project_graph(
1737    report: &mut ExplainReport,
1738    file: &std::path::Path,
1739    function: &str,
1740    language: Language,
1741) {
1742    let project_root = explain_project_root(file);
1743    let graph = match build_project_call_graph(&project_root, language, None, true) {
1744        Ok(g) => g,
1745        Err(_) => return,
1746    };
1747
1748    // critical-regressions-v1 (P13.AGG13-2): when the user supplies a Swift
1749    // file that defines the function in an `extension Heap { ... }` (or a
1750    // nested type's extension), the Swift call-graph builder may attribute
1751    // the target's `dst_file` to the FIRST file it processed for that
1752    // class (e.g. `Heap.swift`), not the file where the method actually
1753    // lives (`Heap+UnsafeHandle.swift`). The strict `paths_equivalent`
1754    // filter then drops every real caller. Confirm whether the function
1755    // truly lives in the user-supplied file by AST scan; if so, accept
1756    // callers from any homonym target.
1757    let function_defined_in_file = function_is_defined_in_file(file, function, language);
1758    // Callers: use the same path `tldr impact` uses so the results agree.
1759    if let Ok(impact) = impact_analysis_with_ast_fallback(
1760        &graph,
1761        function,
1762        1, // direct callers only (consistent with the per-file walker)
1763        None,
1764        &project_root,
1765        language,
1766    ) {
1767        for tree in impact.targets.values() {
1768            // Only enrich when the target's file matches our subject file —
1769            // explain is per-function-per-file, so cross-file callers of a
1770            // homonym in a different file should not be merged in. The
1771            // `function_defined_in_file` escape hatch covers the Swift
1772            // extension case described above.
1773            if !paths_equivalent(&tree.file, file) && !function_defined_in_file {
1774                continue;
1775            }
1776            for caller in &tree.callers {
1777                let caller_file = caller.file.display().to_string();
1778                let caller_name = caller.function.clone();
1779                // Avoid self-references and duplicates.
1780                if explain_names_match(&caller_name, function)
1781                    && paths_equivalent(&caller.file, file)
1782                {
1783                    continue;
1784                }
1785                if caller_already_present(&report.callers, &caller_name, &caller_file) {
1786                    continue;
1787                }
1788                // language-specific-bugs-v1 (P14.AGG14-16): the call-graph
1789                // edge does not carry the source line of the callsite,
1790                // so the original code unconditionally pushed
1791                // `line: 0` — which made `tldr explain` agree with itself
1792                // on a bogus value across every Java/Kotlin/CSharp
1793                // caller. Resolve the line by scanning the caller file
1794                // for a callsite to `function` inside the named caller
1795                // function. Falls back to 0 only when no match is found.
1796                //
1797                // CallerTree's `file` is a project-relative path; resolve
1798                // it against the project root so `locate_call_in_caller_file`
1799                // can read the source.
1800                let abs_caller_file = if caller.file.is_absolute() {
1801                    caller.file.clone()
1802                } else {
1803                    project_root.join(&caller.file)
1804                };
1805                let line = locate_call_in_caller_file(
1806                    &abs_caller_file,
1807                    &caller_name,
1808                    function,
1809                )
1810                .unwrap_or(0);
1811                report
1812                    .callers
1813                    .push(CallInfo::new(caller_name, caller_file, line));
1814            }
1815        }
1816    }
1817
1818    // Callees: scan project edges for `src_func == function` defined in `file`.
1819    for edge in graph.edges() {
1820        if !explain_names_match(&edge.src_func, function) {
1821            continue;
1822        }
1823        if !paths_equivalent(&edge.src_file, file) {
1824            continue;
1825        }
1826        let dst_file = edge.dst_file.display().to_string();
1827        let dst_name = edge.dst_func.clone();
1828        // Skip self-recursion duplicates of the same target name.
1829        if explain_names_match(&dst_name, function)
1830            && paths_equivalent(&edge.dst_file, file)
1831        {
1832            continue;
1833        }
1834        if callee_already_present(&report.callees, &dst_name, &dst_file) {
1835            continue;
1836        }
1837        // language-specific-bugs-v1 (P14.AGG14-16): same line-recovery
1838        // approach as for callers. The call-graph edge does not carry
1839        // the source line of the callsite, so look it up by AST scan.
1840        // The caller here is `function` itself (the function we are
1841        // explaining); its file is `file`.
1842        let line = locate_call_in_caller_file(file, function, &dst_name).unwrap_or(0);
1843        // cross-cutting-and-clear-fix-bugs-v1 (P18.Pattern-B): line-aware
1844        // dedup. When `find_callees` already emitted a bare-name entry
1845        // for this call site (e.g. `trickleDownMin` at line 382),
1846        // adding the call-graph's qualified-name version
1847        // (`Heap.trickleDownMin` at the same line, possibly with a
1848        // relative-vs-absolute path mismatch the `paths_equivalent`
1849        // check missed) would re-introduce the Pattern-B bare+qualified
1850        // duplicate. Skip when an entry sharing the same line and the
1851        // same last-segment already exists.
1852        if line > 0 {
1853            let last_seg = dst_name
1854                .rsplit('.')
1855                .next()
1856                .unwrap_or(&dst_name)
1857                .to_string();
1858            if report.callees.iter().any(|c| {
1859                c.line == line
1860                    && c.name.rsplit('.').next().unwrap_or(&c.name) == last_seg
1861            }) {
1862                continue;
1863            }
1864        }
1865        report
1866            .callees
1867            .push(CallInfo::new(dst_name, dst_file, line));
1868    }
1869
1870    // sibling-resolver-gaps-v1 (P14.AGG14-14): the Swift call-graph
1871    // builder may attribute a callee's `dst_file` to a test file that
1872    // *uses* the method (e.g. `Tests/HeapTests/HeapTests.swift`) rather
1873    // than the file that *defines* it (e.g.
1874    // `Sources/HeapModule/Heap+UnsafeHandle.swift`). For each callee
1875    // whose attributed file does NOT define a matching function, search
1876    // the project for a definition file and rewrite `.file` to the
1877    // canonical definition. Skip if no unique definition is found
1878    // (preserves the original attribution as a best-effort fallback).
1879    if language == Language::Swift {
1880        // Build a unique candidate set from the call-graph edges, plus
1881        // any same-language file under the project root the walker
1882        // already visited. Resolve relative paths against `project_root`
1883        // so `function_is_defined_in_file` can call `extract_file`
1884        // successfully regardless of whether the call-graph emitted
1885        // relative or absolute paths.
1886        let resolve = |p: &std::path::Path| -> std::path::PathBuf {
1887            if p.is_absolute() {
1888                p.to_path_buf()
1889            } else {
1890                project_root.join(p)
1891            }
1892        };
1893        let mut cand_set: std::collections::HashSet<std::path::PathBuf> =
1894            std::collections::HashSet::new();
1895        for e in graph.edges() {
1896            cand_set.insert(resolve(&e.dst_file));
1897            cand_set.insert(resolve(&e.src_file));
1898        }
1899        let candidates: Vec<std::path::PathBuf> = cand_set.into_iter().collect();
1900        for callee in report.callees.iter_mut() {
1901            let attributed_file = resolve(std::path::Path::new(&callee.file));
1902            // If the attributed file already defines the callee, leave
1903            // it alone (the common, correct case).
1904            if function_is_defined_in_file(&attributed_file, &callee.name, language) {
1905                continue;
1906            }
1907            // Collect every project file (from the graph's edges) that
1908            // actually defines the callee, then prefer the
1909            // non-test-scope one.
1910            let mut def_files: Vec<std::path::PathBuf> = Vec::new();
1911            for cand in &candidates {
1912                if function_is_defined_in_file(cand, &callee.name, language) {
1913                    def_files.push(cand.clone());
1914                }
1915            }
1916            if def_files.is_empty() {
1917                continue;
1918            }
1919            // Prefer files whose path does NOT contain `/Tests/` or
1920            // `/test/` over those that do. This matches the convention
1921            // that swift-collections etc. keep production sources under
1922            // `Sources/` and tests under `Tests/`.
1923            def_files.sort_by_key(|p| {
1924                let s = p.to_string_lossy().to_lowercase();
1925                let is_test = s.contains("/tests/")
1926                    || s.contains("/test/")
1927                    || s.contains("test.swift")
1928                    || s.ends_with("tests.swift");
1929                if is_test {
1930                    1
1931                } else {
1932                    0
1933                }
1934            });
1935            if let Some(canonical) = def_files.first() {
1936                // Emit a project-relative path when possible (matches
1937                // the existing convention used by callees from the
1938                // call-graph edges) so downstream consumers see a
1939                // homogeneous shape.
1940                let display = canonical
1941                    .strip_prefix(&project_root)
1942                    .map(|p| p.display().to_string())
1943                    .unwrap_or_else(|_| canonical.display().to_string());
1944                callee.file = display;
1945            }
1946        }
1947    }
1948}
1949
1950/// Enrich `report.callers` using `find_references` for languages whose
1951/// project call graph misses cross-file caller edges (notably C# and
1952/// other class-heavy languages).
1953///
1954/// ux-and-explain-completeness-v1 (P12.AGG12-1): the call-graph builder
1955/// for some languages (CSharp, Kotlin, Scala, OCaml functor wrappers,
1956/// etc.) under-reports cross-file edges — `tldr references` finds calls
1957/// that `tldr impact` cannot. Mirror that same data source here so
1958/// `explain.callers` is non-empty whenever any reference of kind `call`
1959/// exists.
1960///
1961/// For each Call reference found, locate the enclosing function in the
1962/// caller file via `extract_file` (matches the surface used by
1963/// `enumerate_function_lines`). Skip self-references (call inside the
1964/// target function in the target file). Dedup path-aware against
1965/// existing entries.
1966fn enrich_with_references(
1967    report: &mut ExplainReport,
1968    file: &std::path::Path,
1969    function: &str,
1970    language: Language,
1971) {
1972    let project_root = explain_project_root(file);
1973    let mut options = ReferencesOptions::new();
1974    options.kinds = Some(vec![ReferenceKind::Call]);
1975    options.language = Some(language.as_str().to_string());
1976    options.limit = Some(500); // generous; explain doesn't need to return everything
1977
1978    let report_refs = match find_references(function, &project_root, &options) {
1979        Ok(r) => r,
1980        Err(_) => return,
1981    };
1982
1983    // Cache of caller-file -> Vec<(function_name, line_start, line_end)> so we
1984    // don't re-parse the same file repeatedly when multiple call sites
1985    // share an enclosing function file.
1986    use std::collections::HashMap;
1987    let mut file_funcs_cache: HashMap<std::path::PathBuf, Vec<(String, u32, u32)>> = HashMap::new();
1988
1989    for r in &report_refs.references {
1990        push_caller_from_reference(report, file, function, r, &mut file_funcs_cache);
1991    }
1992
1993    // critical-regressions-v1 (P13.AGG13-12): Lua's cross-module-alias call
1994    // graph does not always resolve `<alias>.<method>(...)` to the
1995    // matching `function m.<method>` definition (the `m.reset` case
1996    // happened to resolve via the call-graph but `m.open` did not — see
1997    // audit cell). Augment by querying references for the bare method
1998    // name and accepting only Call hits whose context contains `\.<method>(`,
1999    // i.e. truly a method invocation through an alias. This is per-language
2000    // because other languages' references are already covered by the
2001    // primary call-graph path.
2002    if matches!(language, Language::Lua | Language::Luau) {
2003        if let Some(bare) = function.split('.').next_back() {
2004            if bare != function && !bare.is_empty() {
2005                let mut bare_options = ReferencesOptions::new();
2006                bare_options.kinds = Some(vec![ReferenceKind::Call]);
2007                bare_options.language = Some(language.as_str().to_string());
2008                bare_options.limit = Some(500);
2009                if let Ok(bare_refs) = find_references(bare, &project_root, &bare_options) {
2010                    let dot_pat = format!(".{}(", bare);
2011                    let space_pat = format!(".{} (", bare);
2012                    for r in &bare_refs.references {
2013                        // Filter: context must look like `<receiver>.<bare>(`
2014                        // — not a bare `bare(...)` call. Avoid promoting
2015                        // genuine homonym references on unrelated scopes.
2016                        if !r.context.contains(&dot_pat) && !r.context.contains(&space_pat) {
2017                            continue;
2018                        }
2019                        push_caller_from_reference(
2020                            report,
2021                            file,
2022                            function,
2023                            r,
2024                            &mut file_funcs_cache,
2025                        );
2026                    }
2027                }
2028            }
2029        }
2030    }
2031}
2032
2033/// Helper used by both the primary references walk and the Lua bare-name
2034/// enrichment to convert a single `Reference` into a caller entry on
2035/// `report.callers`.
2036fn push_caller_from_reference(
2037    report: &mut ExplainReport,
2038    file: &std::path::Path,
2039    function: &str,
2040    r: &tldr_core::analysis::references::Reference,
2041    file_funcs_cache: &mut std::collections::HashMap<std::path::PathBuf, Vec<(String, u32, u32)>>,
2042) {
2043    let ref_path = &r.file;
2044    let funcs = file_funcs_cache
2045        .entry(ref_path.clone())
2046        .or_insert_with(|| collect_functions_with_bounds(ref_path));
2047    let enclosing = funcs
2048        .iter()
2049        .find(|(_, start, end)| {
2050            let line = r.line as u32;
2051            line >= *start && (*end == 0 || line <= *end)
2052        })
2053        .map(|(name, _, _)| name.clone());
2054
2055    let caller_name = match enclosing {
2056        Some(n) => n,
2057        None => "<module>".to_string(),
2058    };
2059    let caller_file = ref_path.display().to_string();
2060
2061    if explain_names_match(&caller_name, function) && paths_equivalent(ref_path, file) {
2062        return;
2063    }
2064    if caller_already_present(&report.callers, &caller_name, &caller_file) {
2065        return;
2066    }
2067    report
2068        .callers
2069        .push(CallInfo::new(caller_name, caller_file, r.line as u32));
2070}
2071
2072/// critical-regressions-v1 (P13.AGG13-2): does `file` define a function whose
2073/// (bare or class-qualified) name matches `function`? Used by
2074/// `enrich_with_project_graph` to confirm a Swift extension's actual
2075/// owning file when impact's `tree.file` points at a sibling extension.
2076fn function_is_defined_in_file(
2077    file: &std::path::Path,
2078    function: &str,
2079    _language: Language,
2080) -> bool {
2081    let module = match tldr_core::extract_file(file, None) {
2082        Ok(m) => m,
2083        Err(_) => return false,
2084    };
2085    let target_tail = explain_last_segment(function);
2086    for f in &module.functions {
2087        if f.name == function || f.name == target_tail {
2088            return true;
2089        }
2090    }
2091    for class in &module.classes {
2092        for m in &class.methods {
2093            if m.name == function || m.name == target_tail {
2094                return true;
2095            }
2096            let qualified = format!("{}.{}", class.name, m.name);
2097            if qualified == function || explain_last_segment(&qualified) == target_tail {
2098                return true;
2099            }
2100        }
2101    }
2102    false
2103}
2104
2105/// Collect `(function_name, line_start, line_end)` triples for every
2106/// top-level function and method in `file`. Returns an empty Vec if the
2107/// file fails to parse — callers tolerate this by attributing call
2108/// sites to `<module>`.
2109fn collect_functions_with_bounds(file: &std::path::Path) -> Vec<(String, u32, u32)> {
2110    let module = match tldr_core::extract_file(file, None) {
2111        Ok(m) => m,
2112        Err(_) => return Vec::new(),
2113    };
2114    let mut out: Vec<(String, u32, u32)> = Vec::new();
2115    for f in &module.functions {
2116        out.push((f.name.clone(), f.line_number, f.line_end));
2117    }
2118    for class in &module.classes {
2119        for m in &class.methods {
2120            // Index both the bare method name and the qualified Class.method
2121            // form so `find` can match either shape from the call-graph /
2122            // references emitter.
2123            out.push((m.name.clone(), m.line_number, m.line_end));
2124            out.push((
2125                format!("{}.{}", class.name, m.name),
2126                m.line_number,
2127                m.line_end,
2128            ));
2129        }
2130    }
2131    // Sort so the most-specific (innermost) function comes first when
2132    // multiple bounds contain the same line — by ascending line_start
2133    // descending end, but in practice find() returns first match so we
2134    // sort by descending line_start (innermost wins).
2135    out.sort_by(|a, b| b.1.cmp(&a.1));
2136    out
2137}
2138
2139// =============================================================================
2140// Entry Point
2141// =============================================================================
2142
2143impl ExplainArgs {
2144    /// Run the explain command
2145    pub fn run(&self, format: OutputFormat, quiet: bool) -> Result<()> {
2146        let writer = OutputWriter::new(format, quiet);
2147
2148        writer.progress(&format!(
2149            "Analyzing function {} in {}...",
2150            self.function,
2151            self.file.display()
2152        ));
2153
2154        // Check file exists
2155        if !self.file.exists() {
2156            return Err(RemainingError::file_not_found(&self.file).into());
2157        }
2158
2159        // Detect language from file extension
2160        let language = Language::from_path(&self.file)
2161            .ok_or_else(|| RemainingError::parse_error(&self.file, "Unsupported language"))?;
2162
2163        // Get function node kinds for this language
2164        let func_kinds = get_function_node_kinds(language);
2165
2166        // Read source
2167        let source = std::fs::read_to_string(&self.file)
2168            .map_err(|e| RemainingError::parse_error(&self.file, e.to_string()))?;
2169        let source_bytes = source.as_bytes();
2170
2171        // Parse with tree-sitter
2172        let mut parser = get_parser(language)?;
2173        let tree = parser
2174            .parse(&source, None)
2175            .ok_or_else(|| RemainingError::parse_error(&self.file, "Failed to parse file"))?;
2176
2177        let root = tree.root_node();
2178
2179        // Find the function. ux-and-explain-completeness-v1 (P12.AGG12-1):
2180        // delegate to the canonical `tldr_core::ast::function_finder::find_function_node`
2181        // first — it covers cross-language patterns (Lua/Luau dot-indexed
2182        // `function m.reset()`, JS arrow / object pair / assignment forms,
2183        // qualified `Class.method`, etc.) that the local explain walker
2184        // historically missed. Fall back to the local walker only on canonical
2185        // failure to preserve any pattern the canonical impl doesn't handle yet.
2186        let canonical_node = tldr_core::ast::function_finder::find_function_node(
2187            root,
2188            &self.function,
2189            language,
2190            &source,
2191        );
2192        let func_node = canonical_node
2193            .or_else(|| find_function_node(root, source_bytes, &self.function, func_kinds))
2194            .ok_or_else(|| RemainingError::symbol_not_found(&self.function, &self.file))?;
2195
2196        // Get file path string
2197        let file_path = self.file.to_string_lossy().to_string();
2198
2199        // Get language name for report
2200        let language_name = match language {
2201            Language::Python => "python",
2202            Language::TypeScript => "typescript",
2203            Language::JavaScript => "javascript",
2204            Language::Go => "go",
2205            Language::Rust => "rust",
2206            Language::Java => "java",
2207            Language::C => "c",
2208            Language::Cpp => "cpp",
2209            Language::CSharp => "csharp",
2210            Language::Kotlin => "kotlin",
2211            Language::Scala => "scala",
2212            Language::Php => "php",
2213            Language::Ruby => "ruby",
2214            Language::Lua => "lua",
2215            Language::Luau => "luau",
2216            Language::Elixir => "elixir",
2217            Language::Ocaml => "ocaml",
2218            Language::Swift => "swift",
2219        };
2220
2221        // Build report
2222        let mut report = ExplainReport::new(
2223            &self.function,
2224            &file_path,
2225            get_line_number(func_node),
2226            get_end_line_number(func_node),
2227            language_name,
2228        );
2229
2230        // Extract signature
2231        report.signature = extract_signature(func_node, source_bytes, language);
2232
2233        // Analyze purity
2234        report.purity = analyze_purity(func_node, source_bytes);
2235
2236        // Compute complexity. Local walker fills `num_blocks`, `num_edges`,
2237        // and `has_loops`; cyclomatic is then overwritten with the canonical
2238        // value from `tldr_core::calculate_complexity` so `tldr explain` and
2239        // `tldr complexity` always agree (cross-command-consistency-v3
2240        // P5.BUG-N2). Falling back to the local cyclomatic only on canonical
2241        // failure preserves explain output for files that the canonical path
2242        // cannot find the function in (e.g. nested-class disambiguation
2243        // edge cases).
2244        let mut complexity_info = compute_complexity(func_node);
2245        if let Ok(canonical) = tldr_core::calculate_complexity(
2246            self.file.to_str().unwrap_or_default(),
2247            &self.function,
2248            language,
2249        ) {
2250            complexity_info.cyclomatic = canonical.cyclomatic;
2251        }
2252        report.complexity = Some(complexity_info);
2253
2254        // Collect local function names for call graph analysis
2255        let local_functions = collect_function_names(root, source_bytes, func_kinds);
2256
2257        // Find callees
2258        report.callees = find_callees(func_node, source_bytes, &file_path, &local_functions);
2259
2260        // Find callers
2261        report.callers = find_callers(root, source_bytes, &self.function, &file_path, func_kinds);
2262
2263        // explain-cross-command-consistency-v1 (P11.BUG-AGG-1): the
2264        // per-file walker above only sees callers/callees defined in the
2265        // same source file. Enrich with cross-file results from the
2266        // project-wide call graph used by `tldr impact` /
2267        // `tldr references` / `tldr context` so the four commands agree
2268        // on relationships. Same-file results are preserved; only
2269        // additional cross-file edges get appended.
2270        enrich_with_project_graph(&mut report, &self.file, &self.function, language);
2271
2272        // ux-and-explain-completeness-v1 (P12.AGG12-1): some languages
2273        // under-report call edges in the project call graph (e.g. C#,
2274        // Kotlin, Scala class-method invocations). For those, `tldr
2275        // references` still surfaces real call sites via text+AST
2276        // verification. Mirror that data source so explain's caller list
2277        // matches the "real" set users see from `tldr references`.
2278        // Path-aware dedup means same-file walker results and
2279        // call-graph results that already populated the list won't be
2280        // duplicated.
2281        enrich_with_references(&mut report, &self.file, &self.function, language);
2282
2283        // Output based on format
2284        if writer.is_text() {
2285            let text = format_explain_text(&report);
2286            writer.write_text(&text)?;
2287        } else {
2288            writer.write(&report)?;
2289        }
2290
2291        // Write to output file if specified
2292        if let Some(ref output_path) = self.output {
2293            let output_str = if format == OutputFormat::Text {
2294                format_explain_text(&report)
2295            } else {
2296                serde_json::to_string_pretty(&report)?
2297            };
2298            std::fs::write(output_path, &output_str)?;
2299        }
2300
2301        Ok(())
2302    }
2303}
2304
2305// =============================================================================
2306// Tests
2307// =============================================================================
2308
#[cfg(test)]
mod tests {
    use super::*;

    /// Python fixture shared by the lookup, signature, purity and call tests.
    ///
    /// It defines a documented, annotated `calculate_total`, a one-line
    /// `helper_function`, and a `main` that calls both and then `print`s.
    const SAMPLE_CODE: &str = r#"
def calculate_total(items: list[dict], tax_rate: float = 0.1) -> float:
    """Calculate total price with tax.

    Args:
        items: List of items with 'price' key
        tax_rate: Tax rate as decimal (default 10%)

    Returns:
        Total price including tax
    """
    subtotal = sum(item['price'] for item in items)
    return subtotal * (1 + tax_rate)

def helper_function(x):
    return x * 2

def main():
    items = [{'price': 10}, {'price': 20}]
    total = calculate_total(items)
    doubled = helper_function(total)
    print(doubled)
"#;

    /// Looking a function up by name yields a node when it is defined in the
    /// fixture and `None` when it is not.
    #[test]
    fn test_find_function() {
        let lang = Language::Python;
        let kinds = get_function_node_kinds(lang);
        let src = SAMPLE_CODE.as_bytes();
        let tree = get_parser(lang).unwrap().parse(SAMPLE_CODE, None).unwrap();

        let func = find_function_node(tree.root_node(), src, "calculate_total", kinds);
        assert!(func.is_some());

        let func = find_function_node(tree.root_node(), src, "nonexistent", kinds);
        assert!(func.is_none());
    }

    /// The signature of `calculate_total` exposes both parameters in
    /// declaration order, plus a return type and a docstring.
    #[test]
    fn test_extract_signature() {
        let lang = Language::Python;
        let src = SAMPLE_CODE.as_bytes();
        let tree = get_parser(lang).unwrap().parse(SAMPLE_CODE, None).unwrap();

        let target = find_function_node(
            tree.root_node(),
            src,
            "calculate_total",
            get_function_node_kinds(lang),
        )
        .unwrap();
        let sig = extract_signature(target, src, lang);

        assert_eq!(sig.params.len(), 2);
        assert_eq!(sig.params[0].name, "items");
        assert_eq!(sig.params[1].name, "tax_rate");
        assert!(sig.return_type.is_some());
        assert!(sig.docstring.is_some());
    }

    /// `calculate_total` is expected to classify as "pure"; `main` is expected
    /// to classify as "impure" with an "io" effect.
    #[test]
    fn test_purity_analysis() {
        let lang = Language::Python;
        let kinds = get_function_node_kinds(lang);
        let src = SAMPLE_CODE.as_bytes();
        let tree = get_parser(lang).unwrap().parse(SAMPLE_CODE, None).unwrap();
        let root = tree.root_node();

        // First target: the tax calculation, expected to be pure.
        let total_fn = find_function_node(root, src, "calculate_total", kinds).unwrap();
        let purity = analyze_purity(total_fn, src);
        assert_eq!(purity.classification, "pure");

        // Second target: `main`, whose `print` call is expected to surface as IO.
        let main_fn = find_function_node(root, src, "main", kinds).unwrap();
        let purity = analyze_purity(main_fn, src);
        assert_eq!(purity.classification, "impure");
        assert!(purity.effects.contains(&"io".to_string()));
    }

    /// Nested conditionals plus a `for` loop must push cyclomatic complexity
    /// above 1 and set the loop flag.
    #[test]
    fn test_complexity_analysis() {
        let code = r#"
def complex_func(x, y):
    if x > 0:
        if y > 0:
            return x + y
        else:
            return x
    else:
        for i in range(10):
            x += i
        return x
"#;
        let lang = Language::Python;
        let tree = get_parser(lang).unwrap().parse(code, None).unwrap();

        let target = find_function_node(
            tree.root_node(),
            code.as_bytes(),
            "complex_func",
            get_function_node_kinds(lang),
        )
        .unwrap();
        let cx = compute_complexity(target);

        assert!(cx.cyclomatic > 1);
        assert!(cx.has_loops);
    }

    /// Both local functions invoked by `main` must be reported as callees.
    #[test]
    fn test_find_callees() {
        let lang = Language::Python;
        let kinds = get_function_node_kinds(lang);
        let src = SAMPLE_CODE.as_bytes();
        let tree = get_parser(lang).unwrap().parse(SAMPLE_CODE, None).unwrap();
        let root = tree.root_node();

        let local_funcs = collect_function_names(root, src, kinds);
        let main_fn = find_function_node(root, src, "main", kinds).unwrap();
        let callees = find_callees(main_fn, src, "test.py", &local_funcs);

        assert!(callees.iter().any(|c| c.name == "calculate_total"));
        assert!(callees.iter().any(|c| c.name == "helper_function"));
    }

    /// `main` must be reported as a caller of `calculate_total`.
    #[test]
    fn test_find_callers() {
        let lang = Language::Python;
        let tree = get_parser(lang).unwrap().parse(SAMPLE_CODE, None).unwrap();

        let callers = find_callers(
            tree.root_node(),
            SAMPLE_CODE.as_bytes(),
            "calculate_total",
            "test.py",
            get_function_node_kinds(lang),
        );
        assert!(callers.iter().any(|c| c.name == "main"));
    }

    /// TypeScript lookup must cover plain function declarations as well as
    /// arrow functions bound to (optionally exported) `const`s.
    #[test]
    fn test_find_ts_arrow_function() {
        let ts_source = r#"
const getDuration = (start: Date, end: Date): number => {
    return end.getTime() - start.getTime();
};

function regularFunc(x: number): number {
    return x * 2;
}

export const processItems = (items: string[]) => {
    return items.map(i => i.trim());
};
"#;
        let lang = Language::TypeScript;
        let kinds = get_function_node_kinds(lang);
        let src = ts_source.as_bytes();
        let tree = get_parser(lang).unwrap().parse(ts_source, None).unwrap();
        let root = tree.root_node();

        // (function name, failure message), checked in this order.
        let cases = [
            // Plain declaration: the baseline that should always resolve.
            ("regularFunc", "Should find regular TS function"),
            // Arrow function assigned to a `const`.
            ("getDuration", "Should find TS arrow function 'getDuration'"),
            // Arrow function assigned to an exported `const`.
            (
                "processItems",
                "Should find exported TS arrow function 'processItems'",
            ),
        ];

        for &(name, failure) in cases.iter() {
            assert!(
                find_function_node(root, src, name, kinds).is_some(),
                "{}",
                failure
            );
        }
    }

    // =========================================================================
    // Regression tests: analyze_purity must not report "pure" without evidence
    // =========================================================================

    /// A body consisting solely of `pass` offers nothing to analyse, so it must
    /// not come back as "pure".
    ///
    /// The failure message names "unknown" as the expected answer, but the
    /// assertion itself accepts any classification other than "pure".
    #[test]
    fn test_empty_function_is_unknown_not_pure() {
        let source = r#"
def empty_func():
    pass
"#;
        let lang = Language::Python;
        let src = source.as_bytes();
        let tree = get_parser(lang).unwrap().parse(source, None).unwrap();

        let target = find_function_node(
            tree.root_node(),
            src,
            "empty_func",
            get_function_node_kinds(lang),
        )
        .expect("Should find empty_func");
        let purity = analyze_purity(target, src);

        // Per the original bug report, the analysis answered "pure" here merely
        // because it saw no effects and no unknown calls.
        assert_ne!(
            purity.classification, "pure",
            "A function with only `pass` (no calls, no computation) should NOT be classified as \
             'pure' with high confidence. We have no evidence to support a purity claim. \
             Got classification='{}', confidence='{}'. Expected 'unknown'.",
            purity.classification, purity.confidence
        );
    }

    /// Calls to functions the analysis cannot resolve (neither IO, nor known
    /// impure, nor known-pure builtins) must produce "unknown" and must not
    /// carry "high" confidence.
    ///
    /// The callee names used here are user-defined and are meant to exercise
    /// the general "unknown call" path rather than any builtin matching.
    #[test]
    fn test_function_with_unknown_calls_is_unknown() {
        let source = r#"
def my_func(x):
    result = compute_something(x)
    return transform_result(result)
"#;
        let lang = Language::Python;
        let src = source.as_bytes();
        let tree = get_parser(lang).unwrap().parse(source, None).unwrap();

        let target = find_function_node(
            tree.root_node(),
            src,
            "my_func",
            get_function_node_kinds(lang),
        )
        .expect("Should find my_func");
        let purity = analyze_purity(target, src);

        assert_eq!(
            purity.classification, "unknown",
            "Function calling unknown user functions should be 'unknown', got '{}'",
            purity.classification
        );
        assert_ne!(
            purity.confidence, "high",
            "Unknown classification should not have high confidence, got '{}'",
            purity.confidence
        );
    }

    /// The legitimate pure case: a body that calls nothing but `len` and `sum`
    /// must be "pure" with "high" confidence.
    #[test]
    fn test_only_pure_builtins_is_pure() {
        let source = r#"
def pure_func(items):
    return len(items) + sum(items)
"#;
        let lang = Language::Python;
        let src = source.as_bytes();
        let tree = get_parser(lang).unwrap().parse(source, None).unwrap();

        let target = find_function_node(
            tree.root_node(),
            src,
            "pure_func",
            get_function_node_kinds(lang),
        )
        .expect("Should find pure_func");
        let purity = analyze_purity(target, src);

        assert_eq!(
            purity.classification, "pure",
            "Function calling only pure builtins (len, sum) should be 'pure', got '{}'",
            purity.classification
        );
        assert_eq!(
            purity.confidence, "high",
            "Pure classification should have high confidence"
        );
    }

    /// A `print` call must yield "impure" with "high" confidence and an "io"
    /// entry among the reported effects.
    #[test]
    fn test_io_operations_is_impure() {
        let source = r#"
def impure_func(msg):
    print(msg)
    return True
"#;
        let lang = Language::Python;
        let src = source.as_bytes();
        let tree = get_parser(lang).unwrap().parse(source, None).unwrap();

        let target = find_function_node(
            tree.root_node(),
            src,
            "impure_func",
            get_function_node_kinds(lang),
        )
        .expect("Should find impure_func");
        let purity = analyze_purity(target, src);

        assert_eq!(
            purity.classification, "impure",
            "Function with print() should be 'impure', got '{}'",
            purity.classification
        );
        assert_eq!(
            purity.confidence, "high",
            "Impure classification should have high confidence"
        );
        assert!(
            purity.effects.iter().any(|effect| effect == "io"),
            "Effects should contain 'io', got {:?}",
            purity.effects
        );
    }

    /// A body with arithmetic only and no calls gives the analysis nothing to
    /// classify, so it must not come back as "pure".
    ///
    /// As with the `pass`-only case, the message names "unknown" while the
    /// assertion only rules out "pure".
    #[test]
    fn test_no_calls_arithmetic_only_is_unknown() {
        let source = r#"
def add(a, b):
    return a + b
"#;
        let lang = Language::Python;
        let src = source.as_bytes();
        let tree = get_parser(lang).unwrap().parse(source, None).unwrap();

        let target = find_function_node(
            tree.root_node(),
            src,
            "add",
            get_function_node_kinds(lang),
        )
        .expect("Should find add");
        let purity = analyze_purity(target, src);

        // Per the original bug report, "no effects and no unknown calls" was
        // treated as proof of purity even though no call had been inspected.
        assert_ne!(
            purity.classification, "pure",
            "A simple arithmetic function with no calls should NOT confidently be 'pure'. \
             The analysis found no calls to evaluate -- absence of evidence is not evidence \
             of purity. Got classification='{}', confidence='{}'. \
             Expected 'unknown' since no calls were analyzed.",
            purity.classification, purity.confidence
        );
    }
}