Skip to main content

tldr_cli/commands/remaining/
definition.rs

//! Definition command - Go-to-definition functionality
//!
//! Finds where a symbol is defined in the codebase.
//! Supports both position-based and name-based lookup.
//!
//! # Example
//!
//! ```bash
//! # Position-based: find definition of symbol at line 10, column 5
//! tldr definition src/main.py 10 5
//!
//! # Name-based: find definition by symbol name
//! tldr definition --symbol MyClass --file src/main.py
//!
//! # Cross-file resolution with project context
//! tldr definition --symbol helper --file src/main.py --project .
//! ```
18
19use std::collections::HashSet;
20use std::fs;
21use std::path::{Path, PathBuf};
22
23use anyhow::Result;
24use clap::Args;
25use tree_sitter::Node;
26
27use super::error::{RemainingError, RemainingResult};
28use super::types::{DefinitionResult, Location, SymbolInfo, SymbolKind};
29use crate::output::OutputWriter;
30
31use tldr_core::ast::parser::PARSER_POOL;
32use tldr_core::callgraph::cross_file_types::{ClassDef, FuncDef};
33use tldr_core::callgraph::languages::LanguageRegistry;
34use tldr_core::Language;
35
36// =============================================================================
37// Constants
38// =============================================================================
39
/// Upper bound on how many import hops cross-file resolution will follow
/// before giving up; a backstop against runaway recursion.
const MAX_IMPORT_DEPTH: usize = 10;
42
/// Names of Python's built-in functions.
///
/// Entry order is preserved exactly as originally written.
// Kept as a compact table; rustfmt would otherwise expand it to one entry per line.
#[rustfmt::skip]
const PYTHON_BUILTINS: &[&str] = &[
    "abs", "aiter", "all", "any", "anext", "ascii",
    "bin", "bool", "breakpoint", "bytearray", "bytes",
    "callable", "chr", "classmethod", "compile", "complex",
    "delattr", "dict", "dir", "divmod",
    "enumerate", "eval", "exec",
    "filter", "float", "format", "frozenset",
    "getattr", "globals",
    "hasattr", "hash", "help", "hex",
    "id", "input", "int", "isinstance", "issubclass", "iter",
    "len", "list", "locals",
    "map", "max", "memoryview", "min",
    "next",
    "object", "oct", "open", "ord",
    "pow", "print", "property",
    "range", "repr", "reversed", "round",
    "set", "setattr", "slice", "sorted", "staticmethod", "str", "sum", "super",
    "tuple", "type",
    "vars",
    "zip",
    "__import__",
];
117
118// =============================================================================
119// Graph Utils (TIGER-02 Mitigation)
120// =============================================================================
121
/// Remembers every `(file, symbol)` pair already examined during import
/// resolution so that a repeated pair can be recognised as a cycle.
#[derive(Default)]
pub struct DefinitionCycleDetector {
    visited: HashSet<(PathBuf, String)>,
}

impl DefinitionCycleDetector {
    /// Build a detector with nothing recorded yet.
    pub fn new() -> Self {
        Self::default()
    }

    /// Record the `(file, symbol)` pair as visited.
    ///
    /// Returns `true` when the pair had already been recorded (a cycle),
    /// and `false` the first time the pair is seen.
    pub fn visit(&mut self, file: &Path, symbol: &str) -> bool {
        let first_time = self
            .visited
            .insert((file.to_path_buf(), symbol.to_owned()));
        !first_time
    }
}
147
148// =============================================================================
149// CLI Arguments
150// =============================================================================
151
152/// Find symbol definition (go-to-definition)
153///
154/// Supports two modes:
155/// 1. Position-based: Find symbol at file:line:column and jump to its definition
156/// 2. Name-based: Find definition of a named symbol using --symbol and --file
157///
158/// # Example
159///
160/// ```bash
161/// # Position mode
162/// tldr definition src/main.py 10 5
163///
164/// # Name mode
165/// tldr definition --symbol MyClass --file src/main.py
166/// ```
167#[derive(Debug, Args)]
168pub struct DefinitionArgs {
169    /// Source file (positional, for position-based lookup)
170    pub file: Option<PathBuf>,
171
172    /// line number (1-indexed, for position-based lookup)
173    pub line: Option<u32>,
174
175    /// column number (0-indexed, for position-based lookup)
176    pub column: Option<u32>,
177
178    /// Find symbol by name instead of position
179    #[arg(long)]
180    pub symbol: Option<String>,
181
182    /// File to search in (used with --symbol)
183    #[arg(long = "file", name = "target_file")]
184    pub target_file: Option<PathBuf>,
185
186    /// Project root for cross-file resolution
187    #[arg(long)]
188    pub project: Option<PathBuf>,
189
190    /// Output file (optional, stdout if not specified)
191    #[arg(long, short = 'O')]
192    pub output: Option<PathBuf>,
193}
194
195impl DefinitionArgs {
196    /// Run the definition command
197    pub fn run(
198        &self,
199        format: crate::output::OutputFormat,
200        quiet: bool,
201        lang: Option<Language>,
202    ) -> Result<()> {
203        let writer = OutputWriter::new(format, quiet);
204
205        // Convert language option to string hint
206        let lang_hint = match lang {
207            Some(l) => format!("{:?}", l).to_lowercase(),
208            None => "auto".to_string(),
209        };
210
211        // Determine which mode we're in
212        let result = if let Some(ref symbol_name) = self.symbol {
213            // Name-based mode - require --file
214            let file = self.target_file.as_ref().ok_or_else(|| {
215                RemainingError::invalid_argument("--file is required with --symbol")
216            })?;
217
218            writer.progress(&format!(
219                "Finding definition of '{}' in {}...",
220                symbol_name,
221                file.display()
222            ));
223
224            find_definition_by_name(symbol_name, file, self.project.as_deref(), &lang_hint)?
225        } else {
226            // Position-based mode
227            let file = self
228                .file
229                .as_ref()
230                .ok_or_else(|| RemainingError::invalid_argument("file argument is required"))?;
231            let line = self
232                .line
233                .ok_or_else(|| RemainingError::invalid_argument("line argument is required"))?;
234            let column = self
235                .column
236                .ok_or_else(|| RemainingError::invalid_argument("column argument is required"))?;
237
238            writer.progress(&format!(
239                "Finding definition at {}:{}:{}...",
240                file.display(),
241                line,
242                column
243            ));
244
245            match find_definition_by_position(
246                file,
247                line,
248                column,
249                self.project.as_deref(),
250                &lang_hint,
251            ) {
252                Ok(result) => result,
253                Err(_) => {
254                    // Return a graceful "not found" result instead of failing
255                    DefinitionResult {
256                        symbol: SymbolInfo {
257                            name: format!("<unknown at {}:{}:{}>", file.display(), line, column),
258                            kind: SymbolKind::Variable,
259                            location: Some(Location::with_column(
260                                file.display().to_string(),
261                                line,
262                                column,
263                            )),
264                            type_annotation: None,
265                            docstring: None,
266                            is_builtin: false,
267                            module: None,
268                        },
269                        definition: None,
270                        type_definition: None,
271                    }
272                }
273            }
274        };
275
276        // Determine output format
277        let use_text = format == crate::output::OutputFormat::Text;
278
279        // Write output
280        if let Some(ref output_path) = self.output {
281            if use_text {
282                let text = format_definition_text(&result);
283                fs::write(output_path, text)?;
284            } else {
285                let json = serde_json::to_string_pretty(&result)?;
286                fs::write(output_path, json)?;
287            }
288        } else if use_text {
289            let text = format_definition_text(&result);
290            writer.write_text(&text)?;
291        } else {
292            writer.write(&result)?;
293        }
294
295        Ok(())
296    }
297}
298
299// =============================================================================
300// Core Functions
301// =============================================================================
302
303/// Find definition by symbol name
304pub fn find_definition_by_name(
305    symbol: &str,
306    file: &Path,
307    project: Option<&Path>,
308    lang_hint: &str,
309) -> RemainingResult<DefinitionResult> {
310    // Validate file exists
311    if !file.exists() {
312        return Err(RemainingError::file_not_found(file));
313    }
314
315    // Detect language. Returns UnsupportedLanguage for genuinely unknown
316    // extensions; the supported set covers all 18 TLDR languages (VAL-015).
317    let language = detect_language(file, lang_hint)?;
318
319    // Python builtins still surface as a builtin definition with no
320    // location — every other language goes straight to source resolution.
321    if is_builtin(symbol, &language) {
322        return Ok(DefinitionResult {
323            symbol: SymbolInfo {
324                name: symbol.to_string(),
325                kind: SymbolKind::Function,
326                location: None,
327                type_annotation: None,
328                docstring: None,
329                is_builtin: true,
330                module: Some("builtins".to_string()),
331            },
332            definition: None,
333            type_definition: None,
334        });
335    }
336
337    // Read and parse file
338    let source = fs::read_to_string(file).map_err(RemainingError::Io)?;
339
340    // Try to find the symbol in this file first
341    if let Some(result) = find_symbol_in_file(symbol, file, &source, language)? {
342        return Ok(result);
343    }
344
345    // If not found and we have a project context, try cross-file resolution.
346    if let Some(project_root) = project {
347        let mut detector = DefinitionCycleDetector::new();
348        if let Some(result) =
349            resolve_cross_file(symbol, file, project_root, language, &mut detector, 0)?
350        {
351            return Ok(result);
352        }
353    }
354
355    Err(RemainingError::symbol_not_found(symbol, file))
356}
357
358/// Find definition by position (line, column)
359pub fn find_definition_by_position(
360    file: &Path,
361    line: u32,
362    column: u32,
363    project: Option<&Path>,
364    lang_hint: &str,
365) -> RemainingResult<DefinitionResult> {
366    // Validate file exists
367    if !file.exists() {
368        return Err(RemainingError::file_not_found(file));
369    }
370
371    // Detect language. Supports all 18 TLDR languages (VAL-015).
372    let language = detect_language(file, lang_hint)?;
373
374    // Read and parse file
375    let source = fs::read_to_string(file).map_err(RemainingError::Io)?;
376
377    // Find symbol at position
378    let symbol_name = find_symbol_at_position(&source, line, column, language, file)?;
379
380    // Now find definition of that symbol
381    find_definition_by_name(&symbol_name, file, project, lang_hint)
382}
383
384/// Find symbol name at a given position.
385///
386/// Parses with the given language via `ParserPool` (route TS/JS through the
387/// right grammar dialect using the file path), then walks up the AST from
388/// the deepest node at `(line, column)` looking for an identifier-like node.
389/// Identifier kinds vary across languages — we accept any kind whose name
390/// ends in `"identifier"` to cover language-specific variants
391/// (`identifier`, `property_identifier`, `field_identifier`,
392/// `type_identifier`, `shorthand_property_identifier`, etc.).
393fn find_symbol_at_position(
394    source: &str,
395    line: u32,
396    column: u32,
397    language: Language,
398    file: &Path,
399) -> RemainingResult<String> {
400    let tree = PARSER_POOL
401        .parse_with_path(source, language, Some(file))
402        .map_err(|e| RemainingError::parse_error(file.to_path_buf(), e.to_string()))?;
403
404    // Convert 1-indexed line to 0-indexed
405    let target_line = line.saturating_sub(1) as usize;
406    let target_col = column as usize;
407
408    // Find the node at the position
409    let root = tree.root_node();
410    let point = tree_sitter::Point::new(target_line, target_col);
411
412    let node = root
413        .descendant_for_point_range(point, point)
414        .ok_or_else(|| {
415            RemainingError::invalid_argument(format!(
416                "No symbol found at line {}, column {}",
417                line, column
418            ))
419        })?;
420
421    let text = node.utf8_text(source.as_bytes()).map_err(|_| {
422        RemainingError::parse_error(file.to_path_buf(), "Invalid UTF-8".to_string())
423    })?;
424
425    if is_identifier_kind(node.kind()) {
426        return Ok(text.to_string());
427    }
428
429    // Walk up looking for an identifier-like node (covers cases where the
430    // tree-sitter cursor lands on a wrapper node such as `call_expression`).
431    let mut current = node.parent();
432    while let Some(n) = current {
433        if is_identifier_kind(n.kind()) {
434            let text = n.utf8_text(source.as_bytes()).map_err(|_| {
435                RemainingError::parse_error(file.to_path_buf(), "Invalid UTF-8".to_string())
436            })?;
437            return Ok(text.to_string());
438        }
439        current = n.parent();
440    }
441
442    // Fall back to the original token text — better than nothing.
443    Ok(text.to_string())
444}
445
/// Whether a tree-sitter node kind should be treated as an identifier.
///
/// Accepted kinds are `identifier`, `constant`, `name`, and every kind that
/// ends in `_identifier` (for example `property_identifier`,
/// `field_identifier`, `type_identifier`, `shorthand_property_identifier`).
fn is_identifier_kind(kind: &str) -> bool {
    // The suffix test covers all of the `*_identifier` variants at once.
    matches!(kind, "identifier" | "constant" | "name") || kind.ends_with("_identifier")
}
461
462/// Find a symbol definition within a single file.
463///
464/// Dispatches based on language:
465/// - Python keeps its bespoke recursive walker so module-level
466///   `assignment` definitions (variables, constants) are still found —
467///   that detail is missing from the shared `extract_definitions` API.
468/// - Every other language uses
469///   `CallGraphLanguageSupport::extract_definitions`, which already knows
470///   the per-language tree-sitter kinds for functions, methods, and
471///   classes.
472fn find_symbol_in_file(
473    symbol: &str,
474    file: &Path,
475    source: &str,
476    language: Language,
477) -> RemainingResult<Option<DefinitionResult>> {
478    if language == Language::Python {
479        return find_symbol_in_file_python(symbol, file, source);
480    }
481    find_symbol_in_file_generic(symbol, file, source, language)
482}
483
484/// Python-specific in-file search (legacy path: handles module-level
485/// `assignment` definitions in addition to functions and classes).
486fn find_symbol_in_file_python(
487    symbol: &str,
488    file: &Path,
489    source: &str,
490) -> RemainingResult<Option<DefinitionResult>> {
491    let tree = PARSER_POOL
492        .parse_with_path(source, Language::Python, Some(file))
493        .map_err(|e| RemainingError::parse_error(file.to_path_buf(), e.to_string()))?;
494
495    let root = tree.root_node();
496
497    if let Some((kind, location)) = find_definition_recursive(root, source, symbol, file) {
498        return Ok(Some(DefinitionResult {
499            symbol: SymbolInfo {
500                name: symbol.to_string(),
501                kind,
502                location: Some(location.clone()),
503                type_annotation: None,
504                docstring: None,
505                is_builtin: false,
506                module: None,
507            },
508            definition: Some(location),
509            type_definition: None,
510        }));
511    }
512
513    Ok(None)
514}
515
516/// Generic in-file search backed by `CallGraphLanguageSupport::extract_definitions`.
517///
518/// The handler returns `(Vec<FuncDef>, Vec<ClassDef>)`. We match the
519/// requested symbol against both vectors and translate the result into
520/// the CLI's `DefinitionResult` shape.
521fn find_symbol_in_file_generic(
522    symbol: &str,
523    file: &Path,
524    source: &str,
525    language: Language,
526) -> RemainingResult<Option<DefinitionResult>> {
527    let tree = PARSER_POOL
528        .parse_with_path(source, language, Some(file))
529        .map_err(|e| RemainingError::parse_error(file.to_path_buf(), e.to_string()))?;
530
531    let registry = LanguageRegistry::with_defaults();
532    let handler = registry
533        .get(language.as_str())
534        .ok_or_else(|| RemainingError::unsupported_language(format!("{:?}", language)))?;
535
536    let (funcs, classes) = handler
537        .extract_definitions(source, file, &tree)
538        .map_err(|e| RemainingError::parse_error(file.to_path_buf(), e.to_string()))?;
539
540    if let Some((kind, location)) = match_definition(symbol, &funcs, &classes, file) {
541        return Ok(Some(DefinitionResult {
542            symbol: SymbolInfo {
543                name: symbol.to_string(),
544                kind,
545                location: Some(location.clone()),
546                type_annotation: None,
547                docstring: None,
548                is_builtin: false,
549                module: None,
550            },
551            definition: Some(location),
552            type_definition: None,
553        }));
554    }
555
556    Ok(None)
557}
558
559/// Match `symbol` against extracted FuncDefs / ClassDefs and produce a
560/// `(SymbolKind, Location)` pair for the first match. Functions inside a
561/// class become `Method`; standalone functions are `Function`; classes
562/// (including Rust struct/enum/trait, which the handlers report as
563/// classes) become `Class`.
564fn match_definition(
565    symbol: &str,
566    funcs: &[FuncDef],
567    classes: &[ClassDef],
568    file: &Path,
569) -> Option<(SymbolKind, Location)> {
570    for f in funcs {
571        if f.name == symbol {
572            let kind = if f.is_method {
573                SymbolKind::Method
574            } else {
575                SymbolKind::Function
576            };
577            let loc = Location::new(file.display().to_string(), f.line);
578            return Some((kind, loc));
579        }
580    }
581    for c in classes {
582        if c.name == symbol {
583            let loc = Location::new(file.display().to_string(), c.line);
584            return Some((SymbolKind::Class, loc));
585        }
586    }
587    None
588}
589
590/// Recursively search the AST for a definition
591fn find_definition_recursive(
592    node: Node,
593    source: &str,
594    target_name: &str,
595    file: &Path,
596) -> Option<(SymbolKind, Location)> {
597    match node.kind() {
598        "function_definition" => {
599            // Get the name child
600            if let Some(name_node) = node.child_by_field_name("name") {
601                if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
602                    if name == target_name {
603                        // Check if inside a class by looking at parents
604                        let in_class = is_inside_class(node);
605                        let kind = if in_class {
606                            SymbolKind::Method
607                        } else {
608                            SymbolKind::Function
609                        };
610                        let location = Location::with_column(
611                            file.display().to_string(),
612                            name_node.start_position().row as u32 + 1,
613                            name_node.start_position().column as u32,
614                        );
615                        return Some((kind, location));
616                    }
617                }
618            }
619        }
620        "class_definition" => {
621            // Get the name child
622            if let Some(name_node) = node.child_by_field_name("name") {
623                if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
624                    if name == target_name {
625                        let location = Location::with_column(
626                            file.display().to_string(),
627                            name_node.start_position().row as u32 + 1,
628                            name_node.start_position().column as u32,
629                        );
630                        return Some((SymbolKind::Class, location));
631                    }
632                }
633            }
634        }
635        "assignment" => {
636            // Check for variable assignments at module level
637            if let Some(left) = node.child_by_field_name("left") {
638                if left.kind() == "identifier" {
639                    if let Ok(name) = left.utf8_text(source.as_bytes()) {
640                        if name == target_name {
641                            let location = Location::with_column(
642                                file.display().to_string(),
643                                left.start_position().row as u32 + 1,
644                                left.start_position().column as u32,
645                            );
646                            return Some((SymbolKind::Variable, location));
647                        }
648                    }
649                }
650            }
651        }
652        _ => {}
653    }
654
655    // Search children
656    for i in 0..node.child_count() {
657        if let Some(child) = node.child(i) {
658            if let Some(result) = find_definition_recursive(child, source, target_name, file) {
659                return Some(result);
660            }
661        }
662    }
663
664    None
665}
666
667/// Check if a node is inside a class definition
668fn is_inside_class(node: Node) -> bool {
669    let mut current = node.parent();
670    while let Some(n) = current {
671        if n.kind() == "class_definition" {
672            return true;
673        }
674        current = n.parent();
675    }
676    false
677}
678
679/// Resolve `symbol` across files in the project.
680///
681/// Python uses an import-based resolver (parses `from X import Y` /
682/// `import X` and follows them); other languages do a project-wide walk
683/// of files matching the language's extensions and run
684/// [`find_symbol_in_file`] on each. The walk-based approach is correct
685/// for the canonical small fixture and large enough to be useful in
686/// practice. For projects whose import topology is essential (large TS
687/// monorepos, etc.), the daemon-backed `ModuleIndex` already handles
688/// resolution and can be plugged in here as a follow-up.
689fn resolve_cross_file(
690    symbol: &str,
691    current_file: &Path,
692    project_root: &Path,
693    language: Language,
694    detector: &mut DefinitionCycleDetector,
695    depth: usize,
696) -> RemainingResult<Option<DefinitionResult>> {
697    // Prevent infinite recursion
698    if depth >= MAX_IMPORT_DEPTH {
699        return Ok(None);
700    }
701
702    // Check for cycle
703    if detector.visit(current_file, symbol) {
704        return Ok(None);
705    }
706
707    if language == Language::Python {
708        return resolve_cross_file_python(symbol, current_file, project_root, detector, depth);
709    }
710
711    // Generic project walk for the other 17 languages.
712    resolve_cross_file_walk(symbol, current_file, project_root, language)
713}
714
715/// Python-specific cross-file resolution via parsed import statements
716/// (preserves the pre-VAL-015 behaviour for Python).
717fn resolve_cross_file_python(
718    symbol: &str,
719    current_file: &Path,
720    project_root: &Path,
721    detector: &mut DefinitionCycleDetector,
722    depth: usize,
723) -> RemainingResult<Option<DefinitionResult>> {
724    let source = fs::read_to_string(current_file).map_err(RemainingError::Io)?;
725    let imports = extract_imports(&source);
726
727    for (module_path, imported_names) in imports {
728        let is_imported = imported_names.is_empty() || imported_names.contains(&symbol.to_string());
729
730        if is_imported {
731            if let Some(resolved_path) =
732                resolve_module_path(&module_path, current_file, project_root)
733            {
734                if resolved_path.exists() {
735                    let module_source =
736                        fs::read_to_string(&resolved_path).map_err(RemainingError::Io)?;
737
738                    if let Some(result) = find_symbol_in_file(
739                        symbol,
740                        &resolved_path,
741                        &module_source,
742                        Language::Python,
743                    )? {
744                        return Ok(Some(result));
745                    }
746
747                    if let Some(result) = resolve_cross_file(
748                        symbol,
749                        &resolved_path,
750                        project_root,
751                        Language::Python,
752                        detector,
753                        depth + 1,
754                    )? {
755                        return Ok(Some(result));
756                    }
757                }
758            }
759        }
760    }
761
762    Ok(None)
763}
764
765/// Generic cross-file resolution: walk the project for files whose
766/// extension belongs to `language` and probe each for the symbol.
767///
768/// Skips the file we already searched (`current_file`) and common
769/// non-source directories (`.git`, `target`, `node_modules`, etc.) to
770/// avoid pathological scans on real projects.
771fn resolve_cross_file_walk(
772    symbol: &str,
773    current_file: &Path,
774    project_root: &Path,
775    language: Language,
776) -> RemainingResult<Option<DefinitionResult>> {
777    let extensions = language.extensions();
778    let current_canonical = fs::canonicalize(current_file).ok();
779
780    let walker = walkdir::WalkDir::new(project_root)
781        .follow_links(false)
782        .into_iter()
783        .filter_entry(|e| !is_skipped_dir(e.path()));
784
785    for entry in walker.flatten() {
786        let path = entry.path();
787        if !path.is_file() {
788            continue;
789        }
790        // Skip non-matching extensions.
791        let matches_ext = path
792            .extension()
793            .and_then(|e| e.to_str())
794            .map(|e| {
795                extensions
796                    .iter()
797                    .any(|ext| ext.trim_start_matches('.').eq_ignore_ascii_case(e))
798            })
799            .unwrap_or(false);
800        if !matches_ext {
801            continue;
802        }
803        // Skip the file we already searched.
804        if let Some(ref c) = current_canonical {
805            if let Ok(p) = fs::canonicalize(path) {
806                if &p == c {
807                    continue;
808                }
809            }
810        }
811
812        let Ok(source) = fs::read_to_string(path) else {
813            continue;
814        };
815        if let Some(result) = find_symbol_in_file(symbol, path, &source, language)? {
816            return Ok(Some(result));
817        }
818    }
819
820    Ok(None)
821}
822
/// Whether the final component of `path` is one of the well-known
/// non-source directory names (VCS metadata, dependency and build output,
/// virtual environments, tool caches, editor settings).
///
/// Only the name is examined — the path is not checked on disk. Paths with
/// no final component, or one that is not valid UTF-8, give `false`.
fn is_skipped_dir(path: &Path) -> bool {
    const PRUNED_NAMES: &[&str] = &[
        ".git",
        ".hg",
        ".svn",
        "node_modules",
        "target",
        "dist",
        "build",
        ".venv",
        "venv",
        "__pycache__",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
        ".idea",
        ".vscode",
    ];

    path.file_name()
        .and_then(|component| component.to_str())
        .map_or(false, |name| PRUNED_NAMES.contains(&name))
}
851
/// Extract Python import statements from source code.
///
/// Works line by line on the text (no parsing) and returns one
/// `(module, names)` pair per imported module, in source order:
///
/// - `from M import a, b as c` gives `("M", ["a", "b"])` — aliases are dropped;
/// - `from M import *` gives `("M", [])`;
/// - `import M`, `import M as x` and `import M, N` give `("M", [])`
///   (and `("N", [])`).
///
/// Parenthesized name lists are supported, including ones that span several
/// lines, and trailing `#` comments are ignored. Lines that merely look like
/// a `from` import but name no module (for example prose such as
/// `from import to export`) are skipped.
fn extract_imports(source: &str) -> Vec<(String, Vec<String>)> {
    /// Drop a trailing `# comment`.
    fn code_part(line: &str) -> &str {
        line.split_once('#').map_or(line, |(code, _)| code)
    }

    /// Reduce one comma-separated item to the bare imported name: strips
    /// surrounding whitespace, parentheses, line-continuation backslashes
    /// and any ` as alias` suffix.
    fn bare_name(item: &str) -> &str {
        item.trim_matches(|c: char| c.is_whitespace() || matches!(c, '(' | ')' | '\\'))
            .split(" as ")
            .next()
            .unwrap_or("")
            .trim()
    }

    let mut imports = Vec::new();
    let mut lines = source.lines();

    while let Some(raw) = lines.next() {
        let line = code_part(raw).trim();

        if let Some(rest) = line.strip_prefix("from ") {
            // Splitting the text after the `from ` prefix (rather than
            // slicing the whole line at fixed offsets) cannot produce an
            // inverted range on input such as `from import x`.
            let Some((module, first_names)) = rest.split_once(" import ") else {
                continue;
            };
            let module = module.trim();
            if module.is_empty() {
                continue;
            }

            // A `(` without its `)` opens a multi-line list: gather the
            // following lines up to and including the closing parenthesis.
            let mut names_src = first_names.to_string();
            if first_names.contains('(') && !first_names.contains(')') {
                for continuation in lines.by_ref() {
                    let continuation = code_part(continuation);
                    names_src.push(',');
                    names_src.push_str(continuation);
                    if continuation.contains(')') {
                        break;
                    }
                }
            }

            let names: Vec<String> = names_src
                .split(',')
                .map(bare_name)
                .filter(|name| !name.is_empty() && *name != "*")
                .map(str::to_string)
                .collect();
            imports.push((module.to_string(), names));
        } else if let Some(modules) = line.strip_prefix("import ") {
            // `import a, b as c` names several modules in one statement.
            imports.extend(
                modules
                    .split(',')
                    .map(bare_name)
                    .filter(|module| !module.is_empty())
                    .map(|module| (module.to_string(), Vec::new())),
            );
        }
    }

    imports
}
884
/// Map a Python module name to an existing file on disk.
///
/// A dotted name is turned into a relative path and probed first as a module
/// file (`<path>.py`) and then as a package (`<path>/__init__.py`).
///
/// - Relative imports (`.utils`, `..pkg.mod`): one leading dot means the
///   directory of `current_file`, and each extra dot moves one directory
///   up. A name made only of dots resolves to that directory's
///   `__init__.py`.
/// - Absolute imports (`pkg.mod`): probed relative to the directory of
///   `current_file` first, then relative to `project_root`.
///
/// Returns `None` when nothing exists at any candidate location, or when
/// there are not enough parent directories for the requested number of dots.
fn resolve_module_path(module: &str, current_file: &Path, project_root: &Path) -> Option<PathBuf> {
    // Module file first, package `__init__.py` second.
    let probe = |base: &Path, rel: &str| -> Option<PathBuf> {
        let as_module = base.join(rel).with_extension("py");
        if as_module.exists() {
            return Some(as_module);
        }
        let as_package = base.join(rel).join("__init__.py");
        as_package.exists().then_some(as_package)
    };

    let here = current_file.parent()?;
    let leading_dots = module.chars().take_while(|&c| c == '.').count();

    if leading_dots == 0 {
        // Absolute import: the current directory takes precedence over the root.
        let rel = module.replace('.', "/");
        return probe(here, &rel).or_else(|| probe(project_root, &rel));
    }

    // Relative import: `ancestors()` starts at `here`, so N dots select the
    // (N - 1)-th ancestor.
    let base = here.ancestors().nth(leading_dots - 1)?;
    let remainder = &module[leading_dots..];

    if remainder.is_empty() {
        // "from . import X" - only the package's own `__init__.py` qualifies.
        let init = base.join("__init__.py");
        return init.exists().then_some(init);
    }

    probe(base, &remainder.replace('.', "/"))
}
964
965// =============================================================================
966// Helper Functions
967// =============================================================================
968
969/// Check if a symbol is a language builtin
970pub fn is_builtin(name: &str, language: &Language) -> bool {
971    match language {
972        Language::Python => PYTHON_BUILTINS.contains(&name),
973        _ => false,
974    }
975}
976
977/// Detect language from a file extension or an explicit hint.
978///
979/// Supports all 18 TLDR languages (VAL-015). The hint is the lower-case
980/// language name (`"python"`, `"typescript"`, ..., `"ocaml"`); a hint of
981/// `"auto"` falls through to extension-based detection via
982/// [`Language::from_path`].
983fn detect_language(file: &Path, hint: &str) -> RemainingResult<Language> {
984    if hint != "auto" {
985        let normalized = hint.to_lowercase();
986        // Common short aliases.
987        let alias = match normalized.as_str() {
988            "py" => Some(Language::Python),
989            "ts" => Some(Language::TypeScript),
990            "tsx" => Some(Language::TypeScript),
991            "js" => Some(Language::JavaScript),
992            "jsx" => Some(Language::JavaScript),
993            "rs" => Some(Language::Rust),
994            "golang" => Some(Language::Go),
995            "c++" => Some(Language::Cpp),
996            "c#" => Some(Language::CSharp),
997            "cs" => Some(Language::CSharp),
998            "kt" => Some(Language::Kotlin),
999            "rb" => Some(Language::Ruby),
1000            "ex" | "exs" => Some(Language::Elixir),
1001            "ml" | "mli" => Some(Language::Ocaml),
1002            _ => None,
1003        };
1004        if let Some(lang) = alias {
1005            return Ok(lang);
1006        }
1007        // Match against the canonical lowercase name (matches Language::as_str).
1008        for lang in Language::all() {
1009            if lang.as_str() == normalized {
1010                return Ok(*lang);
1011            }
1012        }
1013        return Err(RemainingError::unsupported_language(hint));
1014    }
1015
1016    Language::from_path(file).ok_or_else(|| {
1017        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
1018        RemainingError::unsupported_language(ext)
1019    })
1020}
1021
1022/// Format definition result as text
1023fn format_definition_text(result: &DefinitionResult) -> String {
1024    let mut output = String::new();
1025
1026    output.push_str("=== Definition Result ===\n\n");
1027    output.push_str(&format!("Symbol: {}\n", result.symbol.name));
1028    output.push_str(&format!("Kind: {:?}\n", result.symbol.kind));
1029
1030    if result.symbol.is_builtin {
1031        output.push_str("Type: Built-in\n");
1032        if let Some(ref module) = result.symbol.module {
1033            output.push_str(&format!("Module: {}\n", module));
1034        }
1035    } else if let Some(ref location) = result.definition {
1036        output.push_str("\nDefinition Location:\n");
1037        output.push_str(&format!("  File: {}\n", location.file));
1038        output.push_str(&format!("  Line: {}\n", location.line));
1039        if location.column > 0 {
1040            output.push_str(&format!("  Column: {}\n", location.column));
1041        }
1042    } else {
1043        output.push_str("\nDefinition: Not found\n");
1044    }
1045
1046    if let Some(ref type_def) = result.type_definition {
1047        output.push_str("\nType Definition:\n");
1048        output.push_str(&format!("  File: {}\n", type_def.file));
1049        output.push_str(&format!("  Line: {}\n", type_def.line));
1050    }
1051
1052    if let Some(ref docstring) = result.symbol.docstring {
1053        output.push_str(&format!("\nDocstring:\n  {}\n", docstring));
1054    }
1055
1056    output
1057}
1058
1059// =============================================================================
1060// Tests
1061// =============================================================================
1062
#[cfg(test)]
mod tests {
    use super::*;

    // Known Python builtins are recognised; an arbitrary user name is not.
    #[test]
    fn test_is_builtin_python() {
        assert!(is_builtin("len", &Language::Python));
        assert!(is_builtin("print", &Language::Python));
        assert!(is_builtin("range", &Language::Python));
        assert!(!is_builtin("my_func", &Language::Python));
    }

    // `visit` reports a cycle only when the same (file, symbol) pair is seen again.
    #[test]
    fn test_cycle_detector() {
        let mut detector = DefinitionCycleDetector::new();

        // First visit should return false (not a cycle)
        assert!(!detector.visit(Path::new("file.py"), "symbol"));

        // Second visit to same location should return true (cycle)
        assert!(detector.visit(Path::new("file.py"), "symbol"));

        // Different location should return false
        assert!(!detector.visit(Path::new("other.py"), "symbol"));
    }

    // With the "auto" hint the language comes from the file extension.
    #[test]
    fn test_detect_language() {
        assert_eq!(
            detect_language(Path::new("test.py"), "auto").unwrap(),
            Language::Python
        );
    }

    // An explicit hint wins even when the extension says nothing useful.
    #[test]
    fn test_detect_language_with_hint() {
        assert_eq!(
            detect_language(Path::new("test.txt"), "python").unwrap(),
            Language::Python
        );
    }

    // `from X import a, b` yields (module, names); `import X [as Y]` yields the
    // module name, with any alias dropped.
    #[test]
    fn test_extract_imports() {
        let source = r#"
from os import path, getcwd
from sys import argv
import json
import re as regex
"#;
        let imports = extract_imports(source);

        assert_eq!(imports.len(), 4);
        assert_eq!(imports[0].0, "os");
        assert!(imports[0].1.contains(&"path".to_string()));
        assert!(imports[0].1.contains(&"getcwd".to_string()));
        assert_eq!(imports[1].0, "sys");
        assert!(imports[1].1.contains(&"argv".to_string()));
        assert_eq!(imports[2].0, "json");
        assert_eq!(imports[3].0, "re");
    }

    #[test]
    fn test_extract_imports_relative() {
        let source = r#"
from .utils import echo, make_str
from .exceptions import Abort
from ._utils import FLAG_NEEDS_VALUE
from . import types
"#;
        let imports = extract_imports(source);

        assert_eq!(imports.len(), 4);
        // Relative imports should preserve the dot prefix
        assert_eq!(imports[0].0, ".utils");
        assert!(imports[0].1.contains(&"echo".to_string()));
        assert!(imports[0].1.contains(&"make_str".to_string()));
        assert_eq!(imports[1].0, ".exceptions");
        assert!(imports[1].1.contains(&"Abort".to_string()));
        assert_eq!(imports[2].0, "._utils");
        assert!(imports[2].1.contains(&"FLAG_NEEDS_VALUE".to_string()));
        assert_eq!(imports[3].0, ".");
        assert!(imports[3].1.contains(&"types".to_string()));
    }

    #[test]
    fn test_resolve_module_path_relative_import() {
        // Create a temp directory structure simulating a Python package
        let dir = tempfile::tempdir().unwrap();
        let pkg = dir.path().join("mypkg");
        fs::create_dir_all(&pkg).unwrap();

        // Create files
        fs::write(pkg.join("__init__.py"), "").unwrap();
        fs::write(pkg.join("core.py"), "from .utils import helper\n").unwrap();
        fs::write(pkg.join("utils.py"), "def helper(): pass\n").unwrap();

        let current_file = pkg.join("core.py");
        let project_root = dir.path();

        // Relative import ".utils" from core.py should resolve to utils.py in the same directory
        let resolved = resolve_module_path(".utils", &current_file, project_root);
        assert!(
            resolved.is_some(),
            "resolve_module_path should find .utils relative to core.py"
        );
        assert_eq!(
            resolved.unwrap(),
            pkg.join("utils.py"),
            "Should resolve to sibling utils.py"
        );
    }

    #[test]
    fn test_resolve_module_path_relative_import_subpackage() {
        let dir = tempfile::tempdir().unwrap();
        let pkg = dir.path().join("mypkg");
        let sub = pkg.join("sub");
        fs::create_dir_all(&sub).unwrap();

        fs::write(pkg.join("__init__.py"), "").unwrap();
        fs::write(sub.join("__init__.py"), "").unwrap();
        fs::write(pkg.join("core.py"), "").unwrap();
        fs::write(sub.join("helpers.py"), "def helper(): pass\n").unwrap();

        let current_file = pkg.join("core.py");
        let project_root = dir.path();

        // ".sub.helpers" from core.py should resolve to sub/helpers.py
        let resolved = resolve_module_path(".sub.helpers", &current_file, project_root);
        assert!(
            resolved.is_some(),
            "resolve_module_path should find .sub.helpers relative to core.py"
        );
        assert_eq!(
            resolved.unwrap(),
            sub.join("helpers.py"),
            "Should resolve to sub/helpers.py"
        );
    }

    // End-to-end: a name imported with `from .utils import echo` is traced to
    // its `def` in the sibling module.
    #[test]
    fn test_cross_file_definition_via_relative_import() {
        let dir = tempfile::tempdir().unwrap();
        let pkg = dir.path().join("mypkg");
        fs::create_dir_all(&pkg).unwrap();

        fs::write(pkg.join("__init__.py"), "").unwrap();
        fs::write(
            pkg.join("core.py"),
            "from .utils import echo\n\ndef main():\n    echo('hello')\n",
        )
        .unwrap();
        fs::write(pkg.join("utils.py"), "def echo(msg):\n    print(msg)\n").unwrap();

        // Look for 'echo' starting from core.py with project context
        let result =
            find_definition_by_name("echo", &pkg.join("core.py"), Some(dir.path()), "python");

        assert!(
            result.is_ok(),
            "Should find echo via cross-file resolution: {:?}",
            result.err()
        );
        let result = result.unwrap();
        assert_eq!(result.symbol.name, "echo");
        assert_eq!(result.symbol.kind, SymbolKind::Function);
        assert!(
            result.definition.is_some(),
            "Should have a definition location"
        );
        let def_loc = result.definition.unwrap();
        assert!(
            def_loc.file.contains("utils.py"),
            "Definition should be in utils.py, got: {}",
            def_loc.file
        );
        assert_eq!(def_loc.line, 1, "echo is defined on line 1 of utils.py");
    }

    // -------------------------------------------------------------------------
    // VAL-015: multi-language go-to-definition
    //
    // Until VAL-015, find_definition_by_name and find_definition_by_position
    // returned UnsupportedLanguage for any non-Python file. These tests
    // verify the generalisation: the dispatch reuses each language handler's
    // CallGraphLanguageSupport::extract_definitions API to locate the
    // definition site of a top-level function in a single file.
    //
    // Coverage: Python (regression), TypeScript (brace-language family),
    // Rust (strict-types), Go (semicolon-free), Java (OOP).
    // -------------------------------------------------------------------------

    #[test]
    fn test_find_definition_typescript_function() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("main.ts");
        fs::write(
            &file,
            "export function target_fn(): number { return 42; }\n\
             export function caller(): void { target_fn(); }\n",
        )
        .unwrap();

        let result = find_definition_by_name("target_fn", &file, None, "typescript")
            .expect("definition lookup should succeed for TypeScript");
        assert_eq!(result.symbol.name, "target_fn");
        assert_eq!(result.symbol.kind, SymbolKind::Function);
        let loc = result.definition.expect("definition location must be Some");
        assert_eq!(loc.line, 1, "target_fn is on line 1, got {}", loc.line);
    }

    #[test]
    fn test_find_definition_rust_function() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("lib.rs");
        fs::write(
            &file,
            "fn helper() -> i32 { 1 }\n\nfn target_fn() -> i32 { helper() }\n",
        )
        .unwrap();

        let result = find_definition_by_name("target_fn", &file, None, "rust")
            .expect("definition lookup should succeed for Rust");
        assert_eq!(result.symbol.name, "target_fn");
        assert_eq!(result.symbol.kind, SymbolKind::Function);
        let loc = result.definition.expect("definition location must be Some");
        assert_eq!(loc.line, 3, "target_fn is on line 3, got {}", loc.line);
    }

    #[test]
    fn test_find_definition_go_function() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("main.go");
        fs::write(
            &file,
            "package main\n\nfunc target_fn() int { return 1 }\n\nfunc main() { target_fn() }\n",
        )
        .unwrap();

        let result = find_definition_by_name("target_fn", &file, None, "go")
            .expect("definition lookup should succeed for Go");
        assert_eq!(result.symbol.name, "target_fn");
        assert_eq!(result.symbol.kind, SymbolKind::Function);
        let loc = result.definition.expect("definition location must be Some");
        assert_eq!(loc.line, 3, "target_fn is on line 3, got {}", loc.line);
    }

    #[test]
    fn test_find_definition_java_method() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("Main.java");
        // Java requires methods inside a class; the matrix fixture follows
        // the same pattern.
        fs::write(
            &file,
            "class Main {\n    public static int target_fn() { return 1; }\n    public static void main(String[] args) { target_fn(); }\n}\n",
        )
        .unwrap();

        let result = find_definition_by_name("target_fn", &file, None, "java")
            .expect("definition lookup should succeed for Java");
        assert_eq!(result.symbol.name, "target_fn");
        // Methods inside a class must report Method, not Function.
        assert_eq!(
            result.symbol.kind,
            SymbolKind::Method,
            "Java method inside class should be Method, got {:?}",
            result.symbol.kind
        );
        let loc = result.definition.expect("definition location must be Some");
        assert_eq!(loc.line, 2, "target_fn is on line 2, got {}", loc.line);
    }

    #[test]
    fn test_find_definition_class_typescript() {
        // Classes must surface as SymbolKind::Class regardless of language.
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("widget.ts");
        fs::write(&file, "export class Widget {\n    render(): void {}\n}\n").unwrap();

        let result = find_definition_by_name("Widget", &file, None, "typescript")
            .expect("definition lookup should succeed for TS class");
        assert_eq!(result.symbol.name, "Widget");
        assert_eq!(
            result.symbol.kind,
            SymbolKind::Class,
            "Widget should be Class kind, got {:?}",
            result.symbol.kind
        );
        let loc = result.definition.expect("definition location must be Some");
        assert_eq!(loc.line, 1);
    }

    #[test]
    fn test_find_definition_position_rust() {
        // Position-based lookup: jump from a call site to the definition.
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("lib.rs");
        let source = "fn target_fn() -> i32 { 1 }\n\nfn caller() -> i32 { target_fn() }\n";
        fs::write(&file, source).unwrap();

        // Position of the `target_fn` reference inside caller (third line):
        // "fn caller() -> i32 { target_fn() }"
        //  0123456789012345678901
        //                       ^ 0-based col 21 = 't'
        // Column 22 falls inside the `target_fn` identifier under either
        // convention: it is 'a' when counted from 0 and 't' when counted from 1.
        let result = find_definition_by_position(&file, 3, 22, None, "rust")
            .expect("position-based lookup should succeed for Rust");
        assert_eq!(result.symbol.name, "target_fn");
        let loc = result.definition.expect("definition location must be Some");
        assert_eq!(loc.line, 1, "definition is on line 1");
    }

    #[test]
    fn test_detect_language_all_18() {
        // All 18 languages must be detectable from extension or hint.
        // This catches missing entries in detect_language as we add support.
        let cases: &[(&str, &str, Language)] = &[
            // Extension-based detection (hint = "auto").
            ("a.py", "auto", Language::Python),
            ("a.ts", "auto", Language::TypeScript),
            ("a.tsx", "auto", Language::TypeScript),
            ("a.js", "auto", Language::JavaScript),
            ("a.jsx", "auto", Language::JavaScript),
            ("a.rs", "auto", Language::Rust),
            ("a.go", "auto", Language::Go),
            ("a.java", "auto", Language::Java),
            ("a.c", "auto", Language::C),
            ("a.h", "auto", Language::C),
            ("a.cpp", "auto", Language::Cpp),
            ("a.cc", "auto", Language::Cpp),
            ("a.hpp", "auto", Language::Cpp),
            ("a.rb", "auto", Language::Ruby),
            ("a.kt", "auto", Language::Kotlin),
            ("a.swift", "auto", Language::Swift),
            ("a.cs", "auto", Language::CSharp),
            ("a.scala", "auto", Language::Scala),
            ("a.php", "auto", Language::Php),
            ("a.lua", "auto", Language::Lua),
            ("a.luau", "auto", Language::Luau),
            ("a.ex", "auto", Language::Elixir),
            ("a.exs", "auto", Language::Elixir),
            ("a.ml", "auto", Language::Ocaml),
            // Short-alias hints. The file extension is not consulted when an
            // explicit hint is given, so an unrelated ".txt" path is used.
            ("a.txt", "py", Language::Python),
            ("a.txt", "ts", Language::TypeScript),
            ("a.txt", "tsx", Language::TypeScript),
            ("a.txt", "js", Language::JavaScript),
            ("a.txt", "jsx", Language::JavaScript),
            ("a.txt", "rs", Language::Rust),
            ("a.txt", "golang", Language::Go),
            ("a.txt", "c++", Language::Cpp),
            ("a.txt", "c#", Language::CSharp),
            ("a.txt", "cs", Language::CSharp),
            ("a.txt", "kt", Language::Kotlin),
            ("a.txt", "rb", Language::Ruby),
            ("a.txt", "ex", Language::Elixir),
            ("a.txt", "exs", Language::Elixir),
            ("a.txt", "ml", Language::Ocaml),
            ("a.txt", "mli", Language::Ocaml),
            // Explicit hints are lower-cased before matching.
            ("a.txt", "PY", Language::Python),
        ];
        for (path, hint, expected) in cases {
            let got = detect_language(Path::new(path), hint).unwrap_or_else(|e| {
                panic!(
                    "detect_language failed for {} (hint {:?}): {:?}",
                    path, hint, e
                )
            });
            assert_eq!(
                got, *expected,
                "wrong language for {} (hint {:?})",
                path, hint
            );
        }
    }

    // An explicit hint that is neither an alias nor a canonical language name
    // is rejected, even when the extension alone would have been detectable.
    #[test]
    fn test_detect_language_unknown_hint() {
        assert!(detect_language(Path::new("a.py"), "not-a-language").is_err());
    }
}