Skip to main content

aptu_coder_core/
analyze.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Maximum file size, in bytes, that this module will read for analysis.
///
/// The size is taken from file metadata before the content is read; files reporting a
/// larger size are skipped during directory analysis and rejected by `analyze_file`.
pub const MAX_FILE_SIZE_BYTES: u64 = 10_000_000;
32
/// Errors produced by the analysis entry points in this module.
///
/// Variants carrying `#[from]` are created automatically by `?` from the wrapped error
/// type. The enum is `#[non_exhaustive]`, so downstream matches need a wildcard arm.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Wraps an error from the traversal module (directory walking).
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Wraps an error from the parser module. `analyze_file` also uses this variant for
    /// oversized files and for failures to read the file from disk.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Wraps an error from the call-graph module (e.g. symbol not found).
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Wraps an error from the formatter module.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The cancellation token was triggered before or during the analysis.
    #[error("Analysis cancelled")]
    Cancelled,
    /// The given language identifier matched neither a known extension nor a language name.
    #[error("unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Wraps a standard I/O error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// A requested line range has its start after its end.
    #[error("invalid range: start ({start}) > end ({end}); file has {total} lines")]
    InvalidRange {
        start: usize,
        end: usize,
        total: usize,
    },
    /// A file was expected but the path refers to a directory.
    #[error("path is a directory, not a file: {0}")]
    NotAFile(PathBuf),
    /// A large file was requested without an explicit line range.
    #[error(
        "file has {total_lines} lines; provide start_line and end_line, or call analyze_module first to locate the range"
    )]
    RangelessLargeFile { total_lines: usize },
    /// Parsing `path` hit the configured time limit; `micros` is the value reported by
    /// the parser's timeout error.
    #[error("parse timeout exceeded for {path}: {micros} microseconds")]
    ParseTimeout { path: PathBuf, micros: u64 },
}
65
/// Result of directory analysis containing both formatted output and file data.
///
/// Only `formatted`, `files` and (when present) `next_cursor` are serialized; the
/// remaining fields are in-process working data and deserialize to their defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct AnalysisOutput {
    /// Text rendering of the directory structure and per-file statistics.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// One record per file that was successfully read and analyzed.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "List of files analyzed in the directory")
    )]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub entries: Vec<WalkEntry>,
    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    /// Pagination cursor; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
100
/// Result of file-level semantic analysis.
///
/// Produced by `analyze_file` (from a path) and `analyze_str` (from an in-memory buffer).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FileAnalysisOutput {
    /// Text rendering produced by the file-details formatter.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Structured extraction result that `formatted` was rendered from.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Semantic analysis data including functions, classes, and imports")
    )]
    pub semantic: SemanticAnalysis,
    /// Number of lines in the source, as counted by `str::lines`.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Total line count of the analyzed file")
    )]
    #[cfg_attr(
        feature = "schemars",
        schemars(schema_with = "crate::schema_helpers::integer_schema")
    )]
    pub line_count: usize,
    /// Pagination cursor; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
134
135impl FileAnalysisOutput {
136    /// Create a new `FileAnalysisOutput`.
137    #[must_use]
138    pub fn new(
139        formatted: String,
140        semantic: SemanticAnalysis,
141        line_count: usize,
142        next_cursor: Option<String>,
143    ) -> Self {
144        Self {
145            formatted,
146            semantic,
147            line_count,
148            next_cursor,
149        }
150    }
151}
152/// Check if a file is eligible for analysis based on size and language support.
153fn check_file_eligibility(entry: &WalkEntry) -> bool {
154    // Check file size before reading
155    if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
156        tracing::debug!("skipping large file: {}", entry.path.display());
157        return false;
158    }
159
160    // Try to read file content; skip binary or unreadable files
161    std::fs::read_to_string(&entry.path).is_ok()
162}
163
164/// Process a single file entry and extract its analysis data.
165fn process_file_entry(entry: &WalkEntry, source: &str) -> FileInfo {
166    let path_str = entry.path.display().to_string();
167    let line_count = source.lines().count();
168
169    // Detect language from extension
170    let ext = entry.path.extension().and_then(|e| e.to_str());
171
172    // Detect language and extract counts
173    let (language, function_count, class_count) = if let Some(ext_str) = ext {
174        if let Some(lang) = language_for_extension(ext_str) {
175            let lang_str = lang.to_string();
176            match ElementExtractor::extract_with_depth(source, &lang_str) {
177                Ok((func_count, class_count)) => (lang_str, func_count, class_count),
178                Err(_) => (lang_str, 0, 0),
179            }
180        } else {
181            ("unknown".to_string(), 0, 0)
182        }
183    } else {
184        ("unknown".to_string(), 0, 0)
185    };
186
187    let is_test = is_test_file(&entry.path);
188
189    FileInfo {
190        path: path_str,
191        line_count,
192        function_count,
193        class_count,
194        language,
195        is_test,
196    }
197}
198
199/// Analyze a single file entry in parallel context.
200fn analyze_single_file(
201    entry: &WalkEntry,
202    progress: &Arc<AtomicUsize>,
203    ct: &CancellationToken,
204) -> Option<FileInfo> {
205    // Check cancellation per file
206    if ct.is_cancelled() {
207        return None;
208    }
209
210    // Check file eligibility
211    if !check_file_eligibility(entry) {
212        progress.fetch_add(1, Ordering::Relaxed);
213        return None;
214    }
215
216    // Read file content (already checked in check_file_eligibility)
217    let Ok(source) = std::fs::read_to_string(&entry.path) else {
218        progress.fetch_add(1, Ordering::Relaxed);
219        return None;
220    };
221
222    let file_info = process_file_entry(entry, &source);
223    progress.fetch_add(1, Ordering::Relaxed);
224
225    Some(file_info)
226}
227
228/// Initialize analysis context and collect file entries.
229fn init_analysis_context(entries: &[WalkEntry]) -> Vec<&WalkEntry> {
230    entries
231        .iter()
232        .filter(|e| !e.is_dir && !e.is_symlink)
233        .collect()
234}
235
236/// Build the final analysis output from results.
237fn build_analysis_output(
238    entries: Vec<WalkEntry>,
239    analysis_results: Vec<FileInfo>,
240) -> AnalysisOutput {
241    let formatted = format_structure(&entries, &analysis_results, None);
242    AnalysisOutput {
243        formatted,
244        files: analysis_results,
245        entries,
246        next_cursor: None,
247        subtree_counts: None,
248    }
249}
250
251/// Run parallel analysis on file entries and log completion.
252fn run_parallel_analysis(
253    file_entries: &[&WalkEntry],
254    progress: &Arc<AtomicUsize>,
255    ct: &CancellationToken,
256) -> Result<Vec<FileInfo>, AnalyzeError> {
257    let start = Instant::now();
258    tracing::debug!(file_count = file_entries.len(), "analysis start");
259
260    let _parse_span = tracing::info_span!("ast.parse_batch", count = file_entries.len()).entered();
261
262    // Parallel analysis of files
263    let analysis_results: Vec<FileInfo> = file_entries
264        .par_iter()
265        .filter_map(|entry| analyze_single_file(entry, progress, ct))
266        .collect();
267
268    // Check if cancelled after parallel processing
269    if ct.is_cancelled() {
270        return Err(AnalyzeError::Cancelled);
271    }
272
273    tracing::debug!(
274        file_count = file_entries.len(),
275        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
276        "analysis complete"
277    );
278
279    Ok(analysis_results)
280}
281
282#[instrument(skip_all, fields(path = %root.display()))]
283// public API; callers expect owned semantics
284#[allow(clippy::needless_pass_by_value)]
285pub fn analyze_directory_with_progress(
286    root: &Path,
287    entries: Vec<WalkEntry>,
288    progress: Arc<AtomicUsize>,
289    ct: CancellationToken,
290) -> Result<AnalysisOutput, AnalyzeError> {
291    // Check if already cancelled
292    if ct.is_cancelled() {
293        return Err(AnalyzeError::Cancelled);
294    }
295
296    tracing::debug!(root = %root.display(), "analysis start");
297
298    let file_entries = init_analysis_context(&entries);
299    let analysis_results = run_parallel_analysis(&file_entries, &progress, &ct)?;
300
301    let _format_span = tracing::info_span!("output.format").entered();
302
303    // Build and return output
304    Ok(build_analysis_output(entries, analysis_results))
305}
306
307/// Analyze a directory structure and return formatted output and file data.
308#[instrument(skip_all, fields(path = %root.display()))]
309pub fn analyze_directory(
310    root: &Path,
311    max_depth: Option<u32>,
312) -> Result<AnalysisOutput, AnalyzeError> {
313    let entries = walk_directory(root, max_depth)?;
314    let counter = Arc::new(AtomicUsize::new(0));
315    let ct = CancellationToken::new();
316    analyze_directory_with_progress(root, entries, counter, ct)
317}
318
319/// Determine analysis mode based on parameters and path.
320#[must_use]
321pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
322    if focus.is_some() {
323        return AnalysisMode::SymbolFocus;
324    }
325
326    let path_obj = Path::new(path);
327    if path_obj.is_dir() {
328        AnalysisMode::Overview
329    } else {
330        AnalysisMode::FileDetails
331    }
332}
333
334/// Analyze a single file and return semantic analysis with formatted output.
335#[instrument(skip_all, fields(path))]
336pub fn analyze_file(
337    path: &str,
338    ast_recursion_limit: Option<usize>,
339) -> Result<FileAnalysisOutput, AnalyzeError> {
340    let start = Instant::now();
341
342    // Check file size before reading
343    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
344        tracing::debug!("skipping large file: {}", path);
345        return Err(AnalyzeError::Parser(
346            crate::parser::ParserError::ParseError("file too large".to_string()),
347        ));
348    }
349
350    let source = std::fs::read_to_string(path)
351        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
352
353    let line_count = source.lines().count();
354
355    // Detect language from extension
356    let ext = Path::new(path)
357        .extension()
358        .and_then(|e| e.to_str())
359        .and_then(language_for_extension)
360        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
361
362    // Extract semantic information
363    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit, None)?;
364
365    // Populate the file path on references now that the path is known
366    for r in &mut semantic.references {
367        r.location = path.to_string();
368    }
369
370    // Resolve Python wildcard imports
371    if ext == "python" {
372        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
373    }
374
375    // Detect if this is a test file
376    let is_test = is_test_file(Path::new(path));
377
378    // Extract parent directory for relative path display
379    let parent_dir = Path::new(path).parent();
380
381    // Format output
382    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
383
384    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
385
386    Ok(FileAnalysisOutput::new(
387        formatted, semantic, line_count, None,
388    ))
389}
390
391/// Analyze source code from a string buffer without filesystem access.
392///
393/// This function analyzes in-memory source code by language identifier. The `language`
394/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
395/// extension (e.g., `"rs"`, `"py"`).
396///
397/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
398/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
399/// a file extension to its supported language identifier.
400///
401/// # Arguments
402///
403/// * `source` - The source code to analyze
404/// * `language` - The language identifier (language name or extension)
405/// * `ast_recursion_limit` - Optional limit for AST traversal depth
406///
407/// # Returns
408///
409/// - `Ok(FileAnalysisOutput)` on success
410/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
411/// - `Err(AnalyzeError::Parser)` if parsing fails
412///
413/// # Notes
414///
415/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
416/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
417#[inline]
418pub fn analyze_str(
419    source: &str,
420    language: &str,
421    ast_recursion_limit: Option<usize>,
422) -> Result<FileAnalysisOutput, AnalyzeError> {
423    // Resolve language: first try as a file extension, then as a language name
424    // (case-insensitive match against supported_languages()).
425    let lang = language_for_extension(language).or_else(|| {
426        let lower = language.to_ascii_lowercase();
427        supported_languages()
428            .iter()
429            .find(|&&name| name == lower)
430            .copied()
431    });
432    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
433
434    // Extract semantic information
435    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit, None)?;
436
437    // Populate a stable in-memory sentinel on all reference locations
438    for r in &mut semantic.references {
439        r.location = "<memory>".to_string();
440    }
441
442    // Count lines in the source
443    let line_count = source.lines().count();
444
445    // Format output with empty path (no filesystem access)
446    let formatted = format_file_details("", &semantic, line_count, false, None);
447
448    Ok(FileAnalysisOutput::new(
449        formatted, semantic, line_count, None,
450    ))
451}
452
/// Single entry in a call chain (depth-1 direct caller or callee).
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
pub struct CallChainEntry {
    /// Name of the calling or called symbol.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Symbol name of the caller or callee")
    )]
    pub symbol: String,
    /// Path of the file containing the symbol, made relative to the analysis root when
    /// the root is a prefix of the path.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "File path relative to the repository root")
    )]
    pub file: String,
    /// Line number associated with the entry.
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Line number of the definition or call site (1-indexed)",
            schema_with = "crate::schema_helpers::integer_schema"
        )
    )]
    pub line: usize,
}
477
/// Result of focused symbol analysis.
///
/// The serialized form contains `formatted`, `def_use_sites`, and — when present —
/// `next_cursor`, `callers`, `test_callers` and `callees`. All other fields are marked
/// `#[serde(skip)]` and exist only for in-process consumers.
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FocusedAnalysisOutput {
    /// Text rendering of the call graph around the focus symbol.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the call graph analysis")
    )]
    pub formatted: String,
    /// Pagination cursor; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub prod_chains: Vec<InternalCallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub test_chains: Vec<InternalCallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub outgoing_chains: Vec<InternalCallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub def_count: usize,
    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub unfiltered_caller_count: usize,
    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub impl_trait_caller_count: usize,
    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub test_callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callees: Option<Vec<CallChainEntry>>,
    /// Definition and use sites for the symbol.
    #[serde(default)]
    pub def_use_sites: Vec<crate::types::DefUseSite>,
}
540
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Name of the symbol to analyze; resolved against the call graph using `match_mode`.
    pub focus: String,
    /// How `focus` is matched against known symbols (exact or one of the indexed modes).
    pub match_mode: SymbolMatchMode,
    /// Depth passed to the incoming/outgoing chain searches.
    pub follow_depth: u32,
    /// Depth limit passed to the directory walk; `None` leaves the choice to the walker.
    pub max_depth: Option<u32>,
    /// Optional AST recursion limit forwarded to the semantic extractor.
    pub ast_recursion_limit: Option<usize>,
    /// When `true`, the summary formatter is used instead of the full one.
    pub use_summary: bool,
    /// When `Some(true)`, caller edges are restricted to impl-trait edges and a FILTER
    /// header is prepended to the output; `None` behaves like `Some(false)`.
    pub impl_only: Option<bool>,
    /// NOTE(review): presumably enables collection of definition/use sites — the code
    /// that reads this flag is outside this section; confirm there.
    pub def_use: bool,
    /// Optional per-file parse timeout in microseconds, forwarded to the semantic extractor.
    pub parse_timeout_micros: Option<u64>,
}
555
/// Internal parameters for focused analysis phases.
///
/// Carries the same fields as `FocusedAnalysisConfig` except `max_depth`, which is only
/// needed for the directory walk.
#[derive(Clone)]
struct InternalFocusedParams {
    // Symbol name to resolve.
    focus: String,
    // Matching strategy used when resolving `focus`.
    match_mode: SymbolMatchMode,
    // Depth for the incoming/outgoing chain searches.
    follow_depth: u32,
    // Optional AST recursion limit for the extractor.
    ast_recursion_limit: Option<usize>,
    // Selects the summary formatter when true.
    use_summary: bool,
    // `Some(true)` restricts caller edges to impl-trait edges.
    impl_only: Option<bool>,
    // NOTE(review): consumer of this flag is outside this section; confirm its meaning there.
    def_use: bool,
    // Optional per-file parse timeout in microseconds.
    parse_timeout_micros: Option<u64>,
}
568
569/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
570type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
571
572/// Phase 1: Collect semantic analysis for all files in parallel.
573fn collect_file_analysis(
574    entries: &[WalkEntry],
575    progress: &Arc<AtomicUsize>,
576    ct: &CancellationToken,
577    ast_recursion_limit: Option<usize>,
578    parse_timeout_micros: Option<u64>,
579) -> Result<FileAnalysisBatch, AnalyzeError> {
580    // Check if already cancelled
581    if ct.is_cancelled() {
582        return Err(AnalyzeError::Cancelled);
583    }
584
585    // Use pre-walked entries (passed by caller)
586    // Collect semantic analysis for all files in parallel
587    let file_entries: Vec<&WalkEntry> = entries
588        .iter()
589        .filter(|e| !e.is_dir && !e.is_symlink)
590        .collect();
591
592    // Collect per-file timeout events so they can be surfaced as AnalyzeError::ParseTimeout.
593    let timed_out: std::sync::Mutex<Vec<(PathBuf, u64)>> = std::sync::Mutex::new(Vec::new());
594
595    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
596        .par_iter()
597        .filter_map(|entry| {
598            // Check cancellation per file
599            if ct.is_cancelled() {
600                return None;
601            }
602
603            let ext = entry.path.extension().and_then(|e| e.to_str());
604
605            // Check file size before reading
606            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
607                tracing::debug!("skipping large file: {}", entry.path.display());
608                progress.fetch_add(1, Ordering::Relaxed);
609                return None;
610            }
611
612            // Try to read file content
613            let Ok(source) = std::fs::read_to_string(&entry.path) else {
614                progress.fetch_add(1, Ordering::Relaxed);
615                return None;
616            };
617
618            // Detect language and extract semantic information
619            let language = if let Some(ext_str) = ext {
620                language_for_extension(ext_str)
621                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
622            } else {
623                "unknown".to_string()
624            };
625
626            match SemanticExtractor::extract(
627                &source,
628                &language,
629                ast_recursion_limit,
630                parse_timeout_micros,
631            ) {
632                Ok(mut semantic) => {
633                    // Populate file path on references
634                    for r in &mut semantic.references {
635                        r.location = entry.path.display().to_string();
636                    }
637                    // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
638                    for trait_info in &mut semantic.impl_traits {
639                        trait_info.path.clone_from(&entry.path);
640                    }
641                    progress.fetch_add(1, Ordering::Relaxed);
642                    Some((entry.path.clone(), semantic))
643                }
644                Err(crate::parser::ParserError::Timeout(micros)) => {
645                    tracing::warn!(
646                        "parse timeout exceeded for {}: {} microseconds",
647                        entry.path.display(),
648                        micros
649                    );
650                    if let Ok(mut v) = timed_out.lock() {
651                        v.push((entry.path.clone(), micros));
652                    }
653                    progress.fetch_add(1, Ordering::Relaxed);
654                    None
655                }
656                Err(_) => {
657                    progress.fetch_add(1, Ordering::Relaxed);
658                    None
659                }
660            }
661        })
662        .collect();
663
664    // Check if cancelled after parallel processing
665    if ct.is_cancelled() {
666        return Err(AnalyzeError::Cancelled);
667    }
668
669    // Surface the first timeout as AnalyzeError::ParseTimeout so callers can detect it.
670    if let Ok(mut v) = timed_out.lock()
671        && let Some((path, micros)) = v.drain(..).next()
672    {
673        return Err(AnalyzeError::ParseTimeout { path, micros });
674    }
675
676    // Collect all impl-trait info from analysis results
677    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
678        .iter()
679        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
680        .collect();
681
682    Ok((analysis_results, all_impl_traits))
683}
684
685/// Phase 2: Build call graph from analysis results.
686fn build_call_graph(
687    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
688    all_impl_traits: &[ImplTraitInfo],
689) -> Result<CallGraph, AnalyzeError> {
690    // Build call graph. Always build without impl_only filter first so we can
691    // record the unfiltered caller count before discarding those edges.
692    CallGraph::build_from_results(
693        analysis_results,
694        all_impl_traits,
695        false, // filter applied below after counting
696    )
697    .map_err(std::convert::Into::into)
698}
699
700/// Phase 3: Resolve symbol and apply `impl_only` filter.
701/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
702/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
703/// then compute `impl_trait_caller_count`.
704fn resolve_symbol(
705    graph: &mut CallGraph,
706    params: &InternalFocusedParams,
707) -> Result<(String, usize, usize), AnalyzeError> {
708    // Resolve symbol name using the requested match mode.
709    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
710        let exists = graph.definitions.contains_key(&params.focus)
711            || graph.callers.contains_key(&params.focus)
712            || graph.callees.contains_key(&params.focus);
713        if exists {
714            params.focus.clone()
715        } else {
716            return Err(crate::graph::GraphError::SymbolNotFound {
717                symbol: params.focus.clone(),
718                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
719            }
720            .into());
721        }
722    } else {
723        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
724    };
725
726    // Count unique callers for the focus symbol before applying impl_only filter.
727    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
728        edges
729            .iter()
730            .map(|e| &e.neighbor_name)
731            .collect::<std::collections::HashSet<_>>()
732            .len()
733    });
734
735    // Apply impl_only filter now if requested, then count filtered callers.
736    // Filter all caller adjacency lists so traversal and formatting are consistently
737    // restricted to impl-trait edges regardless of follow_depth.
738    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
739        for edges in graph.callers.values_mut() {
740            edges.retain(|e| e.is_impl_trait);
741        }
742        graph.callers.get(&resolved_focus).map_or(0, |edges| {
743            edges
744                .iter()
745                .map(|e| &e.neighbor_name)
746                .collect::<std::collections::HashSet<_>>()
747                .len()
748        })
749    } else {
750        unfiltered_caller_count
751    };
752
753    Ok((
754        resolved_focus,
755        unfiltered_caller_count,
756        impl_trait_caller_count,
757    ))
758}
759
/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
///
/// `prod_chains` and `test_chains` together are the incoming chains, split by whether the
/// first element of each chain looks like test code.
type ChainComputeResult = (
    String,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    usize,
);
768
769/// Helper function to convert InternalCallChain data to CallChainEntry vec.
770/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
771/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
772fn chains_to_entries(
773    chains: &[InternalCallChain],
774    root: Option<&std::path::Path>,
775) -> Option<Vec<CallChainEntry>> {
776    if chains.is_empty() {
777        return None;
778    }
779    let entries: Vec<CallChainEntry> = chains
780        .iter()
781        .take(10)
782        .filter_map(|chain| {
783            let (symbol, path, line) = chain.chain.first()?;
784            let file = match root {
785                Some(root) => path
786                    .strip_prefix(root)
787                    .unwrap_or(path.as_path())
788                    .to_string_lossy()
789                    .into_owned(),
790                None => path.to_string_lossy().into_owned(),
791            };
792            Some(CallChainEntry {
793                symbol: symbol.clone(),
794                file,
795                line: *line,
796            })
797        })
798        .collect();
799    if entries.is_empty() {
800        None
801    } else {
802        Some(entries)
803    }
804}
805
806/// Phase 4: Compute chains and format output.
807fn compute_chains(
808    graph: &CallGraph,
809    resolved_focus: &str,
810    root: &Path,
811    params: &InternalFocusedParams,
812    unfiltered_caller_count: usize,
813    impl_trait_caller_count: usize,
814    def_use_sites: &[crate::types::DefUseSite],
815) -> Result<ChainComputeResult, AnalyzeError> {
816    // Compute chain data for pagination (always, regardless of summary mode)
817    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
818    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
819    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
820
821    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
822        incoming_chains.iter().cloned().partition(|chain| {
823            chain
824                .chain
825                .first()
826                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
827        });
828
829    // Format output with pre-computed chains
830    let mut formatted = if params.use_summary {
831        format_focused_summary_internal(
832            graph,
833            resolved_focus,
834            params.follow_depth,
835            Some(root),
836            Some(&incoming_chains),
837            Some(&outgoing_chains),
838            def_use_sites,
839        )?
840    } else {
841        format_focused_internal(
842            graph,
843            resolved_focus,
844            params.follow_depth,
845            Some(root),
846            Some(&incoming_chains),
847            Some(&outgoing_chains),
848            def_use_sites,
849        )?
850    };
851
852    // Add FILTER header if impl_only filter was applied
853    if params.impl_only.unwrap_or(false) {
854        let filter_header = format!(
855            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
856        );
857        formatted = format!("{filter_header}{formatted}");
858    }
859
860    Ok((
861        formatted,
862        prod_chains,
863        test_chains,
864        outgoing_chains,
865        def_count,
866    ))
867}
868
869/// Analyze a symbol's call graph across a directory with progress tracking.
870// public API; callers expect owned semantics
871#[allow(clippy::needless_pass_by_value)]
872pub fn analyze_focused_with_progress(
873    root: &Path,
874    params: &FocusedAnalysisConfig,
875    progress: Arc<AtomicUsize>,
876    ct: CancellationToken,
877) -> Result<FocusedAnalysisOutput, AnalyzeError> {
878    let entries = walk_directory(root, params.max_depth)?;
879    let internal_params = InternalFocusedParams {
880        focus: params.focus.clone(),
881        match_mode: params.match_mode.clone(),
882        follow_depth: params.follow_depth,
883        ast_recursion_limit: params.ast_recursion_limit,
884        use_summary: params.use_summary,
885        impl_only: params.impl_only,
886        def_use: params.def_use,
887        parse_timeout_micros: params.parse_timeout_micros,
888    };
889    analyze_focused_with_progress_with_entries_internal(
890        root,
891        params.max_depth,
892        &progress,
893        &ct,
894        &internal_params,
895        &entries,
896    )
897}
898
899/// Internal implementation of focused analysis using pre-walked entries and params struct.
900#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
901fn analyze_focused_with_progress_with_entries_internal(
902    root: &Path,
903    _max_depth: Option<u32>,
904    progress: &Arc<AtomicUsize>,
905    ct: &CancellationToken,
906    params: &InternalFocusedParams,
907    entries: &[WalkEntry],
908) -> Result<FocusedAnalysisOutput, AnalyzeError> {
909    // Check if already cancelled
910    if ct.is_cancelled() {
911        return Err(AnalyzeError::Cancelled);
912    }
913
914    // Check if path is a file (hint to use directory)
915    if root.is_file() {
916        let formatted =
917            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
918                .to_string();
919        return Ok(FocusedAnalysisOutput {
920            formatted,
921            next_cursor: None,
922            prod_chains: vec![],
923            test_chains: vec![],
924            outgoing_chains: vec![],
925            def_count: 0,
926            unfiltered_caller_count: 0,
927            impl_trait_caller_count: 0,
928            callers: None,
929            test_callers: None,
930            callees: None,
931            def_use_sites: vec![],
932        });
933    }
934
935    // Phase 1: Collect file analysis
936    let (analysis_results, all_impl_traits) = collect_file_analysis(
937        entries,
938        progress,
939        ct,
940        params.ast_recursion_limit,
941        params.parse_timeout_micros,
942    )?;
943
944    // Check for cancellation before building the call graph (phase 2)
945    if ct.is_cancelled() {
946        return Err(AnalyzeError::Cancelled);
947    }
948
949    // Phase 2: Build call graph
950    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
951
952    // Check for cancellation before resolving the symbol (phase 3)
953    if ct.is_cancelled() {
954        return Err(AnalyzeError::Cancelled);
955    }
956
957    // Phase 3: Resolve symbol and apply impl_only filter.
958    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
959    // fall through to def-use extraction instead of returning SymbolNotFound.
960    let resolve_result = resolve_symbol(&mut graph, params);
961    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
962        &resolve_result
963    {
964        // Deliberately not collapsed: resolve_result must stay alive past this block
965        // so that the `?` below can propagate non-SymbolNotFound errors.
966        if params.def_use {
967            let def_use_sites =
968                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
969            if def_use_sites.is_empty() {
970                // Symbol not found anywhere (neither in call graph nor as def/use site).
971                // Propagate the original SymbolNotFound error instead of returning an
972                // empty success response.
973                if let Err(e) = resolve_result {
974                    return Err(e);
975                }
976                unreachable!("resolve_result is Ok only when symbol was found");
977            }
978            use std::fmt::Write as _;
979            let mut formatted = String::new();
980            let _ = writeln!(
981                formatted,
982                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
983                params.focus
984            );
985            {
986                let writes = def_use_sites
987                    .iter()
988                    .filter(|s| {
989                        matches!(
990                            s.kind,
991                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
992                        )
993                    })
994                    .count();
995                let reads = def_use_sites
996                    .iter()
997                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
998                    .count();
999                let _ = writeln!(
1000                    formatted,
1001                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
1002                    params.focus,
1003                    def_use_sites.len(),
1004                    writes,
1005                    reads
1006                );
1007            }
1008            return Ok(FocusedAnalysisOutput {
1009                formatted,
1010                next_cursor: None,
1011                callers: None,
1012                test_callers: None,
1013                callees: None,
1014                prod_chains: vec![],
1015                test_chains: vec![],
1016                outgoing_chains: vec![],
1017                def_count: 0,
1018                unfiltered_caller_count: 0,
1019                impl_trait_caller_count: 0,
1020                def_use_sites,
1021            });
1022        }
1023    }
1024    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
1025
1026    // Check for cancellation before computing chains (phase 4)
1027    if ct.is_cancelled() {
1028        return Err(AnalyzeError::Cancelled);
1029    }
1030
1031    // Phase 5 (optional, before formatting): Def-use site extraction.
1032    // Use params.focus (the raw user-supplied string) rather than resolved_focus
1033    // so that variable/field names that are not in the call graph still work.
1034    let def_use_sites = if params.def_use {
1035        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
1036    } else {
1037        Vec::new()
1038    };
1039
1040    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
1041    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
1042        &graph,
1043        &resolved_focus,
1044        root,
1045        params,
1046        unfiltered_caller_count,
1047        impl_trait_caller_count,
1048        &def_use_sites,
1049    )?;
1050
1051    // Compute depth-1 chains for structured output fields (always direct relationships only,
1052    // regardless of `follow_depth` used for the text-formatted output).
1053    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
1054        // Chains already at depth 1; reuse the partitioned vecs.
1055        let callers = chains_to_entries(&prod_chains, Some(root));
1056        let test_callers = chains_to_entries(&test_chains, Some(root));
1057        let callees = chains_to_entries(&outgoing_chains, Some(root));
1058        (callers, test_callers, callees)
1059    } else {
1060        // follow_depth > 1: re-query at depth 1 to get only direct edges.
1061        let incoming1 = graph
1062            .find_incoming_chains(&resolved_focus, 1)
1063            .unwrap_or_default();
1064        let outgoing1 = graph
1065            .find_outgoing_chains(&resolved_focus, 1)
1066            .unwrap_or_default();
1067        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
1068            chain
1069                .chain
1070                .first()
1071                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
1072        });
1073        let callers = chains_to_entries(&prod1, Some(root));
1074        let test_callers = chains_to_entries(&test1, Some(root));
1075        let callees = chains_to_entries(&outgoing1, Some(root));
1076        (callers, test_callers, callees)
1077    };
1078
1079    Ok(FocusedAnalysisOutput {
1080        formatted,
1081        next_cursor: None,
1082        callers: depth1_callers,
1083        test_callers: depth1_test_callers,
1084        callees: depth1_callees,
1085        prod_chains,
1086        test_chains,
1087        outgoing_chains,
1088        def_count,
1089        unfiltered_caller_count,
1090        impl_trait_caller_count,
1091        def_use_sites,
1092    })
1093}
1094
1095/// Phase 5: Extract def-use sites for `symbol` across all entries.
1096/// Writes go before reads; within each kind ordered by file, line, then column.
1097fn collect_def_use_sites(
1098    entries: &[WalkEntry],
1099    symbol: &str,
1100    ast_recursion_limit: Option<usize>,
1101    root: &std::path::Path,
1102    ct: &CancellationToken,
1103) -> Vec<crate::types::DefUseSite> {
1104    use crate::parser::SemanticExtractor;
1105
1106    let file_entries: Vec<&WalkEntry> = entries
1107        .iter()
1108        .filter(|e| !e.is_dir && !e.is_symlink)
1109        .collect();
1110
1111    let mut sites: Vec<crate::types::DefUseSite> = file_entries
1112        .par_iter()
1113        .filter_map(|entry| {
1114            if ct.is_cancelled() {
1115                return None;
1116            }
1117
1118            // Check file size before reading
1119            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1120                tracing::debug!("skipping large file: {}", entry.path.display());
1121                return None;
1122            }
1123
1124            let Ok(source) = std::fs::read_to_string(&entry.path) else {
1125                return None;
1126            };
1127            let ext = entry
1128                .path
1129                .extension()
1130                .and_then(|e| e.to_str())
1131                .unwrap_or("");
1132            let lang = crate::lang::language_for_extension(ext)?;
1133            let file_path = entry
1134                .path
1135                .strip_prefix(root)
1136                .unwrap_or(&entry.path)
1137                .display()
1138                .to_string();
1139            let sites = SemanticExtractor::extract_def_use_for_file(
1140                &source,
1141                lang,
1142                symbol,
1143                &file_path,
1144                ast_recursion_limit,
1145            );
1146            if sites.is_empty() { None } else { Some(sites) }
1147        })
1148        .flatten()
1149        .collect();
1150
1151    // Writes before reads; within each kind: file, line, then column for deterministic order
1152    sites.sort_by(|a, b| {
1153        use crate::types::DefUseKind;
1154        let kind_ord = |k: &DefUseKind| match k {
1155            DefUseKind::Write | DefUseKind::WriteRead => 0,
1156            DefUseKind::Read => 1,
1157        };
1158        kind_ord(&a.kind)
1159            .cmp(&kind_ord(&b.kind))
1160            .then_with(|| a.file.cmp(&b.file))
1161            .then_with(|| a.line.cmp(&b.line))
1162            .then_with(|| a.column.cmp(&b.column))
1163    });
1164
1165    sites
1166}
1167
1168/// Analyze a symbol's call graph using pre-walked directory entries.
1169pub fn analyze_focused_with_progress_with_entries(
1170    root: &Path,
1171    params: &FocusedAnalysisConfig,
1172    progress: &Arc<AtomicUsize>,
1173    ct: &CancellationToken,
1174    entries: &[WalkEntry],
1175) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1176    let internal_params = InternalFocusedParams {
1177        focus: params.focus.clone(),
1178        match_mode: params.match_mode.clone(),
1179        follow_depth: params.follow_depth,
1180        ast_recursion_limit: params.ast_recursion_limit,
1181        use_summary: params.use_summary,
1182        impl_only: params.impl_only,
1183        def_use: params.def_use,
1184        parse_timeout_micros: params.parse_timeout_micros,
1185    };
1186    analyze_focused_with_progress_with_entries_internal(
1187        root,
1188        params.max_depth,
1189        progress,
1190        ct,
1191        &internal_params,
1192        entries,
1193    )
1194}
1195
1196#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1197pub fn analyze_focused(
1198    root: &Path,
1199    focus: &str,
1200    follow_depth: u32,
1201    max_depth: Option<u32>,
1202    ast_recursion_limit: Option<usize>,
1203) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1204    let entries = walk_directory(root, max_depth)?;
1205    let counter = Arc::new(AtomicUsize::new(0));
1206    let ct = CancellationToken::new();
1207    let params = FocusedAnalysisConfig {
1208        focus: focus.to_string(),
1209        match_mode: SymbolMatchMode::Exact,
1210        follow_depth,
1211        max_depth,
1212        ast_recursion_limit,
1213        use_summary: false,
1214        impl_only: None,
1215        def_use: false,
1216        parse_timeout_micros: None,
1217    };
1218    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1219}
1220
1221/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1222/// functions, imports) for lightweight code understanding.
1223#[instrument(skip_all, fields(path))]
1224pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1225    // Check file size before reading
1226    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1227        tracing::debug!("skipping large file: {}", path);
1228        return Err(AnalyzeError::Parser(
1229            crate::parser::ParserError::ParseError("file too large".to_string()),
1230        ));
1231    }
1232
1233    let source = std::fs::read_to_string(path)
1234        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1235
1236    let file_path = Path::new(path);
1237    let name = file_path
1238        .file_name()
1239        .and_then(|s| s.to_str())
1240        .unwrap_or("unknown")
1241        .to_string();
1242
1243    let line_count = source.lines().count();
1244
1245    let language = file_path
1246        .extension()
1247        .and_then(|e| e.to_str())
1248        .and_then(language_for_extension)
1249        .ok_or_else(|| {
1250            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1251                "unsupported or missing file extension".to_string(),
1252            ))
1253        })?;
1254
1255    let mut module_info = SemanticExtractor::extract_module_info(&source, language, None)?;
1256    module_info.name = name;
1257    module_info.line_count = line_count;
1258
1259    Ok(module_info)
1260}
1261
1262/// Scan a directory for files that import a given module path.
1263///
1264/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1265/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1266/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1267pub fn analyze_import_lookup(
1268    root: &Path,
1269    module: &str,
1270    entries: &[WalkEntry],
1271    ast_recursion_limit: Option<usize>,
1272) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1273    let matches: Vec<(PathBuf, usize)> = entries
1274        .par_iter()
1275        .filter_map(|entry| {
1276            if entry.is_dir || entry.is_symlink {
1277                tracing::debug!("skipping symlink: {}", entry.path.display());
1278                return None;
1279            }
1280            let ext = entry
1281                .path
1282                .extension()
1283                .and_then(|e| e.to_str())
1284                .and_then(crate::lang::language_for_extension)?;
1285            let source = std::fs::read_to_string(&entry.path).ok()?;
1286            let semantic =
1287                SemanticExtractor::extract(&source, ext, ast_recursion_limit, None).ok()?;
1288            for import in &semantic.imports {
1289                if import.module == module || import.items.iter().any(|item| item == module) {
1290                    return Some((entry.path.clone(), import.line));
1291                }
1292            }
1293            None
1294        })
1295        .collect();
1296
1297    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1298    text.push_str(&format!("ROOT: {}\n", root.display()));
1299    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1300    for (path, line) in &matches {
1301        let rel = path.strip_prefix(root).unwrap_or(path);
1302        text.push_str(&format!("  {}:{line}\n", rel.display()));
1303    }
1304
1305    Ok(FocusedAnalysisOutput {
1306        formatted: text,
1307        next_cursor: None,
1308        prod_chains: vec![],
1309        test_chains: vec![],
1310        outgoing_chains: vec![],
1311        def_count: 0,
1312        unfiltered_caller_count: 0,
1313        impl_trait_caller_count: 0,
1314        callers: None,
1315        test_callers: None,
1316        callees: None,
1317        def_use_sites: vec![],
1318    })
1319}
1320
1321/// Resolve Python wildcard imports to actual symbol names.
1322///
1323/// For each import with items=`["*"]`, this function:
1324/// 1. Parses the relative dots (if any) and climbs the directory tree
1325/// 2. Finds the target .py file or __init__.py
1326/// 3. Extracts symbols (functions and classes) from the target
1327/// 4. Honors __all__ if defined, otherwise uses function+class names
1328///
1329/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1330fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1331    use std::collections::HashMap;
1332
1333    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1334    let Ok(file_path_canonical) = file_path.canonicalize() else {
1335        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1336        return;
1337    };
1338
1339    for import in imports.iter_mut() {
1340        if import.items != ["*"] {
1341            continue;
1342        }
1343        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1344    }
1345}
1346
1347/// Validate and canonicalize a wildcard target path, checking for self-references.
1348/// Returns the canonical path if valid, or None if validation fails.
1349fn validate_wildcard_target(
1350    target_to_read: &Path,
1351    file_path_canonical: &Path,
1352    module: &str,
1353) -> Option<PathBuf> {
1354    let Ok(canonical) = target_to_read.canonicalize() else {
1355        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1356        return None;
1357    };
1358
1359    if canonical == file_path_canonical {
1360        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1361        return None;
1362    }
1363
1364    Some(canonical)
1365}
1366
1367/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1368fn resolve_single_wildcard(
1369    import: &mut ImportInfo,
1370    file_path: &Path,
1371    file_path_canonical: &Path,
1372    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1373) {
1374    let module = import.module.clone();
1375    let dot_count = module.chars().take_while(|c| *c == '.').count();
1376    if dot_count == 0 {
1377        return;
1378    }
1379    let module_path = module.trim_start_matches('.');
1380
1381    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1382    else {
1383        return;
1384    };
1385
1386    let Some(canonical) = validate_wildcard_target(&target_to_read, file_path_canonical, &module)
1387    else {
1388        return;
1389    };
1390
1391    if let Some(cached) = resolved_cache.get(&canonical) {
1392        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1393        import.items.clone_from(cached);
1394        return;
1395    }
1396
1397    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1398        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1399        import.items.clone_from(&symbols);
1400        resolved_cache.insert(canonical, symbols);
1401    }
1402}
1403
1404/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1405fn locate_target_file(
1406    file_path: &Path,
1407    dot_count: usize,
1408    module_path: &str,
1409    module: &str,
1410) -> Option<PathBuf> {
1411    let mut target_dir = file_path.parent()?.to_path_buf();
1412
1413    for _ in 1..dot_count {
1414        if !target_dir.pop() {
1415            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1416            return None;
1417        }
1418    }
1419
1420    let target_file = if module_path.is_empty() {
1421        target_dir.join("__init__.py")
1422    } else {
1423        let rel_path = module_path.replace('.', "/");
1424        target_dir.join(format!("{rel_path}.py"))
1425    };
1426
1427    if target_file.exists() {
1428        Some(target_file)
1429    } else if target_file.with_extension("").is_dir() {
1430        let init = target_file.with_extension("").join("__init__.py");
1431        if init.exists() { Some(init) } else { None }
1432    } else {
1433        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1434        None
1435    }
1436}
1437
1438/// Build a tree-sitter parser for Python and parse the source code.
1439fn build_parser_for_file(source: &str) -> Option<tree_sitter::Tree> {
1440    use tree_sitter::Parser;
1441
1442    let lang_info = crate::languages::get_language_info("python")?;
1443    let mut parser = Parser::new();
1444    if parser.set_language(&lang_info.language).is_err() {
1445        return None;
1446    }
1447    parser.parse(source, None)
1448}
1449
1450/// Extract all public symbols from a parsed tree (functions and classes).
1451fn extract_all_symbols(tree: &tree_sitter::Tree, source: &str) -> Vec<String> {
1452    let mut symbols = Vec::new();
1453    let root = tree.root_node();
1454    let mut cursor = root.walk();
1455    for child in root.children(&mut cursor) {
1456        if matches!(child.kind(), "function_definition" | "class_definition")
1457            && let Some(name_node) = child.child_by_field_name("name")
1458        {
1459            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1460            if !name.starts_with('_') {
1461                symbols.push(name);
1462            }
1463        }
1464    }
1465    symbols
1466}
1467
1468/// Try to resolve symbols from __all__ or fallback to function/class extraction.
1469fn resolve_symbols_from_tree(tree: &tree_sitter::Tree, source: &str, module: &str) -> Vec<String> {
1470    let mut symbols = Vec::new();
1471    extract_all_from_tree(tree, source, &mut symbols);
1472    if !symbols.is_empty() {
1473        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1474        return symbols;
1475    }
1476
1477    // Fallback: extract functions/classes from the tree
1478    let symbols = extract_all_symbols(tree, source);
1479    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1480    symbols
1481}
1482
1483/// Read and parse a target .py file, returning its exported symbols.
1484fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1485    // Check file size before reading
1486    if target_path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1487        tracing::debug!("skipping large file: {}", target_path.display());
1488        return None;
1489    }
1490
1491    let source = match std::fs::read_to_string(target_path) {
1492        Ok(s) => s,
1493        Err(e) => {
1494            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1495            return None;
1496        }
1497    };
1498
1499    // Parse once with tree-sitter
1500    let tree = build_parser_for_file(&source)?;
1501
1502    // Try to extract __all__ or fallback to function/class extraction
1503    let symbols = resolve_symbols_from_tree(&tree, &source, module);
1504    Some(symbols)
1505}
1506
1507/// Extract __all__ from a tree-sitter tree.
1508fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1509    let root = tree.root_node();
1510    let mut cursor = root.walk();
1511    for child in root.children(&mut cursor) {
1512        if child.kind() == "simple_statement" {
1513            // simple_statement contains assignment and other statement types
1514            let mut simple_cursor = child.walk();
1515            for simple_child in child.children(&mut simple_cursor) {
1516                if simple_child.kind() == "assignment"
1517                    && let Some(left) = simple_child.child_by_field_name("left")
1518                {
1519                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1520                    if target_text == "__all__"
1521                        && let Some(right) = simple_child.child_by_field_name("right")
1522                    {
1523                        extract_string_list_from_list_node(&right, source, result);
1524                    }
1525                }
1526            }
1527        } else if child.kind() == "expression_statement" {
1528            // Fallback for older Python AST structures
1529            let mut stmt_cursor = child.walk();
1530            for stmt_child in child.children(&mut stmt_cursor) {
1531                if stmt_child.kind() == "assignment"
1532                    && let Some(left) = stmt_child.child_by_field_name("left")
1533                {
1534                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1535                    if target_text == "__all__"
1536                        && let Some(right) = stmt_child.child_by_field_name("right")
1537                    {
1538                        extract_string_list_from_list_node(&right, source, result);
1539                    }
1540                }
1541            }
1542        }
1543    }
1544}
1545
1546/// Extract string literals from a Python list node.
1547fn extract_string_list_from_list_node(
1548    list_node: &tree_sitter::Node,
1549    source: &str,
1550    result: &mut Vec<String>,
1551) {
1552    let mut cursor = list_node.walk();
1553    for child in list_node.named_children(&mut cursor) {
1554        if child.kind() == "string" {
1555            let raw = source[child.start_byte()..child.end_byte()].trim();
1556            // Strip quotes: "name" -> name
1557            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1558            if !unquoted.is_empty() {
1559                result.push(unquoted);
1560            }
1561        }
1562    }
1563}
1564
/// Unit tests for this module.
1566#[cfg(test)]
1567mod tests {
1568    use super::*;
1569    use crate::formatter::format_focused_paginated;
1570    use crate::graph::InternalCallChain;
1571    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1572    use std::fs;
1573    use std::path::PathBuf;
1574    use tempfile::TempDir;
1575
1576    #[cfg(feature = "lang-rust")]
1577    #[test]
1578    fn analyze_str_rust_happy_path() {
1579        let source = "fn hello() -> i32 { 42 }";
1580        let result = analyze_str(source, "rs", None);
1581        assert!(result.is_ok());
1582    }
1583
1584    #[cfg(feature = "lang-python")]
1585    #[test]
1586    fn analyze_str_python_happy_path() {
1587        let source = "def greet(name):\n    return f'Hello {name}'";
1588        let result = analyze_str(source, "py", None);
1589        assert!(result.is_ok());
1590    }
1591
1592    #[cfg(feature = "lang-rust")]
1593    #[test]
1594    fn analyze_str_rust_by_language_name() {
1595        let source = "fn hello() -> i32 { 42 }";
1596        let result = analyze_str(source, "rust", None);
1597        assert!(result.is_ok());
1598    }
1599
1600    #[cfg(feature = "lang-python")]
1601    #[test]
1602    fn analyze_str_python_by_language_name() {
1603        let source = "def greet(name):\n    return f'Hello {name}'";
1604        let result = analyze_str(source, "python", None);
1605        assert!(result.is_ok());
1606    }
1607
1608    #[cfg(feature = "lang-rust")]
1609    #[test]
1610    fn analyze_str_rust_mixed_case() {
1611        let source = "fn hello() -> i32 { 42 }";
1612        let result = analyze_str(source, "RuSt", None);
1613        assert!(result.is_ok());
1614    }
1615
1616    #[cfg(feature = "lang-python")]
1617    #[test]
1618    fn analyze_str_python_mixed_case() {
1619        let source = "def greet(name):\n    return f'Hello {name}'";
1620        let result = analyze_str(source, "PyThOn", None);
1621        assert!(result.is_ok());
1622    }
1623
1624    #[test]
1625    fn analyze_str_unsupported_language() {
1626        let result = analyze_str("code", "brainfuck", None);
1627        assert!(
1628            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1629        );
1630    }
1631
1632    #[cfg(feature = "lang-rust")]
1633    #[test]
1634    fn test_symbol_focus_callers_pagination_first_page() {
1635        let temp_dir = TempDir::new().unwrap();
1636
1637        // Create a file with many callers of `target`
1638        let mut code = String::from("fn target() {}\n");
1639        for i in 0..15 {
1640            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1641        }
1642        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1643
1644        // Act
1645        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1646
1647        // Paginate prod callers with page_size=5
1648        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1649            .expect("paginate failed");
1650        assert!(
1651            paginated.total >= 5,
1652            "should have enough callers to paginate"
1653        );
1654        assert!(
1655            paginated.next_cursor.is_some(),
1656            "should have next_cursor for page 1"
1657        );
1658
1659        // Verify cursor encodes callers mode
1660        assert_eq!(paginated.items.len(), 5);
1661    }
1662
1663    #[test]
1664    fn test_symbol_focus_callers_pagination_second_page() {
1665        let temp_dir = TempDir::new().unwrap();
1666
1667        let mut code = String::from("fn target() {}\n");
1668        for i in 0..12 {
1669            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1670        }
1671        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1672
1673        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1674        let total_prod = output.prod_chains.len();
1675
1676        if total_prod > 5 {
1677            // Get page 1 cursor
1678            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1679                .expect("paginate failed");
1680            assert!(p1.next_cursor.is_some());
1681
1682            let cursor_str = p1.next_cursor.unwrap();
1683            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1684
1685            // Get page 2
1686            let p2 = paginate_slice(
1687                &output.prod_chains,
1688                cursor_data.offset,
1689                5,
1690                PaginationMode::Callers,
1691            )
1692            .expect("paginate failed");
1693
1694            // Format paginated output
1695            let formatted = format_focused_paginated(
1696                &p2.items,
1697                total_prod,
1698                PaginationMode::Callers,
1699                "target",
1700                &output.prod_chains,
1701                &output.test_chains,
1702                &output.outgoing_chains,
1703                output.def_count,
1704                cursor_data.offset,
1705                Some(temp_dir.path()),
1706                true,
1707            );
1708
1709            // Assert: header shows correct range for page 2
1710            let expected_start = cursor_data.offset + 1;
1711            assert!(
1712                formatted.contains(&format!("CALLERS ({}", expected_start)),
1713                "header should show page 2 range, got: {}",
1714                formatted
1715            );
1716        }
1717    }
1718
1719    #[test]
1720    fn test_chains_to_entries_empty_returns_none() {
1721        // Arrange
1722        let chains: Vec<InternalCallChain> = vec![];
1723
1724        // Act
1725        let result = chains_to_entries(&chains, None);
1726
1727        // Assert
1728        assert!(result.is_none());
1729    }
1730
1731    #[test]
1732    fn test_chains_to_entries_with_data_returns_entries() {
1733        // Arrange
1734        let chains = vec![
1735            InternalCallChain {
1736                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1737            },
1738            InternalCallChain {
1739                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1740            },
1741        ];
1742        let root = PathBuf::from("/root");
1743
1744        // Act
1745        let result = chains_to_entries(&chains, Some(root.as_path()));
1746
1747        // Assert
1748        assert!(result.is_some());
1749        let entries = result.unwrap();
1750        assert_eq!(entries.len(), 2);
1751        assert_eq!(entries[0].symbol, "caller1");
1752        assert_eq!(entries[0].file, "lib.rs");
1753        assert_eq!(entries[0].line, 10);
1754        assert_eq!(entries[1].symbol, "caller2");
1755        assert_eq!(entries[1].file, "other.rs");
1756        assert_eq!(entries[1].line, 20);
1757    }
1758
1759    #[test]
1760    fn test_symbol_focus_callees_pagination() {
1761        let temp_dir = TempDir::new().unwrap();
1762
1763        // target calls many functions
1764        let mut code = String::from("fn target() {\n");
1765        for i in 0..10 {
1766            code.push_str(&format!("    callee_{:02}();\n", i));
1767        }
1768        code.push_str("}\n");
1769        for i in 0..10 {
1770            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1771        }
1772        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1773
1774        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1775        let total_callees = output.outgoing_chains.len();
1776
1777        if total_callees > 3 {
1778            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1779                .expect("paginate failed");
1780
1781            let formatted = format_focused_paginated(
1782                &paginated.items,
1783                total_callees,
1784                PaginationMode::Callees,
1785                "target",
1786                &output.prod_chains,
1787                &output.test_chains,
1788                &output.outgoing_chains,
1789                output.def_count,
1790                0,
1791                Some(temp_dir.path()),
1792                true,
1793            );
1794
1795            assert!(
1796                formatted.contains(&format!(
1797                    "CALLEES (1-{} of {})",
1798                    paginated.items.len(),
1799                    total_callees
1800                )),
1801                "header should show callees range, got: {}",
1802                formatted
1803            );
1804        }
1805    }
1806
1807    #[test]
1808    fn test_symbol_focus_empty_prod_callers() {
1809        let temp_dir = TempDir::new().unwrap();
1810
1811        // target is only called from test functions
1812        let code = r#"
1813fn target() {}
1814
1815#[cfg(test)]
1816mod tests {
1817    use super::*;
1818    #[test]
1819    fn test_something() { target(); }
1820}
1821"#;
1822        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1823
1824        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1825
1826        // prod_chains may be empty; pagination should handle it gracefully
1827        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1828            .expect("paginate failed");
1829        assert_eq!(paginated.items.len(), output.prod_chains.len());
1830        assert!(
1831            paginated.next_cursor.is_none(),
1832            "no next_cursor for empty or single-page prod_chains"
1833        );
1834    }
1835
1836    #[test]
1837    fn test_impl_only_filter_header_correct_counts() {
1838        let temp_dir = TempDir::new().unwrap();
1839
1840        // Create a Rust fixture with:
1841        // - A trait definition
1842        // - An impl Trait for SomeType block that calls the focus symbol
1843        // - A regular (non-trait-impl) function that also calls the focus symbol
1844        let code = r#"
1845trait MyTrait {
1846    fn focus_symbol();
1847}
1848
1849struct SomeType;
1850
1851impl MyTrait for SomeType {
1852    fn focus_symbol() {}
1853}
1854
1855fn impl_caller() {
1856    SomeType::focus_symbol();
1857}
1858
1859fn regular_caller() {
1860    SomeType::focus_symbol();
1861}
1862"#;
1863        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1864
1865        // Call analyze_focused with impl_only=Some(true)
1866        let params = FocusedAnalysisConfig {
1867            focus: "focus_symbol".to_string(),
1868            match_mode: SymbolMatchMode::Insensitive,
1869            follow_depth: 1,
1870            max_depth: None,
1871            ast_recursion_limit: None,
1872            use_summary: false,
1873            impl_only: Some(true),
1874            def_use: false,
1875            parse_timeout_micros: None,
1876        };
1877        let output = analyze_focused_with_progress(
1878            temp_dir.path(),
1879            &params,
1880            Arc::new(AtomicUsize::new(0)),
1881            CancellationToken::new(),
1882        )
1883        .unwrap();
1884
1885        // Assert the result contains "FILTER: impl_only=true"
1886        assert!(
1887            output.formatted.contains("FILTER: impl_only=true"),
1888            "formatted output should contain FILTER header for impl_only=true, got: {}",
1889            output.formatted
1890        );
1891
1892        // Assert the retained count N < total count M
1893        assert!(
1894            output.impl_trait_caller_count < output.unfiltered_caller_count,
1895            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1896            output.impl_trait_caller_count,
1897            output.unfiltered_caller_count
1898        );
1899
1900        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1901        let filter_line = output
1902            .formatted
1903            .lines()
1904            .find(|line| line.contains("FILTER: impl_only=true"))
1905            .expect("should find FILTER line");
1906        assert!(
1907            filter_line.contains(&format!(
1908                "({} of {} callers shown)",
1909                output.impl_trait_caller_count, output.unfiltered_caller_count
1910            )),
1911            "FILTER line should show correct N of M counts, got: {}",
1912            filter_line
1913        );
1914    }
1915
1916    #[test]
1917    fn test_callers_count_matches_formatted_output() {
1918        let temp_dir = TempDir::new().unwrap();
1919
1920        // Create a file with multiple callers of `target`
1921        let code = r#"
1922fn target() {}
1923fn caller_a() { target(); }
1924fn caller_b() { target(); }
1925fn caller_c() { target(); }
1926"#;
1927        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1928
1929        // Analyze the symbol
1930        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1931
1932        // Extract CALLERS count from formatted output
1933        let formatted = &output.formatted;
1934        let callers_count_from_output = formatted
1935            .lines()
1936            .find(|line| line.contains("FOCUS:"))
1937            .and_then(|line| {
1938                line.split(',')
1939                    .find(|part| part.contains("callers"))
1940                    .and_then(|part| {
1941                        part.trim()
1942                            .split_whitespace()
1943                            .next()
1944                            .and_then(|s| s.parse::<usize>().ok())
1945                    })
1946            })
1947            .expect("should find CALLERS count in formatted output");
1948
1949        // Compute expected count from prod_chains (unique first-caller names)
1950        let expected_callers_count = output
1951            .prod_chains
1952            .iter()
1953            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1954            .collect::<std::collections::HashSet<_>>()
1955            .len();
1956
1957        assert_eq!(
1958            callers_count_from_output, expected_callers_count,
1959            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1960        );
1961    }
1962
1963    #[cfg(feature = "lang-rust")]
1964    #[test]
1965    fn test_def_use_focused_analysis() {
1966        let temp_dir = TempDir::new().unwrap();
1967        fs::write(
1968            temp_dir.path().join("lib.rs"),
1969            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1970        )
1971        .unwrap();
1972
1973        let entries = walk_directory(temp_dir.path(), None).unwrap();
1974        let counter = Arc::new(AtomicUsize::new(0));
1975        let ct = CancellationToken::new();
1976        let params = FocusedAnalysisConfig {
1977            focus: "x".to_string(),
1978            match_mode: SymbolMatchMode::Exact,
1979            follow_depth: 1,
1980            max_depth: None,
1981            ast_recursion_limit: None,
1982            use_summary: false,
1983            impl_only: None,
1984            def_use: true,
1985            parse_timeout_micros: None,
1986        };
1987
1988        let output = analyze_focused_with_progress_with_entries(
1989            temp_dir.path(),
1990            &params,
1991            &counter,
1992            &ct,
1993            &entries,
1994        )
1995        .expect("def_use analysis should succeed");
1996
1997        assert!(
1998            !output.def_use_sites.is_empty(),
1999            "should find def-use sites for x"
2000        );
2001        assert!(
2002            output
2003                .def_use_sites
2004                .iter()
2005                .any(|s| s.kind == crate::types::DefUseKind::Write),
2006            "should have at least one Write site",
2007        );
2008        // No location appears as both write and read
2009        let write_locs: std::collections::HashSet<_> = output
2010            .def_use_sites
2011            .iter()
2012            .filter(|s| {
2013                matches!(
2014                    s.kind,
2015                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
2016                )
2017            })
2018            .map(|s| (&s.file, s.line, s.column))
2019            .collect();
2020        assert!(
2021            output
2022                .def_use_sites
2023                .iter()
2024                .filter(|s| s.kind == crate::types::DefUseKind::Read)
2025                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
2026            "no location should appear as both write and read",
2027        );
2028        assert!(
2029            output.formatted.contains("DEF-USE SITES"),
2030            "formatted output should contain DEF-USE SITES"
2031        );
2032    }
2033
2034    fn make_temp_file(content: &str) -> tempfile::NamedTempFile {
2035        let mut f = tempfile::NamedTempFile::new().unwrap();
2036        use std::io::Write;
2037        f.write_all(content.as_bytes()).unwrap();
2038        f.flush().unwrap();
2039        f
2040    }
2041}