Skip to main content

aptu_coder_core/
analyze.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Maximum size, in bytes, of a file this module will read. The size checks in this file skip
/// (directory and focused analysis) or reject (`analyze_file`) anything larger.
pub const MAX_FILE_SIZE_BYTES: u64 = 10_000_000;
32
/// Errors produced by the analysis functions in this module.
///
/// Variants marked `#[from]` wrap an error type from another module (or `std::io`) so it can be
/// propagated with `?`.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Wraps a `crate::traversal::TraversalError`.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Wraps a `crate::parser::ParserError`. `analyze_file` also uses this variant for
    /// oversized and unreadable files.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Wraps a `crate::graph::GraphError` (for example, a focus symbol that cannot be resolved).
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Wraps a `crate::formatter::FormatterError`.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The `CancellationToken` passed to the analysis was observed as cancelled.
    #[error("Analysis cancelled")]
    Cancelled,
    /// The language identifier given to `analyze_str` matched neither a known file extension
    /// nor a supported language name; the payload is the identifier as supplied.
    #[error("unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Wraps a `std::io::Error`.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// A line range whose `start` is greater than its `end`; `total` is the file's line count.
    // NOTE(review): constructed outside this section of the file.
    #[error("invalid range: start ({start}) > end ({end}); file has {total} lines")]
    InvalidRange {
        start: usize,
        end: usize,
        total: usize,
    },
    /// The given path refers to a directory where a file was expected.
    // NOTE(review): constructed outside this section of the file.
    #[error("path is a directory, not a file: {0}")]
    NotAFile(PathBuf),
    /// A file of `total_lines` lines was requested without a line range.
    // NOTE(review): the size threshold that triggers this is not visible here; confirm at the
    // construction site.
    #[error(
        "file has {total_lines} lines; provide start_line and end_line, or call analyze_module first to locate the range"
    )]
    RangelessLargeFile { total_lines: usize },
    /// Parsing `path` hit the parse timeout; `micros` is the value carried by
    /// `ParserError::Timeout`.
    #[error("parse timeout exceeded for {path}: {micros} microseconds")]
    ParseTimeout { path: PathBuf, micros: u64 },
}
65
/// Result of directory analysis containing both formatted output and file data.
///
/// Only `formatted`, `files` and (when present) `next_cursor` are serialized; `entries` and
/// `subtree_counts` are in-process helpers that are skipped by serde and the JSON schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct AnalysisOutput {
    /// Formatted text representation of the analysis (produced by `format_structure`).
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Per-file results for every file that was successfully read and analyzed.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "List of files analyzed in the directory")
    )]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub entries: Vec<WalkEntry>,
    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    /// Opaque pagination cursor; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
100
/// Result of file-level semantic analysis.
///
/// Returned by `analyze_file` (filesystem input) and `analyze_str` (in-memory input).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FileAnalysisOutput {
    /// Formatted text representation of the analysis (produced by `format_file_details`).
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Structured semantic data extracted from the source.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Semantic analysis data including functions, classes, and imports")
    )]
    pub semantic: SemanticAnalysis,
    /// Number of lines in the analyzed source, as counted by `str::lines`.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Total line count of the analyzed file")
    )]
    #[cfg_attr(
        feature = "schemars",
        schemars(schema_with = "crate::schema_helpers::integer_schema")
    )]
    pub line_count: usize,
    /// Opaque pagination cursor; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
134
impl FileAnalysisOutput {
    /// Create a new `FileAnalysisOutput`.
    ///
    /// Each argument is stored unchanged in the field of the same name. The struct is
    /// `#[non_exhaustive]`, so code outside this crate must use this constructor rather than a
    /// struct literal.
    #[must_use]
    pub fn new(
        formatted: String,
        semantic: SemanticAnalysis,
        line_count: usize,
        next_cursor: Option<String>,
    ) -> Self {
        Self {
            formatted,
            semantic,
            line_count,
            next_cursor,
        }
    }
}
152/// Check if a file is eligible for analysis based on size and language support.
153fn check_file_eligibility(entry: &WalkEntry) -> bool {
154    // Check file size before reading
155    if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
156        tracing::debug!("skipping large file: {}", entry.path.display());
157        return false;
158    }
159
160    // Try to read file content; skip binary or unreadable files
161    std::fs::read_to_string(&entry.path).is_ok()
162}
163
164/// Process a single file entry and extract its analysis data.
165fn process_file_entry(entry: &WalkEntry, source: &str) -> FileInfo {
166    let path_str = entry.path.display().to_string();
167    let line_count = source.lines().count();
168
169    // Detect language from extension
170    let ext = entry.path.extension().and_then(|e| e.to_str());
171
172    // Detect language and extract counts
173    let (language, function_count, class_count) = if let Some(ext_str) = ext
174        && let Some(lang) = language_for_extension(ext_str)
175    {
176        let lang_str = lang.to_string();
177        match ElementExtractor::extract_with_depth(source, &lang_str) {
178            Ok((func_count, class_count)) => (lang_str, func_count, class_count),
179            Err(_) => (lang_str, 0, 0),
180        }
181    } else {
182        ("unknown".to_string(), 0, 0)
183    };
184
185    let is_test = is_test_file(&entry.path);
186
187    FileInfo {
188        path: path_str,
189        line_count,
190        function_count,
191        class_count,
192        language,
193        is_test,
194    }
195}
196
197/// Analyze a single file entry in parallel context.
198fn analyze_single_file(
199    entry: &WalkEntry,
200    progress: &Arc<AtomicUsize>,
201    ct: &CancellationToken,
202) -> Option<FileInfo> {
203    // Check cancellation per file
204    if ct.is_cancelled() {
205        return None;
206    }
207
208    // Check file eligibility
209    if !check_file_eligibility(entry) {
210        progress.fetch_add(1, Ordering::Relaxed);
211        return None;
212    }
213
214    // Read file content (already checked in check_file_eligibility)
215    let Ok(source) = std::fs::read_to_string(&entry.path) else {
216        progress.fetch_add(1, Ordering::Relaxed);
217        return None;
218    };
219
220    let file_info = process_file_entry(entry, &source);
221    progress.fetch_add(1, Ordering::Relaxed);
222
223    Some(file_info)
224}
225
226/// Initialize analysis context and collect file entries.
227fn init_analysis_context(entries: &[WalkEntry]) -> Vec<&WalkEntry> {
228    entries
229        .iter()
230        .filter(|e| !e.is_dir && !e.is_symlink)
231        .collect()
232}
233
234/// Build the final analysis output from results.
235fn build_analysis_output(
236    entries: Vec<WalkEntry>,
237    analysis_results: Vec<FileInfo>,
238) -> AnalysisOutput {
239    let formatted = format_structure(&entries, &analysis_results, None);
240    AnalysisOutput {
241        formatted,
242        files: analysis_results,
243        entries,
244        next_cursor: None,
245        subtree_counts: None,
246    }
247}
248
249/// Run parallel analysis on file entries and log completion.
250fn run_parallel_analysis(
251    file_entries: &[&WalkEntry],
252    progress: &Arc<AtomicUsize>,
253    ct: &CancellationToken,
254) -> Result<Vec<FileInfo>, AnalyzeError> {
255    let start = Instant::now();
256    tracing::debug!(file_count = file_entries.len(), "analysis start");
257
258    let _parse_span = tracing::info_span!("ast.parse_batch", count = file_entries.len()).entered();
259
260    // Parallel analysis of files
261    let analysis_results: Vec<FileInfo> = file_entries
262        .par_iter()
263        .filter_map(|entry| analyze_single_file(entry, progress, ct))
264        .collect();
265
266    // Check if cancelled after parallel processing
267    if ct.is_cancelled() {
268        return Err(AnalyzeError::Cancelled);
269    }
270
271    tracing::debug!(
272        file_count = file_entries.len(),
273        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
274        "analysis complete"
275    );
276
277    Ok(analysis_results)
278}
279
280#[instrument(skip_all, fields(path = %root.display()))]
281// public API; callers expect owned semantics
282#[allow(clippy::needless_pass_by_value)]
283pub fn analyze_directory_with_progress(
284    root: &Path,
285    entries: Vec<WalkEntry>,
286    progress: Arc<AtomicUsize>,
287    ct: CancellationToken,
288) -> Result<AnalysisOutput, AnalyzeError> {
289    // Check if already cancelled
290    if ct.is_cancelled() {
291        return Err(AnalyzeError::Cancelled);
292    }
293
294    tracing::debug!(root = %root.display(), "analysis start");
295
296    let file_entries = init_analysis_context(&entries);
297    let analysis_results = run_parallel_analysis(&file_entries, &progress, &ct)?;
298
299    let _format_span = tracing::info_span!("output.format").entered();
300
301    // Build and return output
302    Ok(build_analysis_output(entries, analysis_results))
303}
304
305/// Analyze a directory structure and return formatted output and file data.
306#[instrument(skip_all, fields(path = %root.display()))]
307pub fn analyze_directory(
308    root: &Path,
309    max_depth: Option<u32>,
310) -> Result<AnalysisOutput, AnalyzeError> {
311    let entries = walk_directory(root, max_depth)?;
312    let counter = Arc::new(AtomicUsize::new(0));
313    let ct = CancellationToken::new();
314    analyze_directory_with_progress(root, entries, counter, ct)
315}
316
317/// Determine analysis mode based on parameters and path.
318#[must_use]
319pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
320    if focus.is_some() {
321        return AnalysisMode::SymbolFocus;
322    }
323
324    let path_obj = Path::new(path);
325    if path_obj.is_dir() {
326        AnalysisMode::Overview
327    } else {
328        AnalysisMode::FileDetails
329    }
330}
331
332/// Analyze a single file and return semantic analysis with formatted output.
333#[instrument(skip_all, fields(path))]
334pub fn analyze_file(
335    path: &str,
336    ast_recursion_limit: Option<usize>,
337) -> Result<FileAnalysisOutput, AnalyzeError> {
338    let start = Instant::now();
339
340    // Check file size before reading
341    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
342        tracing::debug!("skipping large file: {}", path);
343        return Err(AnalyzeError::Parser(
344            crate::parser::ParserError::ParseError("file too large".to_string()),
345        ));
346    }
347
348    let source = std::fs::read_to_string(path)
349        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
350
351    let line_count = source.lines().count();
352
353    // Detect language from extension
354    let ext = Path::new(path)
355        .extension()
356        .and_then(|e| e.to_str())
357        .and_then(language_for_extension)
358        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
359
360    // Extract semantic information
361    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit, None)?;
362
363    // Populate the file path on references now that the path is known
364    for r in &mut semantic.references {
365        r.location = path.to_string();
366    }
367
368    // Resolve Python wildcard imports
369    if ext == "python" {
370        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
371    }
372
373    // Detect if this is a test file
374    let is_test = is_test_file(Path::new(path));
375
376    // Extract parent directory for relative path display
377    let parent_dir = Path::new(path).parent();
378
379    // Format output
380    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
381
382    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
383
384    Ok(FileAnalysisOutput::new(
385        formatted, semantic, line_count, None,
386    ))
387}
388
389/// Analyze source code from a string buffer without filesystem access.
390///
391/// This function analyzes in-memory source code by language identifier. The `language`
392/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
393/// extension (e.g., `"rs"`, `"py"`).
394///
395/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
396/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
397/// a file extension to its supported language identifier.
398///
399/// # Arguments
400///
401/// * `source` - The source code to analyze
402/// * `language` - The language identifier (language name or extension)
403/// * `ast_recursion_limit` - Optional limit for AST traversal depth
404///
405/// # Returns
406///
407/// - `Ok(FileAnalysisOutput)` on success
408/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
409/// - `Err(AnalyzeError::Parser)` if parsing fails
410///
411/// # Notes
412///
413/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
414/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
415#[inline]
416pub fn analyze_str(
417    source: &str,
418    language: &str,
419    ast_recursion_limit: Option<usize>,
420) -> Result<FileAnalysisOutput, AnalyzeError> {
421    // Resolve language: first try as a file extension, then as a language name
422    // (case-insensitive match against supported_languages()).
423    let lang = language_for_extension(language).or_else(|| {
424        let lower = language.to_ascii_lowercase();
425        supported_languages()
426            .iter()
427            .find(|&&name| name == lower)
428            .copied()
429    });
430    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
431
432    // Extract semantic information
433    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit, None)?;
434
435    // Populate a stable in-memory sentinel on all reference locations
436    for r in &mut semantic.references {
437        r.location = "<memory>".to_string();
438    }
439
440    // Count lines in the source
441    let line_count = source.lines().count();
442
443    // Format output with empty path (no filesystem access)
444    let formatted = format_file_details("", &semantic, line_count, false, None);
445
446    Ok(FileAnalysisOutput::new(
447        formatted, semantic, line_count, None,
448    ))
449}
450
/// Single entry in a call chain (depth-1 direct caller or callee).
///
/// Built by `chains_to_entries` from the first element of an `InternalCallChain`.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
pub struct CallChainEntry {
    /// Symbol name of the caller or callee.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Symbol name of the caller or callee")
    )]
    pub symbol: String,
    /// File path; made relative to the analysis root when a root is supplied and is a prefix
    /// of the path, otherwise left as recorded.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "File path relative to the repository root")
    )]
    pub file: String,
    /// Line number of the definition or call site (1-indexed).
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Line number of the definition or call site (1-indexed)",
            schema_with = "crate::schema_helpers::integer_schema"
        )
    )]
    pub line: usize,
}
475
/// Result of focused symbol analysis.
///
/// Fields marked `#[serde(skip)]` exist only for in-process consumers (pagination and header
/// generation) and never appear in serialized output or the JSON schema.
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FocusedAnalysisOutput {
    /// Formatted text representation of the call graph analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the call graph analysis")
    )]
    pub formatted: String,
    /// Opaque pagination cursor; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub prod_chains: Vec<InternalCallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub test_chains: Vec<InternalCallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub outgoing_chains: Vec<InternalCallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub def_count: usize,
    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub unfiltered_caller_count: usize,
    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[serde(default)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub impl_trait_caller_count: usize,
    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub test_callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callees: Option<Vec<CallChainEntry>>,
    /// Definition and use sites for the symbol.
    #[serde(default)]
    pub def_use_sites: Vec<crate::types::DefUseSite>,
}
538
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Name of the symbol to resolve in the call graph.
    pub focus: String,
    /// How `focus` is matched: `Exact` requires the name to be present in the graph as given;
    /// every other mode goes through the graph's indexed symbol resolution.
    pub match_mode: SymbolMatchMode,
    /// Depth handed to the incoming/outgoing chain searches and to the formatters.
    pub follow_depth: u32,
    /// Optional depth limit handed to `walk_directory`.
    pub max_depth: Option<u32>,
    /// Optional AST recursion limit.
    // NOTE(review): presumably forwarded to `SemanticExtractor::extract`; the call site lies
    // outside this section of the file.
    pub ast_recursion_limit: Option<usize>,
    /// Selects the summary formatter instead of the full formatter.
    pub use_summary: bool,
    /// When `Some(true)`, caller edges are restricted to impl-trait edges and a `FILTER:`
    /// header is prepended to the formatted output.
    pub impl_only: Option<bool>,
    /// Def/use switch.
    // NOTE(review): its consumer is not visible in this section; presumably enables
    // collection of definition/use sites. Confirm.
    pub def_use: bool,
    /// Optional per-file parse timeout in microseconds.
    // NOTE(review): presumably forwarded to `SemanticExtractor::extract`; confirm at the call
    // site.
    pub parse_timeout_micros: Option<u64>,
}
553
/// Internal parameters for focused analysis phases.
///
/// Carries the same fields as `FocusedAnalysisConfig` except `max_depth`;
/// `analyze_focused_with_progress` populates it by copying each field across.
#[derive(Clone)]
struct InternalFocusedParams {
    // Symbol name to resolve.
    focus: String,
    // Matching strategy used by `resolve_symbol`.
    match_mode: SymbolMatchMode,
    // Depth passed to the chain searches and formatters in `compute_chains`.
    follow_depth: u32,
    ast_recursion_limit: Option<usize>,
    // Chooses the summary formatter in `compute_chains`.
    use_summary: bool,
    // `Some(true)` restricts caller edges to impl-trait edges in `resolve_symbol`.
    impl_only: Option<bool>,
    def_use: bool,
    parse_timeout_micros: Option<u64>,
}
566
/// Output of `collect_file_analysis`: the (`file_path`, `semantic_analysis`) pair for every file
/// that parsed successfully, plus the impl-trait info gathered from all of those analyses.
type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
569
570/// Phase 1: Collect semantic analysis for all files in parallel.
571fn collect_file_analysis(
572    entries: &[WalkEntry],
573    progress: &Arc<AtomicUsize>,
574    ct: &CancellationToken,
575    ast_recursion_limit: Option<usize>,
576    parse_timeout_micros: Option<u64>,
577) -> Result<FileAnalysisBatch, AnalyzeError> {
578    // Check if already cancelled
579    if ct.is_cancelled() {
580        return Err(AnalyzeError::Cancelled);
581    }
582
583    // Use pre-walked entries (passed by caller)
584    // Collect semantic analysis for all files in parallel
585    let file_entries: Vec<&WalkEntry> = entries
586        .iter()
587        .filter(|e| !e.is_dir && !e.is_symlink)
588        .collect();
589
590    // Collect per-file timeout events so they can be surfaced as AnalyzeError::ParseTimeout.
591    let timed_out: std::sync::Mutex<Vec<(PathBuf, u64)>> = std::sync::Mutex::new(Vec::new());
592
593    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
594        .par_iter()
595        .filter_map(|entry| {
596            // Check cancellation per file
597            if ct.is_cancelled() {
598                return None;
599            }
600
601            let ext = entry.path.extension().and_then(|e| e.to_str());
602
603            // Check file size before reading
604            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
605                tracing::debug!("skipping large file: {}", entry.path.display());
606                progress.fetch_add(1, Ordering::Relaxed);
607                return None;
608            }
609
610            // Try to read file content
611            let Ok(source) = std::fs::read_to_string(&entry.path) else {
612                progress.fetch_add(1, Ordering::Relaxed);
613                return None;
614            };
615
616            // Detect language and extract semantic information
617            let language = if let Some(ext_str) = ext {
618                language_for_extension(ext_str)
619                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
620            } else {
621                "unknown".to_string()
622            };
623
624            match SemanticExtractor::extract(
625                &source,
626                &language,
627                ast_recursion_limit,
628                parse_timeout_micros,
629            ) {
630                Ok(mut semantic) => {
631                    // Populate file path on references
632                    for r in &mut semantic.references {
633                        r.location = entry.path.display().to_string();
634                    }
635                    // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
636                    for trait_info in &mut semantic.impl_traits {
637                        trait_info.path.clone_from(&entry.path);
638                    }
639                    progress.fetch_add(1, Ordering::Relaxed);
640                    Some((entry.path.clone(), semantic))
641                }
642                Err(crate::parser::ParserError::Timeout(micros)) => {
643                    tracing::warn!(
644                        "parse timeout exceeded for {}: {} microseconds",
645                        entry.path.display(),
646                        micros
647                    );
648                    if let Ok(mut v) = timed_out.lock() {
649                        v.push((entry.path.clone(), micros));
650                    }
651                    progress.fetch_add(1, Ordering::Relaxed);
652                    None
653                }
654                Err(_) => {
655                    progress.fetch_add(1, Ordering::Relaxed);
656                    None
657                }
658            }
659        })
660        .collect();
661
662    // Check if cancelled after parallel processing
663    if ct.is_cancelled() {
664        return Err(AnalyzeError::Cancelled);
665    }
666
667    // Surface the first timeout as AnalyzeError::ParseTimeout so callers can detect it.
668    if let Ok(mut v) = timed_out.lock()
669        && let Some((path, micros)) = v.drain(..).next()
670    {
671        return Err(AnalyzeError::ParseTimeout { path, micros });
672    }
673
674    // Collect all impl-trait info from analysis results
675    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
676        .iter()
677        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
678        .collect();
679
680    Ok((analysis_results, all_impl_traits))
681}
682
683/// Phase 2: Build call graph from analysis results.
684fn build_call_graph(
685    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
686    all_impl_traits: &[ImplTraitInfo],
687) -> Result<CallGraph, AnalyzeError> {
688    // Build call graph. Always build without impl_only filter first so we can
689    // record the unfiltered caller count before discarding those edges.
690    CallGraph::build_from_results(
691        analysis_results,
692        all_impl_traits,
693        false, // filter applied below after counting
694    )
695    .map_err(std::convert::Into::into)
696}
697
698/// Phase 3: Resolve symbol and apply `impl_only` filter.
699/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
700/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
701/// then compute `impl_trait_caller_count`.
702fn resolve_symbol(
703    graph: &mut CallGraph,
704    params: &InternalFocusedParams,
705) -> Result<(String, usize, usize), AnalyzeError> {
706    // Resolve symbol name using the requested match mode.
707    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
708        let exists = graph.definitions.contains_key(&params.focus)
709            || graph.callers.contains_key(&params.focus)
710            || graph.callees.contains_key(&params.focus);
711        if exists {
712            params.focus.clone()
713        } else {
714            return Err(crate::graph::GraphError::SymbolNotFound {
715                symbol: params.focus.clone(),
716                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
717            }
718            .into());
719        }
720    } else {
721        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
722    };
723
724    // Count unique callers for the focus symbol before applying impl_only filter.
725    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
726        edges
727            .iter()
728            .map(|e| &e.neighbor_name)
729            .collect::<std::collections::HashSet<_>>()
730            .len()
731    });
732
733    // Apply impl_only filter now if requested, then count filtered callers.
734    // Filter all caller adjacency lists so traversal and formatting are consistently
735    // restricted to impl-trait edges regardless of follow_depth.
736    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
737        for edges in graph.callers.values_mut() {
738            edges.retain(|e| e.is_impl_trait);
739        }
740        graph.callers.get(&resolved_focus).map_or(0, |edges| {
741            edges
742                .iter()
743                .map(|e| &e.neighbor_name)
744                .collect::<std::collections::HashSet<_>>()
745                .len()
746        })
747    } else {
748        unfiltered_caller_count
749    };
750
751    Ok((
752        resolved_focus,
753        unfiltered_caller_count,
754        impl_trait_caller_count,
755    ))
756}
757
/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
type ChainComputeResult = (
    // formatted_output
    String,
    // prod_chains: incoming chains whose first element is not test code
    Vec<InternalCallChain>,
    // test_chains: the remaining incoming chains
    Vec<InternalCallChain>,
    // outgoing_chains
    Vec<InternalCallChain>,
    // def_count
    usize,
);
766
767/// Helper function to convert InternalCallChain data to CallChainEntry vec.
768/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
769/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
770fn chains_to_entries(
771    chains: &[InternalCallChain],
772    root: Option<&std::path::Path>,
773) -> Option<Vec<CallChainEntry>> {
774    if chains.is_empty() {
775        return None;
776    }
777    let entries: Vec<CallChainEntry> = chains
778        .iter()
779        .take(10)
780        .filter_map(|chain| {
781            let (symbol, path, line) = chain.chain.first()?;
782            let file = match root {
783                Some(root) => path
784                    .strip_prefix(root)
785                    .unwrap_or(path.as_path())
786                    .to_string_lossy()
787                    .into_owned(),
788                None => path.to_string_lossy().into_owned(),
789            };
790            Some(CallChainEntry {
791                symbol: symbol.clone(),
792                file,
793                line: *line,
794            })
795        })
796        .collect();
797    if entries.is_empty() {
798        None
799    } else {
800        Some(entries)
801    }
802}
803
804/// Phase 4: Compute chains and format output.
805fn compute_chains(
806    graph: &CallGraph,
807    resolved_focus: &str,
808    root: &Path,
809    params: &InternalFocusedParams,
810    unfiltered_caller_count: usize,
811    impl_trait_caller_count: usize,
812    def_use_sites: &[crate::types::DefUseSite],
813) -> Result<ChainComputeResult, AnalyzeError> {
814    // Compute chain data for pagination (always, regardless of summary mode)
815    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
816    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
817    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
818
819    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
820        incoming_chains.iter().cloned().partition(|chain| {
821            chain
822                .chain
823                .first()
824                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
825        });
826
827    // Format output with pre-computed chains
828    let mut formatted = if params.use_summary {
829        format_focused_summary_internal(
830            graph,
831            resolved_focus,
832            params.follow_depth,
833            Some(root),
834            Some(&incoming_chains),
835            Some(&outgoing_chains),
836            def_use_sites,
837        )?
838    } else {
839        format_focused_internal(
840            graph,
841            resolved_focus,
842            params.follow_depth,
843            Some(root),
844            Some(&incoming_chains),
845            Some(&outgoing_chains),
846            def_use_sites,
847        )?
848    };
849
850    // Add FILTER header if impl_only filter was applied
851    if params.impl_only.unwrap_or(false) {
852        let filter_header = format!(
853            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
854        );
855        formatted = format!("{filter_header}{formatted}");
856    }
857
858    Ok((
859        formatted,
860        prod_chains,
861        test_chains,
862        outgoing_chains,
863        def_count,
864    ))
865}
866
867/// Analyze a symbol's call graph across a directory with progress tracking.
868// public API; callers expect owned semantics
869#[allow(clippy::needless_pass_by_value)]
870pub fn analyze_focused_with_progress(
871    root: &Path,
872    params: &FocusedAnalysisConfig,
873    progress: Arc<AtomicUsize>,
874    ct: CancellationToken,
875) -> Result<FocusedAnalysisOutput, AnalyzeError> {
876    let entries = walk_directory(root, params.max_depth)?;
877    let internal_params = InternalFocusedParams {
878        focus: params.focus.clone(),
879        match_mode: params.match_mode.clone(),
880        follow_depth: params.follow_depth,
881        ast_recursion_limit: params.ast_recursion_limit,
882        use_summary: params.use_summary,
883        impl_only: params.impl_only,
884        def_use: params.def_use,
885        parse_timeout_micros: params.parse_timeout_micros,
886    };
887    analyze_focused_with_progress_with_entries_internal(
888        root,
889        params.max_depth,
890        &progress,
891        &ct,
892        &internal_params,
893        &entries,
894    )
895}
896
897/// Internal implementation of focused analysis using pre-walked entries and params struct.
898#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
899fn analyze_focused_with_progress_with_entries_internal(
900    root: &Path,
901    _max_depth: Option<u32>,
902    progress: &Arc<AtomicUsize>,
903    ct: &CancellationToken,
904    params: &InternalFocusedParams,
905    entries: &[WalkEntry],
906) -> Result<FocusedAnalysisOutput, AnalyzeError> {
907    // Check if already cancelled
908    if ct.is_cancelled() {
909        return Err(AnalyzeError::Cancelled);
910    }
911
912    // Check if path is a file (hint to use directory)
913    if root.is_file() {
914        let formatted =
915            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
916                .to_string();
917        return Ok(FocusedAnalysisOutput {
918            formatted,
919            next_cursor: None,
920            prod_chains: vec![],
921            test_chains: vec![],
922            outgoing_chains: vec![],
923            def_count: 0,
924            unfiltered_caller_count: 0,
925            impl_trait_caller_count: 0,
926            callers: None,
927            test_callers: None,
928            callees: None,
929            def_use_sites: vec![],
930        });
931    }
932
933    // Phase 1: Collect file analysis
934    let (analysis_results, all_impl_traits) = collect_file_analysis(
935        entries,
936        progress,
937        ct,
938        params.ast_recursion_limit,
939        params.parse_timeout_micros,
940    )?;
941
942    // Check for cancellation before building the call graph (phase 2)
943    if ct.is_cancelled() {
944        return Err(AnalyzeError::Cancelled);
945    }
946
947    // Phase 2: Build call graph
948    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
949
950    // Check for cancellation before resolving the symbol (phase 3)
951    if ct.is_cancelled() {
952        return Err(AnalyzeError::Cancelled);
953    }
954
955    // Phase 3: Resolve symbol and apply impl_only filter.
956    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
957    // fall through to def-use extraction instead of returning SymbolNotFound.
958    let resolve_result = resolve_symbol(&mut graph, params);
959    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
960        &resolve_result
961    {
962        // Deliberately not collapsed: resolve_result must stay alive past this block
963        // so that the `?` below can propagate non-SymbolNotFound errors.
964        if params.def_use {
965            let def_use_sites =
966                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
967            if def_use_sites.is_empty() {
968                // Symbol not found anywhere (neither in call graph nor as def/use site).
969                // Propagate the original SymbolNotFound error instead of returning an
970                // empty success response.
971                if let Err(e) = resolve_result {
972                    return Err(e);
973                }
974                unreachable!("resolve_result is Ok only when symbol was found");
975            }
976            use std::fmt::Write as _;
977            let mut formatted = String::new();
978            let _ = writeln!(
979                formatted,
980                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
981                params.focus
982            );
983            {
984                let writes = def_use_sites
985                    .iter()
986                    .filter(|s| {
987                        matches!(
988                            s.kind,
989                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
990                        )
991                    })
992                    .count();
993                let reads = def_use_sites
994                    .iter()
995                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
996                    .count();
997                let _ = writeln!(
998                    formatted,
999                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
1000                    params.focus,
1001                    def_use_sites.len(),
1002                    writes,
1003                    reads
1004                );
1005            }
1006            return Ok(FocusedAnalysisOutput {
1007                formatted,
1008                next_cursor: None,
1009                callers: None,
1010                test_callers: None,
1011                callees: None,
1012                prod_chains: vec![],
1013                test_chains: vec![],
1014                outgoing_chains: vec![],
1015                def_count: 0,
1016                unfiltered_caller_count: 0,
1017                impl_trait_caller_count: 0,
1018                def_use_sites,
1019            });
1020        }
1021    }
1022    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
1023
1024    // Check for cancellation before computing chains (phase 4)
1025    if ct.is_cancelled() {
1026        return Err(AnalyzeError::Cancelled);
1027    }
1028
1029    // Phase 5 (optional, before formatting): Def-use site extraction.
1030    // Use params.focus (the raw user-supplied string) rather than resolved_focus
1031    // so that variable/field names that are not in the call graph still work.
1032    let def_use_sites = if params.def_use {
1033        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
1034    } else {
1035        Vec::new()
1036    };
1037
1038    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
1039    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
1040        &graph,
1041        &resolved_focus,
1042        root,
1043        params,
1044        unfiltered_caller_count,
1045        impl_trait_caller_count,
1046        &def_use_sites,
1047    )?;
1048
1049    // Compute depth-1 chains for structured output fields (always direct relationships only,
1050    // regardless of `follow_depth` used for the text-formatted output).
1051    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
1052        // Chains already at depth 1; reuse the partitioned vecs.
1053        let callers = chains_to_entries(&prod_chains, Some(root));
1054        let test_callers = chains_to_entries(&test_chains, Some(root));
1055        let callees = chains_to_entries(&outgoing_chains, Some(root));
1056        (callers, test_callers, callees)
1057    } else {
1058        // follow_depth > 1: re-query at depth 1 to get only direct edges.
1059        let incoming1 = graph
1060            .find_incoming_chains(&resolved_focus, 1)
1061            .unwrap_or_default();
1062        let outgoing1 = graph
1063            .find_outgoing_chains(&resolved_focus, 1)
1064            .unwrap_or_default();
1065        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
1066            chain
1067                .chain
1068                .first()
1069                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
1070        });
1071        let callers = chains_to_entries(&prod1, Some(root));
1072        let test_callers = chains_to_entries(&test1, Some(root));
1073        let callees = chains_to_entries(&outgoing1, Some(root));
1074        (callers, test_callers, callees)
1075    };
1076
1077    Ok(FocusedAnalysisOutput {
1078        formatted,
1079        next_cursor: None,
1080        callers: depth1_callers,
1081        test_callers: depth1_test_callers,
1082        callees: depth1_callees,
1083        prod_chains,
1084        test_chains,
1085        outgoing_chains,
1086        def_count,
1087        unfiltered_caller_count,
1088        impl_trait_caller_count,
1089        def_use_sites,
1090    })
1091}
1092
1093/// Phase 5: Extract def-use sites for `symbol` across all entries.
1094/// Writes go before reads; within each kind ordered by file, line, then column.
1095fn collect_def_use_sites(
1096    entries: &[WalkEntry],
1097    symbol: &str,
1098    ast_recursion_limit: Option<usize>,
1099    root: &std::path::Path,
1100    ct: &CancellationToken,
1101) -> Vec<crate::types::DefUseSite> {
1102    use crate::parser::SemanticExtractor;
1103
1104    let file_entries: Vec<&WalkEntry> = entries
1105        .iter()
1106        .filter(|e| !e.is_dir && !e.is_symlink)
1107        .collect();
1108
1109    let mut sites: Vec<crate::types::DefUseSite> = file_entries
1110        .par_iter()
1111        .filter_map(|entry| {
1112            if ct.is_cancelled() {
1113                return None;
1114            }
1115
1116            // Check file size before reading
1117            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1118                tracing::debug!("skipping large file: {}", entry.path.display());
1119                return None;
1120            }
1121
1122            let Ok(source) = std::fs::read_to_string(&entry.path) else {
1123                return None;
1124            };
1125            let ext = entry
1126                .path
1127                .extension()
1128                .and_then(|e| e.to_str())
1129                .unwrap_or("");
1130            let lang = crate::lang::language_for_extension(ext)?;
1131            let file_path = entry
1132                .path
1133                .strip_prefix(root)
1134                .unwrap_or(&entry.path)
1135                .display()
1136                .to_string();
1137            let sites = SemanticExtractor::extract_def_use_for_file(
1138                &source,
1139                lang,
1140                symbol,
1141                &file_path,
1142                ast_recursion_limit,
1143            );
1144            if sites.is_empty() { None } else { Some(sites) }
1145        })
1146        .flatten()
1147        .collect();
1148
1149    // Writes before reads; within each kind: file, line, then column for deterministic order
1150    sites.sort_by(|a, b| {
1151        use crate::types::DefUseKind;
1152        let kind_ord = |k: &DefUseKind| match k {
1153            DefUseKind::Write | DefUseKind::WriteRead => 0,
1154            DefUseKind::Read => 1,
1155        };
1156        kind_ord(&a.kind)
1157            .cmp(&kind_ord(&b.kind))
1158            .then_with(|| a.file.cmp(&b.file))
1159            .then_with(|| a.line.cmp(&b.line))
1160            .then_with(|| a.column.cmp(&b.column))
1161    });
1162
1163    sites
1164}
1165
1166/// Analyze a symbol's call graph using pre-walked directory entries.
1167pub fn analyze_focused_with_progress_with_entries(
1168    root: &Path,
1169    params: &FocusedAnalysisConfig,
1170    progress: &Arc<AtomicUsize>,
1171    ct: &CancellationToken,
1172    entries: &[WalkEntry],
1173) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1174    let internal_params = InternalFocusedParams {
1175        focus: params.focus.clone(),
1176        match_mode: params.match_mode.clone(),
1177        follow_depth: params.follow_depth,
1178        ast_recursion_limit: params.ast_recursion_limit,
1179        use_summary: params.use_summary,
1180        impl_only: params.impl_only,
1181        def_use: params.def_use,
1182        parse_timeout_micros: params.parse_timeout_micros,
1183    };
1184    analyze_focused_with_progress_with_entries_internal(
1185        root,
1186        params.max_depth,
1187        progress,
1188        ct,
1189        &internal_params,
1190        entries,
1191    )
1192}
1193
1194#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1195pub fn analyze_focused(
1196    root: &Path,
1197    focus: &str,
1198    follow_depth: u32,
1199    max_depth: Option<u32>,
1200    ast_recursion_limit: Option<usize>,
1201) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1202    let entries = walk_directory(root, max_depth)?;
1203    let counter = Arc::new(AtomicUsize::new(0));
1204    let ct = CancellationToken::new();
1205    let params = FocusedAnalysisConfig {
1206        focus: focus.to_string(),
1207        match_mode: SymbolMatchMode::Exact,
1208        follow_depth,
1209        max_depth,
1210        ast_recursion_limit,
1211        use_summary: false,
1212        impl_only: None,
1213        def_use: false,
1214        parse_timeout_micros: None,
1215    };
1216    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1217}
1218
1219/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1220/// functions, imports) for lightweight code understanding.
1221#[instrument(skip_all, fields(path))]
1222pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1223    // Check file size before reading
1224    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1225        tracing::debug!("skipping large file: {}", path);
1226        return Err(AnalyzeError::Parser(
1227            crate::parser::ParserError::ParseError("file too large".to_string()),
1228        ));
1229    }
1230
1231    let source = std::fs::read_to_string(path)
1232        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1233
1234    let file_path = Path::new(path);
1235    let name = file_path
1236        .file_name()
1237        .and_then(|s| s.to_str())
1238        .unwrap_or("unknown")
1239        .to_string();
1240
1241    let line_count = source.lines().count();
1242
1243    let language = file_path
1244        .extension()
1245        .and_then(|e| e.to_str())
1246        .and_then(language_for_extension)
1247        .ok_or_else(|| {
1248            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1249                "unsupported or missing file extension".to_string(),
1250            ))
1251        })?;
1252
1253    let mut module_info = SemanticExtractor::extract_module_info(&source, language, None)?;
1254    module_info.name = name;
1255    module_info.line_count = line_count;
1256
1257    Ok(module_info)
1258}
1259
1260/// Scan a directory for files that import a given module path.
1261///
1262/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1263/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1264/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1265pub fn analyze_import_lookup(
1266    root: &Path,
1267    module: &str,
1268    entries: &[WalkEntry],
1269    ast_recursion_limit: Option<usize>,
1270) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1271    let matches: Vec<(PathBuf, usize)> = entries
1272        .par_iter()
1273        .filter_map(|entry| {
1274            if entry.is_dir || entry.is_symlink {
1275                tracing::debug!("skipping symlink: {}", entry.path.display());
1276                return None;
1277            }
1278            let ext = entry
1279                .path
1280                .extension()
1281                .and_then(|e| e.to_str())
1282                .and_then(crate::lang::language_for_extension)?;
1283            let source = std::fs::read_to_string(&entry.path).ok()?;
1284            let semantic =
1285                SemanticExtractor::extract(&source, ext, ast_recursion_limit, None).ok()?;
1286            for import in &semantic.imports {
1287                if import.module == module || import.items.iter().any(|item| item == module) {
1288                    return Some((entry.path.clone(), import.line));
1289                }
1290            }
1291            None
1292        })
1293        .collect();
1294
1295    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1296    text.push_str(&format!("ROOT: {}\n", root.display()));
1297    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1298    for (path, line) in &matches {
1299        let rel = path.strip_prefix(root).unwrap_or(path);
1300        text.push_str(&format!("  {}:{line}\n", rel.display()));
1301    }
1302
1303    Ok(FocusedAnalysisOutput {
1304        formatted: text,
1305        next_cursor: None,
1306        prod_chains: vec![],
1307        test_chains: vec![],
1308        outgoing_chains: vec![],
1309        def_count: 0,
1310        unfiltered_caller_count: 0,
1311        impl_trait_caller_count: 0,
1312        callers: None,
1313        test_callers: None,
1314        callees: None,
1315        def_use_sites: vec![],
1316    })
1317}
1318
1319/// Resolve Python wildcard imports to actual symbol names.
1320///
1321/// For each import with items=`["*"]`, this function:
1322/// 1. Parses the relative dots (if any) and climbs the directory tree
1323/// 2. Finds the target .py file or __init__.py
1324/// 3. Extracts symbols (functions and classes) from the target
1325/// 4. Honors __all__ if defined, otherwise uses function+class names
1326///
1327/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1328fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1329    use std::collections::HashMap;
1330
1331    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1332    let Ok(file_path_canonical) = file_path.canonicalize() else {
1333        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1334        return;
1335    };
1336
1337    for import in imports.iter_mut() {
1338        if import.items != ["*"] {
1339            continue;
1340        }
1341        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1342    }
1343}
1344
1345/// Validate and canonicalize a wildcard target path, checking for self-references.
1346/// Returns the canonical path if valid, or None if validation fails.
1347fn validate_wildcard_target(
1348    target_to_read: &Path,
1349    file_path_canonical: &Path,
1350    module: &str,
1351) -> Option<PathBuf> {
1352    let Ok(canonical) = target_to_read.canonicalize() else {
1353        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1354        return None;
1355    };
1356
1357    if canonical == file_path_canonical {
1358        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1359        return None;
1360    }
1361
1362    Some(canonical)
1363}
1364
1365/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1366fn resolve_single_wildcard(
1367    import: &mut ImportInfo,
1368    file_path: &Path,
1369    file_path_canonical: &Path,
1370    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1371) {
1372    let module = import.module.clone();
1373    let dot_count = module.chars().take_while(|c| *c == '.').count();
1374    if dot_count == 0 {
1375        return;
1376    }
1377    let module_path = module.trim_start_matches('.');
1378
1379    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1380    else {
1381        return;
1382    };
1383
1384    let Some(canonical) = validate_wildcard_target(&target_to_read, file_path_canonical, &module)
1385    else {
1386        return;
1387    };
1388
1389    if let Some(cached) = resolved_cache.get(&canonical) {
1390        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1391        import.items.clone_from(cached);
1392        return;
1393    }
1394
1395    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1396        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1397        import.items.clone_from(&symbols);
1398        resolved_cache.insert(canonical, symbols);
1399    }
1400}
1401
1402/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1403fn locate_target_file(
1404    file_path: &Path,
1405    dot_count: usize,
1406    module_path: &str,
1407    module: &str,
1408) -> Option<PathBuf> {
1409    let mut target_dir = file_path.parent()?.to_path_buf();
1410
1411    for _ in 1..dot_count {
1412        if !target_dir.pop() {
1413            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1414            return None;
1415        }
1416    }
1417
1418    let target_file = if module_path.is_empty() {
1419        target_dir.join("__init__.py")
1420    } else {
1421        let rel_path = module_path.replace('.', "/");
1422        target_dir.join(format!("{rel_path}.py"))
1423    };
1424
1425    if target_file.exists() {
1426        Some(target_file)
1427    } else if target_file.with_extension("").is_dir() {
1428        let init = target_file.with_extension("").join("__init__.py");
1429        if init.exists() { Some(init) } else { None }
1430    } else {
1431        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1432        None
1433    }
1434}
1435
1436/// Build a tree-sitter parser for Python and parse the source code.
1437fn build_parser_for_file(source: &str) -> Option<tree_sitter::Tree> {
1438    use tree_sitter::Parser;
1439
1440    let lang_info = crate::languages::get_language_info("python")?;
1441    let mut parser = Parser::new();
1442    if parser.set_language(&lang_info.language).is_err() {
1443        return None;
1444    }
1445    parser.parse(source, None)
1446}
1447
1448/// Extract all public symbols from a parsed tree (functions and classes).
1449fn extract_all_symbols(tree: &tree_sitter::Tree, source: &str) -> Vec<String> {
1450    let mut symbols = Vec::new();
1451    let root = tree.root_node();
1452    let mut cursor = root.walk();
1453    for child in root.children(&mut cursor) {
1454        if matches!(child.kind(), "function_definition" | "class_definition")
1455            && let Some(name_node) = child.child_by_field_name("name")
1456        {
1457            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1458            if !name.starts_with('_') {
1459                symbols.push(name);
1460            }
1461        }
1462    }
1463    symbols
1464}
1465
1466/// Try to resolve symbols from __all__ or fallback to function/class extraction.
1467fn resolve_symbols_from_tree(tree: &tree_sitter::Tree, source: &str, module: &str) -> Vec<String> {
1468    let mut symbols = Vec::new();
1469    extract_all_from_tree(tree, source, &mut symbols);
1470    if !symbols.is_empty() {
1471        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1472        return symbols;
1473    }
1474
1475    // Fallback: extract functions/classes from the tree
1476    let symbols = extract_all_symbols(tree, source);
1477    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1478    symbols
1479}
1480
1481/// Read and parse a target .py file, returning its exported symbols.
1482fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1483    // Check file size before reading
1484    if target_path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1485        tracing::debug!("skipping large file: {}", target_path.display());
1486        return None;
1487    }
1488
1489    let source = match std::fs::read_to_string(target_path) {
1490        Ok(s) => s,
1491        Err(e) => {
1492            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1493            return None;
1494        }
1495    };
1496
1497    // Parse once with tree-sitter
1498    let tree = build_parser_for_file(&source)?;
1499
1500    // Try to extract __all__ or fallback to function/class extraction
1501    let symbols = resolve_symbols_from_tree(&tree, &source, module);
1502    Some(symbols)
1503}
1504
1505/// Extract __all__ from a tree-sitter tree.
1506fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1507    let root = tree.root_node();
1508    let mut cursor = root.walk();
1509    for child in root.children(&mut cursor) {
1510        if child.kind() == "simple_statement" {
1511            // simple_statement contains assignment and other statement types
1512            let mut simple_cursor = child.walk();
1513            for simple_child in child.children(&mut simple_cursor) {
1514                if simple_child.kind() == "assignment"
1515                    && let Some(left) = simple_child.child_by_field_name("left")
1516                {
1517                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1518                    if target_text == "__all__"
1519                        && let Some(right) = simple_child.child_by_field_name("right")
1520                    {
1521                        extract_string_list_from_list_node(&right, source, result);
1522                    }
1523                }
1524            }
1525        } else if child.kind() == "expression_statement" {
1526            // Fallback for older Python AST structures
1527            let mut stmt_cursor = child.walk();
1528            for stmt_child in child.children(&mut stmt_cursor) {
1529                if stmt_child.kind() == "assignment"
1530                    && let Some(left) = stmt_child.child_by_field_name("left")
1531                {
1532                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1533                    if target_text == "__all__"
1534                        && let Some(right) = stmt_child.child_by_field_name("right")
1535                    {
1536                        extract_string_list_from_list_node(&right, source, result);
1537                    }
1538                }
1539            }
1540        }
1541    }
1542}
1543
1544/// Extract string literals from a Python list node.
1545fn extract_string_list_from_list_node(
1546    list_node: &tree_sitter::Node,
1547    source: &str,
1548    result: &mut Vec<String>,
1549) {
1550    let mut cursor = list_node.walk();
1551    for child in list_node.named_children(&mut cursor) {
1552        if child.kind() == "string" {
1553            let raw = source[child.start_byte()..child.end_byte()].trim();
1554            // Strip quotes: "name" -> name
1555            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1556            if !unquoted.is_empty() {
1557                result.push(unquoted);
1558            }
1559        }
1560    }
1561}
1562
/// Unit tests for this module's analysis entry points (e.g. `analyze_str`, `analyze_focused`) and caller pagination.
1564#[cfg(test)]
1565mod tests {
1566    use super::*;
1567    use crate::formatter::format_focused_paginated;
1568    use crate::graph::InternalCallChain;
1569    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1570    use std::fs;
1571    use std::path::PathBuf;
1572    use tempfile::TempDir;
1573
1574    #[cfg(feature = "lang-rust")]
1575    #[test]
1576    fn analyze_str_rust_happy_path() {
1577        let source = "fn hello() -> i32 { 42 }";
1578        let result = analyze_str(source, "rs", None);
1579        assert!(result.is_ok());
1580    }
1581
1582    #[cfg(feature = "lang-python")]
1583    #[test]
1584    fn analyze_str_python_happy_path() {
1585        let source = "def greet(name):\n    return f'Hello {name}'";
1586        let result = analyze_str(source, "py", None);
1587        assert!(result.is_ok());
1588    }
1589
1590    #[cfg(feature = "lang-rust")]
1591    #[test]
1592    fn analyze_str_rust_by_language_name() {
1593        let source = "fn hello() -> i32 { 42 }";
1594        let result = analyze_str(source, "rust", None);
1595        assert!(result.is_ok());
1596    }
1597
1598    #[cfg(feature = "lang-python")]
1599    #[test]
1600    fn analyze_str_python_by_language_name() {
1601        let source = "def greet(name):\n    return f'Hello {name}'";
1602        let result = analyze_str(source, "python", None);
1603        assert!(result.is_ok());
1604    }
1605
1606    #[cfg(feature = "lang-rust")]
1607    #[test]
1608    fn analyze_str_rust_mixed_case() {
1609        let source = "fn hello() -> i32 { 42 }";
1610        let result = analyze_str(source, "RuSt", None);
1611        assert!(result.is_ok());
1612    }
1613
1614    #[cfg(feature = "lang-python")]
1615    #[test]
1616    fn analyze_str_python_mixed_case() {
1617        let source = "def greet(name):\n    return f'Hello {name}'";
1618        let result = analyze_str(source, "PyThOn", None);
1619        assert!(result.is_ok());
1620    }
1621
1622    #[test]
1623    fn analyze_str_unsupported_language() {
1624        let result = analyze_str("code", "brainfuck", None);
1625        assert!(
1626            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1627        );
1628    }
1629
1630    #[cfg(feature = "lang-rust")]
1631    #[test]
1632    fn test_symbol_focus_callers_pagination_first_page() {
1633        let temp_dir = TempDir::new().unwrap();
1634
1635        // Create a file with many callers of `target`
1636        let mut code = String::from("fn target() {}\n");
1637        for i in 0..15 {
1638            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1639        }
1640        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1641
1642        // Act
1643        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1644
1645        // Paginate prod callers with page_size=5
1646        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1647            .expect("paginate failed");
1648        assert!(
1649            paginated.total >= 5,
1650            "should have enough callers to paginate"
1651        );
1652        assert!(
1653            paginated.next_cursor.is_some(),
1654            "should have next_cursor for page 1"
1655        );
1656
1657        // Verify cursor encodes callers mode
1658        assert_eq!(paginated.items.len(), 5);
1659    }
1660
1661    #[test]
1662    fn test_symbol_focus_callers_pagination_second_page() {
1663        let temp_dir = TempDir::new().unwrap();
1664
1665        let mut code = String::from("fn target() {}\n");
1666        for i in 0..12 {
1667            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1668        }
1669        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1670
1671        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1672        let total_prod = output.prod_chains.len();
1673
1674        if total_prod > 5 {
1675            // Get page 1 cursor
1676            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1677                .expect("paginate failed");
1678            assert!(p1.next_cursor.is_some());
1679
1680            let cursor_str = p1.next_cursor.unwrap();
1681            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1682
1683            // Get page 2
1684            let p2 = paginate_slice(
1685                &output.prod_chains,
1686                cursor_data.offset,
1687                5,
1688                PaginationMode::Callers,
1689            )
1690            .expect("paginate failed");
1691
1692            // Format paginated output
1693            let formatted = format_focused_paginated(
1694                &p2.items,
1695                total_prod,
1696                PaginationMode::Callers,
1697                "target",
1698                &output.prod_chains,
1699                &output.test_chains,
1700                &output.outgoing_chains,
1701                output.def_count,
1702                cursor_data.offset,
1703                Some(temp_dir.path()),
1704                true,
1705            );
1706
1707            // Assert: header shows correct range for page 2
1708            let expected_start = cursor_data.offset + 1;
1709            assert!(
1710                formatted.contains(&format!("CALLERS ({}", expected_start)),
1711                "header should show page 2 range, got: {}",
1712                formatted
1713            );
1714        }
1715    }
1716
1717    #[test]
1718    fn test_chains_to_entries_empty_returns_none() {
1719        // Arrange
1720        let chains: Vec<InternalCallChain> = vec![];
1721
1722        // Act
1723        let result = chains_to_entries(&chains, None);
1724
1725        // Assert
1726        assert!(result.is_none());
1727    }
1728
1729    #[test]
1730    fn test_chains_to_entries_with_data_returns_entries() {
1731        // Arrange
1732        let chains = vec![
1733            InternalCallChain {
1734                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1735            },
1736            InternalCallChain {
1737                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1738            },
1739        ];
1740        let root = PathBuf::from("/root");
1741
1742        // Act
1743        let result = chains_to_entries(&chains, Some(root.as_path()));
1744
1745        // Assert
1746        assert!(result.is_some());
1747        let entries = result.unwrap();
1748        assert_eq!(entries.len(), 2);
1749        assert_eq!(entries[0].symbol, "caller1");
1750        assert_eq!(entries[0].file, "lib.rs");
1751        assert_eq!(entries[0].line, 10);
1752        assert_eq!(entries[1].symbol, "caller2");
1753        assert_eq!(entries[1].file, "other.rs");
1754        assert_eq!(entries[1].line, 20);
1755    }
1756
1757    #[test]
1758    fn test_symbol_focus_callees_pagination() {
1759        let temp_dir = TempDir::new().unwrap();
1760
1761        // target calls many functions
1762        let mut code = String::from("fn target() {\n");
1763        for i in 0..10 {
1764            code.push_str(&format!("    callee_{:02}();\n", i));
1765        }
1766        code.push_str("}\n");
1767        for i in 0..10 {
1768            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1769        }
1770        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1771
1772        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1773        let total_callees = output.outgoing_chains.len();
1774
1775        if total_callees > 3 {
1776            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1777                .expect("paginate failed");
1778
1779            let formatted = format_focused_paginated(
1780                &paginated.items,
1781                total_callees,
1782                PaginationMode::Callees,
1783                "target",
1784                &output.prod_chains,
1785                &output.test_chains,
1786                &output.outgoing_chains,
1787                output.def_count,
1788                0,
1789                Some(temp_dir.path()),
1790                true,
1791            );
1792
1793            assert!(
1794                formatted.contains(&format!(
1795                    "CALLEES (1-{} of {})",
1796                    paginated.items.len(),
1797                    total_callees
1798                )),
1799                "header should show callees range, got: {}",
1800                formatted
1801            );
1802        }
1803    }
1804
1805    #[test]
1806    fn test_symbol_focus_empty_prod_callers() {
1807        let temp_dir = TempDir::new().unwrap();
1808
1809        // target is only called from test functions
1810        let code = r#"
1811fn target() {}
1812
1813#[cfg(test)]
1814mod tests {
1815    use super::*;
1816    #[test]
1817    fn test_something() { target(); }
1818}
1819"#;
1820        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1821
1822        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1823
1824        // prod_chains may be empty; pagination should handle it gracefully
1825        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1826            .expect("paginate failed");
1827        assert_eq!(paginated.items.len(), output.prod_chains.len());
1828        assert!(
1829            paginated.next_cursor.is_none(),
1830            "no next_cursor for empty or single-page prod_chains"
1831        );
1832    }
1833
1834    #[test]
1835    fn test_impl_only_filter_header_correct_counts() {
1836        let temp_dir = TempDir::new().unwrap();
1837
1838        // Create a Rust fixture with:
1839        // - A trait definition
1840        // - An impl Trait for SomeType block that calls the focus symbol
1841        // - A regular (non-trait-impl) function that also calls the focus symbol
1842        let code = r#"
1843trait MyTrait {
1844    fn focus_symbol();
1845}
1846
1847struct SomeType;
1848
1849impl MyTrait for SomeType {
1850    fn focus_symbol() {}
1851}
1852
1853fn impl_caller() {
1854    SomeType::focus_symbol();
1855}
1856
1857fn regular_caller() {
1858    SomeType::focus_symbol();
1859}
1860"#;
1861        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1862
1863        // Call analyze_focused with impl_only=Some(true)
1864        let params = FocusedAnalysisConfig {
1865            focus: "focus_symbol".to_string(),
1866            match_mode: SymbolMatchMode::Insensitive,
1867            follow_depth: 1,
1868            max_depth: None,
1869            ast_recursion_limit: None,
1870            use_summary: false,
1871            impl_only: Some(true),
1872            def_use: false,
1873            parse_timeout_micros: None,
1874        };
1875        let output = analyze_focused_with_progress(
1876            temp_dir.path(),
1877            &params,
1878            Arc::new(AtomicUsize::new(0)),
1879            CancellationToken::new(),
1880        )
1881        .unwrap();
1882
1883        // Assert the result contains "FILTER: impl_only=true"
1884        assert!(
1885            output.formatted.contains("FILTER: impl_only=true"),
1886            "formatted output should contain FILTER header for impl_only=true, got: {}",
1887            output.formatted
1888        );
1889
1890        // Assert the retained count N < total count M
1891        assert!(
1892            output.impl_trait_caller_count < output.unfiltered_caller_count,
1893            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1894            output.impl_trait_caller_count,
1895            output.unfiltered_caller_count
1896        );
1897
1898        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1899        let filter_line = output
1900            .formatted
1901            .lines()
1902            .find(|line| line.contains("FILTER: impl_only=true"))
1903            .expect("should find FILTER line");
1904        assert!(
1905            filter_line.contains(&format!(
1906                "({} of {} callers shown)",
1907                output.impl_trait_caller_count, output.unfiltered_caller_count
1908            )),
1909            "FILTER line should show correct N of M counts, got: {}",
1910            filter_line
1911        );
1912    }
1913
1914    #[test]
1915    fn test_callers_count_matches_formatted_output() {
1916        let temp_dir = TempDir::new().unwrap();
1917
1918        // Create a file with multiple callers of `target`
1919        let code = r#"
1920fn target() {}
1921fn caller_a() { target(); }
1922fn caller_b() { target(); }
1923fn caller_c() { target(); }
1924"#;
1925        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1926
1927        // Analyze the symbol
1928        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1929
1930        // Extract CALLERS count from formatted output
1931        let formatted = &output.formatted;
1932        let callers_count_from_output = formatted
1933            .lines()
1934            .find(|line| line.contains("FOCUS:"))
1935            .and_then(|line| {
1936                line.split(',')
1937                    .find(|part| part.contains("callers"))
1938                    .and_then(|part| {
1939                        part.trim()
1940                            .split_whitespace()
1941                            .next()
1942                            .and_then(|s| s.parse::<usize>().ok())
1943                    })
1944            })
1945            .expect("should find CALLERS count in formatted output");
1946
1947        // Compute expected count from prod_chains (unique first-caller names)
1948        let expected_callers_count = output
1949            .prod_chains
1950            .iter()
1951            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1952            .collect::<std::collections::HashSet<_>>()
1953            .len();
1954
1955        assert_eq!(
1956            callers_count_from_output, expected_callers_count,
1957            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1958        );
1959    }
1960
1961    #[cfg(feature = "lang-rust")]
1962    #[test]
1963    fn test_def_use_focused_analysis() {
1964        let temp_dir = TempDir::new().unwrap();
1965        fs::write(
1966            temp_dir.path().join("lib.rs"),
1967            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1968        )
1969        .unwrap();
1970
1971        let entries = walk_directory(temp_dir.path(), None).unwrap();
1972        let counter = Arc::new(AtomicUsize::new(0));
1973        let ct = CancellationToken::new();
1974        let params = FocusedAnalysisConfig {
1975            focus: "x".to_string(),
1976            match_mode: SymbolMatchMode::Exact,
1977            follow_depth: 1,
1978            max_depth: None,
1979            ast_recursion_limit: None,
1980            use_summary: false,
1981            impl_only: None,
1982            def_use: true,
1983            parse_timeout_micros: None,
1984        };
1985
1986        let output = analyze_focused_with_progress_with_entries(
1987            temp_dir.path(),
1988            &params,
1989            &counter,
1990            &ct,
1991            &entries,
1992        )
1993        .expect("def_use analysis should succeed");
1994
1995        assert!(
1996            !output.def_use_sites.is_empty(),
1997            "should find def-use sites for x"
1998        );
1999        assert!(
2000            output
2001                .def_use_sites
2002                .iter()
2003                .any(|s| s.kind == crate::types::DefUseKind::Write),
2004            "should have at least one Write site",
2005        );
2006        // No location appears as both write and read
2007        let write_locs: std::collections::HashSet<_> = output
2008            .def_use_sites
2009            .iter()
2010            .filter(|s| {
2011                matches!(
2012                    s.kind,
2013                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
2014                )
2015            })
2016            .map(|s| (&s.file, s.line, s.column))
2017            .collect();
2018        assert!(
2019            output
2020                .def_use_sites
2021                .iter()
2022                .filter(|s| s.kind == crate::types::DefUseKind::Read)
2023                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
2024            "no location should appear as both write and read",
2025        );
2026        assert!(
2027            output.formatted.contains("DEF-USE SITES"),
2028            "formatted output should contain DEF-USE SITES"
2029        );
2030    }
2031
2032    fn make_temp_file(content: &str) -> tempfile::NamedTempFile {
2033        let mut f = tempfile::NamedTempFile::new().unwrap();
2034        use std::io::Write;
2035        f.write_all(content.as_bytes()).unwrap();
2036        f.flush().unwrap();
2037        f
2038    }
2039}