aptu_coder_core/
analyze.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Upper bound, in bytes, on the on-disk size of a file this module will read.
///
/// Files larger than this are skipped by the directory and call-graph passes and are
/// rejected with a parser error by [`analyze_file`].
pub const MAX_FILE_SIZE_BYTES: u64 = 10_000_000;
32
/// Errors returned by the analysis entry points in this module.
///
/// Variants marked `#[from]` are produced by `?` from the corresponding lower-level
/// error type.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Failure reported by the directory traversal layer.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Failure reported by the parser. [`analyze_file`] also uses this variant for
    /// files that exceed [`MAX_FILE_SIZE_BYTES`] or cannot be read.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Failure reported by the call-graph layer (for example, a symbol that cannot be
    /// resolved).
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Failure reported while rendering the formatted output.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The cancellation token was triggered before or during the analysis.
    #[error("Analysis cancelled")]
    Cancelled,
    /// The language identifier (payload) is neither a known extension nor a supported
    /// language name.
    #[error("unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Underlying I/O failure.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// A requested line range whose `start` is greater than its `end`; `total` is the
    /// number of lines in the file.
    #[error("invalid range: start ({start}) > end ({end}); file has {total} lines")]
    InvalidRange {
        start: usize,
        end: usize,
        total: usize,
    },
    /// The given path refers to a directory where a file was expected.
    #[error("path is a directory, not a file: {0}")]
    NotAFile(PathBuf),
    /// A file of `total_lines` lines was requested without a line range.
    #[error(
        "file has {total_lines} lines; provide start_line and end_line, or call analyze_module first to locate the range"
    )]
    RangelessLargeFile { total_lines: usize },
    /// Parsing `path` hit the parse timeout; `micros` is the microsecond value carried
    /// by the parser's timeout error.
    #[error("parse timeout exceeded for {path}: {micros} microseconds")]
    ParseTimeout { path: PathBuf, micros: u64 },
}
65
66/// Result of directory analysis containing both formatted output and file data.
67#[derive(Debug, Clone, Serialize, Deserialize)]
68#[cfg_attr(feature = "schemars", derive(JsonSchema))]
69#[non_exhaustive]
70pub struct AnalysisOutput {
71    #[cfg_attr(
72        feature = "schemars",
73        schemars(description = "Formatted text representation of the analysis")
74    )]
75    pub formatted: String,
76    #[cfg_attr(
77        feature = "schemars",
78        schemars(description = "List of files analyzed in the directory")
79    )]
80    pub files: Vec<FileInfo>,
81    /// Walk entries used internally for summary generation; not serialized.
82    #[serde(skip)]
83    #[serde(default)]
84    #[cfg_attr(feature = "schemars", schemars(skip))]
85    pub entries: Vec<WalkEntry>,
86    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
87    #[serde(skip)]
88    #[serde(default)]
89    #[cfg_attr(feature = "schemars", schemars(skip))]
90    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
91    #[serde(skip_serializing_if = "Option::is_none")]
92    #[cfg_attr(
93        feature = "schemars",
94        schemars(
95            description = "Opaque cursor token for the next page of results (absent when no more results)"
96        )
97    )]
98    pub next_cursor: Option<String>,
99}
100
101/// Result of file-level semantic analysis.
102#[derive(Debug, Clone, Serialize, Deserialize)]
103#[cfg_attr(feature = "schemars", derive(JsonSchema))]
104#[non_exhaustive]
105pub struct FileAnalysisOutput {
106    #[cfg_attr(
107        feature = "schemars",
108        schemars(description = "Formatted text representation of the analysis")
109    )]
110    pub formatted: String,
111    #[cfg_attr(
112        feature = "schemars",
113        schemars(description = "Semantic analysis data including functions, classes, and imports")
114    )]
115    pub semantic: SemanticAnalysis,
116    #[cfg_attr(
117        feature = "schemars",
118        schemars(description = "Total line count of the analyzed file")
119    )]
120    #[cfg_attr(
121        feature = "schemars",
122        schemars(schema_with = "crate::schema_helpers::integer_schema")
123    )]
124    pub line_count: usize,
125    #[serde(skip_serializing_if = "Option::is_none")]
126    #[cfg_attr(
127        feature = "schemars",
128        schemars(
129            description = "Opaque cursor token for the next page of results (absent when no more results)"
130        )
131    )]
132    pub next_cursor: Option<String>,
133}
134
impl FileAnalysisOutput {
    /// Create a new `FileAnalysisOutput`.
    ///
    /// # Arguments
    ///
    /// * `formatted` - Rendered text for the analysis
    /// * `semantic` - Extracted semantic data the text was rendered from
    /// * `line_count` - Total number of lines in the analyzed source
    /// * `next_cursor` - Pagination cursor, or `None` when there is no further page
    #[must_use]
    pub fn new(
        formatted: String,
        semantic: SemanticAnalysis,
        line_count: usize,
        next_cursor: Option<String>,
    ) -> Self {
        Self {
            formatted,
            semantic,
            line_count,
            next_cursor,
        }
    }
}
152/// Check if a file is eligible for analysis based on size and language support.
153fn check_file_eligibility(entry: &WalkEntry) -> bool {
154    // Check file size before reading
155    if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
156        tracing::debug!("skipping large file: {}", entry.path.display());
157        return false;
158    }
159
160    // Try to read file content; skip binary or unreadable files
161    std::fs::read_to_string(&entry.path).is_ok()
162}
163
164/// Process a single file entry and extract its analysis data.
165fn process_file_entry(entry: &WalkEntry, source: &str) -> FileInfo {
166    let path_str = entry.path.display().to_string();
167    let line_count = source.lines().count();
168
169    // Detect language from extension
170    let ext = entry.path.extension().and_then(|e| e.to_str());
171
172    // Detect language and extract counts
173    let (language, function_count, class_count) = if let Some(ext_str) = ext {
174        if let Some(lang) = language_for_extension(ext_str) {
175            let lang_str = lang.to_string();
176            match ElementExtractor::extract_with_depth(source, &lang_str) {
177                Ok((func_count, class_count)) => (lang_str, func_count, class_count),
178                Err(_) => (lang_str, 0, 0),
179            }
180        } else {
181            ("unknown".to_string(), 0, 0)
182        }
183    } else {
184        ("unknown".to_string(), 0, 0)
185    };
186
187    let is_test = is_test_file(&entry.path);
188
189    FileInfo {
190        path: path_str,
191        line_count,
192        function_count,
193        class_count,
194        language,
195        is_test,
196    }
197}
198
199/// Analyze a single file entry in parallel context.
200fn analyze_single_file(
201    entry: &WalkEntry,
202    progress: &Arc<AtomicUsize>,
203    ct: &CancellationToken,
204) -> Option<FileInfo> {
205    // Check cancellation per file
206    if ct.is_cancelled() {
207        return None;
208    }
209
210    // Check file eligibility
211    if !check_file_eligibility(entry) {
212        progress.fetch_add(1, Ordering::Relaxed);
213        return None;
214    }
215
216    // Read file content (already checked in check_file_eligibility)
217    let Ok(source) = std::fs::read_to_string(&entry.path) else {
218        progress.fetch_add(1, Ordering::Relaxed);
219        return None;
220    };
221
222    let file_info = process_file_entry(entry, &source);
223    progress.fetch_add(1, Ordering::Relaxed);
224
225    Some(file_info)
226}
227
228/// Initialize analysis context and collect file entries.
229fn init_analysis_context(entries: &[WalkEntry]) -> Vec<&WalkEntry> {
230    entries
231        .iter()
232        .filter(|e| !e.is_dir && !e.is_symlink)
233        .collect()
234}
235
236/// Build the final analysis output from results.
237fn build_analysis_output(
238    entries: Vec<WalkEntry>,
239    analysis_results: Vec<FileInfo>,
240) -> AnalysisOutput {
241    let formatted = format_structure(&entries, &analysis_results, None);
242    AnalysisOutput {
243        formatted,
244        files: analysis_results,
245        entries,
246        next_cursor: None,
247        subtree_counts: None,
248    }
249}
250
251/// Run parallel analysis on file entries and log completion.
252fn run_parallel_analysis(
253    file_entries: &[&WalkEntry],
254    progress: &Arc<AtomicUsize>,
255    ct: &CancellationToken,
256) -> Result<Vec<FileInfo>, AnalyzeError> {
257    let start = Instant::now();
258    tracing::debug!(file_count = file_entries.len(), "analysis start");
259
260    let _parse_span = tracing::info_span!("ast.parse_batch", count = file_entries.len()).entered();
261
262    // Parallel analysis of files
263    let analysis_results: Vec<FileInfo> = file_entries
264        .par_iter()
265        .filter_map(|entry| analyze_single_file(entry, progress, ct))
266        .collect();
267
268    // Check if cancelled after parallel processing
269    if ct.is_cancelled() {
270        return Err(AnalyzeError::Cancelled);
271    }
272
273    tracing::debug!(
274        file_count = file_entries.len(),
275        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
276        "analysis complete"
277    );
278
279    Ok(analysis_results)
280}
281
282#[instrument(skip_all, fields(path = %root.display()))]
283// public API; callers expect owned semantics
284#[allow(clippy::needless_pass_by_value)]
285pub fn analyze_directory_with_progress(
286    root: &Path,
287    entries: Vec<WalkEntry>,
288    progress: Arc<AtomicUsize>,
289    ct: CancellationToken,
290) -> Result<AnalysisOutput, AnalyzeError> {
291    // Check if already cancelled
292    if ct.is_cancelled() {
293        return Err(AnalyzeError::Cancelled);
294    }
295
296    tracing::debug!(root = %root.display(), "analysis start");
297
298    let file_entries = init_analysis_context(&entries);
299    let analysis_results = run_parallel_analysis(&file_entries, &progress, &ct)?;
300
301    let _format_span = tracing::info_span!("output.format").entered();
302
303    // Build and return output
304    Ok(build_analysis_output(entries, analysis_results))
305}
306
307/// Analyze a directory structure and return formatted output and file data.
308#[instrument(skip_all, fields(path = %root.display()))]
309pub fn analyze_directory(
310    root: &Path,
311    max_depth: Option<u32>,
312) -> Result<AnalysisOutput, AnalyzeError> {
313    let entries = walk_directory(root, max_depth)?;
314    let counter = Arc::new(AtomicUsize::new(0));
315    let ct = CancellationToken::new();
316    analyze_directory_with_progress(root, entries, counter, ct)
317}
318
319/// Determine analysis mode based on parameters and path.
320#[must_use]
321pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
322    if focus.is_some() {
323        return AnalysisMode::SymbolFocus;
324    }
325
326    let path_obj = Path::new(path);
327    if path_obj.is_dir() {
328        AnalysisMode::Overview
329    } else {
330        AnalysisMode::FileDetails
331    }
332}
333
334/// Analyze a single file and return semantic analysis with formatted output.
335#[instrument(skip_all, fields(path))]
336pub fn analyze_file(
337    path: &str,
338    ast_recursion_limit: Option<usize>,
339) -> Result<FileAnalysisOutput, AnalyzeError> {
340    let start = Instant::now();
341
342    // Check file size before reading
343    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
344        tracing::debug!("skipping large file: {}", path);
345        return Err(AnalyzeError::Parser(
346            crate::parser::ParserError::ParseError("file too large".to_string()),
347        ));
348    }
349
350    let source = std::fs::read_to_string(path)
351        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
352
353    let line_count = source.lines().count();
354
355    // Detect language from extension
356    let ext = Path::new(path)
357        .extension()
358        .and_then(|e| e.to_str())
359        .and_then(language_for_extension)
360        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
361
362    // Extract semantic information
363    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit, None)?;
364
365    // Populate the file path on references now that the path is known
366    for r in &mut semantic.references {
367        r.location = path.to_string();
368    }
369
370    // Resolve Python wildcard imports
371    if ext == "python" {
372        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
373    }
374
375    // Detect if this is a test file
376    let is_test = is_test_file(Path::new(path));
377
378    // Extract parent directory for relative path display
379    let parent_dir = Path::new(path).parent();
380
381    // Format output
382    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
383
384    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
385
386    Ok(FileAnalysisOutput::new(
387        formatted, semantic, line_count, None,
388    ))
389}
390
391/// Analyze source code from a string buffer without filesystem access.
392///
393/// This function analyzes in-memory source code by language identifier. The `language`
394/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
395/// extension (e.g., `"rs"`, `"py"`).
396///
397/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
398/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
399/// a file extension to its supported language identifier.
400///
401/// # Arguments
402///
403/// * `source` - The source code to analyze
404/// * `language` - The language identifier (language name or extension)
405/// * `ast_recursion_limit` - Optional limit for AST traversal depth
406///
407/// # Returns
408///
409/// - `Ok(FileAnalysisOutput)` on success
410/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
411/// - `Err(AnalyzeError::Parser)` if parsing fails
412///
413/// # Notes
414///
415/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
416/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
417#[inline]
418pub fn analyze_str(
419    source: &str,
420    language: &str,
421    ast_recursion_limit: Option<usize>,
422) -> Result<FileAnalysisOutput, AnalyzeError> {
423    // Resolve language: first try as a file extension, then as a language name
424    // (case-insensitive match against supported_languages()).
425    let lang = language_for_extension(language).or_else(|| {
426        let lower = language.to_ascii_lowercase();
427        supported_languages()
428            .iter()
429            .find(|&&name| name == lower)
430            .copied()
431    });
432    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
433
434    // Extract semantic information
435    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit, None)?;
436
437    // Populate a stable in-memory sentinel on all reference locations
438    for r in &mut semantic.references {
439        r.location = "<memory>".to_string();
440    }
441
442    // Count lines in the source
443    let line_count = source.lines().count();
444
445    // Format output with empty path (no filesystem access)
446    let formatted = format_file_details("", &semantic, line_count, false, None);
447
448    Ok(FileAnalysisOutput::new(
449        formatted, semantic, line_count, None,
450    ))
451}
452
/// Single entry in a call chain (depth-1 direct caller or callee).
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
pub struct CallChainEntry {
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Symbol name of the caller or callee")
    )]
    pub symbol: String,
    // Populated by `chains_to_entries`, which strips the analysis root from the path
    // when the path lies under that root.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "File path relative to the repository root")
    )]
    pub file: String,
    // `schema_with` routes the schema for this `usize` through the crate's integer helper.
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Line number of the definition or call site (1-indexed)",
            schema_with = "crate::schema_helpers::integer_schema"
        )
    )]
    pub line: usize,
}
477
478/// Result of focused symbol analysis.
479#[derive(Debug, Serialize, Deserialize)]
480#[cfg_attr(feature = "schemars", derive(JsonSchema))]
481#[non_exhaustive]
482pub struct FocusedAnalysisOutput {
483    #[cfg_attr(
484        feature = "schemars",
485        schemars(description = "Formatted text representation of the call graph analysis")
486    )]
487    pub formatted: String,
488    #[serde(skip_serializing_if = "Option::is_none")]
489    #[cfg_attr(
490        feature = "schemars",
491        schemars(
492            description = "Opaque cursor token for the next page of results (absent when no more results)"
493        )
494    )]
495    pub next_cursor: Option<String>,
496    /// Production caller chains (partitioned from incoming chains, excluding test callers).
497    /// Not serialized; used for pagination in lib.rs.
498    #[serde(skip)]
499    #[serde(default)]
500    #[cfg_attr(feature = "schemars", schemars(skip))]
501    pub prod_chains: Vec<InternalCallChain>,
502    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
503    #[serde(skip)]
504    #[serde(default)]
505    #[cfg_attr(feature = "schemars", schemars(skip))]
506    pub test_chains: Vec<InternalCallChain>,
507    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
508    #[serde(skip)]
509    #[serde(default)]
510    #[cfg_attr(feature = "schemars", schemars(skip))]
511    pub outgoing_chains: Vec<InternalCallChain>,
512    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
513    #[serde(skip)]
514    #[serde(default)]
515    #[cfg_attr(feature = "schemars", schemars(skip))]
516    pub def_count: usize,
517    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
518    #[serde(skip)]
519    #[serde(default)]
520    #[cfg_attr(feature = "schemars", schemars(skip))]
521    pub unfiltered_caller_count: usize,
522    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
523    #[serde(skip)]
524    #[serde(default)]
525    #[cfg_attr(feature = "schemars", schemars(skip))]
526    pub impl_trait_caller_count: usize,
527    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
528    #[serde(skip_serializing_if = "Option::is_none")]
529    pub callers: Option<Vec<CallChainEntry>>,
530    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
531    #[serde(skip_serializing_if = "Option::is_none")]
532    pub test_callers: Option<Vec<CallChainEntry>>,
533    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
534    #[serde(skip_serializing_if = "Option::is_none")]
535    pub callees: Option<Vec<CallChainEntry>>,
536    /// Definition and use sites for the symbol.
537    #[serde(default)]
538    pub def_use_sites: Vec<crate::types::DefUseSite>,
539}
540
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Symbol to focus on; resolved against the call graph according to `match_mode`.
    pub focus: String,
    /// How `focus` is matched against known symbols (exact or one of the indexed modes).
    pub match_mode: SymbolMatchMode,
    /// Depth used when searching for incoming and outgoing call chains.
    pub follow_depth: u32,
    /// Depth limit handed to the directory walk.
    pub max_depth: Option<u32>,
    /// Optional AST recursion limit for semantic extraction.
    pub ast_recursion_limit: Option<usize>,
    /// Render the summary form of the focused output instead of the full form.
    pub use_summary: bool,
    /// `Some(true)` restricts caller edges to impl-trait edges; `None` behaves like
    /// `Some(false)`.
    pub impl_only: Option<bool>,
    /// NOTE(review): presumably enables collection of definition/use sites; the code
    /// that reads this flag is not visible here — confirm against the implementation.
    pub def_use: bool,
    /// Optional per-file parse timeout in microseconds.
    pub parse_timeout_micros: Option<u64>,
}
555
/// Internal parameters for focused analysis phases.
///
/// Mirrors [`FocusedAnalysisConfig`] field for field, except that `max_depth` is not
/// carried here.
#[derive(Clone)]
struct InternalFocusedParams {
    // Symbol to resolve in the call graph.
    focus: String,
    // Matching strategy used when resolving `focus`.
    match_mode: SymbolMatchMode,
    // Depth for the incoming/outgoing chain searches.
    follow_depth: u32,
    // Optional AST recursion limit for semantic extraction.
    ast_recursion_limit: Option<usize>,
    // Selects the summary formatter over the full formatter.
    use_summary: bool,
    // `Some(true)` keeps only impl-trait caller edges; `None` is treated as `false`.
    impl_only: Option<bool>,
    // NOTE(review): consumer of this flag is not visible here — confirm its effect.
    def_use: bool,
    // Optional per-file parse timeout in microseconds.
    parse_timeout_micros: Option<u64>,
}
568
/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
///
/// The second element is the concatenation of the `impl_traits` of every analysis in
/// the first element, as built by `collect_file_analysis`.
type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
571
572/// Phase 1: Collect semantic analysis for all files in parallel.
573fn collect_file_analysis(
574    entries: &[WalkEntry],
575    progress: &Arc<AtomicUsize>,
576    ct: &CancellationToken,
577    ast_recursion_limit: Option<usize>,
578    parse_timeout_micros: Option<u64>,
579) -> Result<FileAnalysisBatch, AnalyzeError> {
580    // Check if already cancelled
581    if ct.is_cancelled() {
582        return Err(AnalyzeError::Cancelled);
583    }
584
585    // Use pre-walked entries (passed by caller)
586    // Collect semantic analysis for all files in parallel
587    let file_entries: Vec<&WalkEntry> = entries
588        .iter()
589        .filter(|e| !e.is_dir && !e.is_symlink)
590        .collect();
591
592    // Collect per-file timeout events so they can be surfaced as AnalyzeError::ParseTimeout.
593    let timed_out: std::sync::Mutex<Vec<(PathBuf, u64)>> = std::sync::Mutex::new(Vec::new());
594
595    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
596        .par_iter()
597        .filter_map(|entry| {
598            // Check cancellation per file
599            if ct.is_cancelled() {
600                return None;
601            }
602
603            let ext = entry.path.extension().and_then(|e| e.to_str());
604
605            // Check file size before reading
606            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
607                tracing::debug!("skipping large file: {}", entry.path.display());
608                progress.fetch_add(1, Ordering::Relaxed);
609                return None;
610            }
611
612            // Try to read file content
613            let Ok(source) = std::fs::read_to_string(&entry.path) else {
614                progress.fetch_add(1, Ordering::Relaxed);
615                return None;
616            };
617
618            // Detect language and extract semantic information
619            let language = if let Some(ext_str) = ext {
620                language_for_extension(ext_str)
621                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
622            } else {
623                "unknown".to_string()
624            };
625
626            match SemanticExtractor::extract(
627                &source,
628                &language,
629                ast_recursion_limit,
630                parse_timeout_micros,
631            ) {
632                Ok(mut semantic) => {
633                    // Populate file path on references
634                    for r in &mut semantic.references {
635                        r.location = entry.path.display().to_string();
636                    }
637                    // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
638                    for trait_info in &mut semantic.impl_traits {
639                        trait_info.path.clone_from(&entry.path);
640                    }
641                    progress.fetch_add(1, Ordering::Relaxed);
642                    Some((entry.path.clone(), semantic))
643                }
644                Err(crate::parser::ParserError::Timeout(micros)) => {
645                    tracing::warn!(
646                        "parse timeout exceeded for {}: {} microseconds",
647                        entry.path.display(),
648                        micros
649                    );
650                    if let Ok(mut v) = timed_out.lock() {
651                        v.push((entry.path.clone(), micros));
652                    }
653                    progress.fetch_add(1, Ordering::Relaxed);
654                    None
655                }
656                Err(_) => {
657                    progress.fetch_add(1, Ordering::Relaxed);
658                    None
659                }
660            }
661        })
662        .collect();
663
664    // Check if cancelled after parallel processing
665    if ct.is_cancelled() {
666        return Err(AnalyzeError::Cancelled);
667    }
668
669    // Surface the first timeout as AnalyzeError::ParseTimeout so callers can detect it.
670    if let Ok(mut v) = timed_out.lock()
671        && let Some((path, micros)) = v.drain(..).next()
672    {
673        return Err(AnalyzeError::ParseTimeout { path, micros });
674    }
675
676    // Collect all impl-trait info from analysis results
677    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
678        .iter()
679        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
680        .collect();
681
682    Ok((analysis_results, all_impl_traits))
683}
684
685/// Phase 2: Build call graph from analysis results.
686fn build_call_graph(
687    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
688    all_impl_traits: &[ImplTraitInfo],
689) -> Result<CallGraph, AnalyzeError> {
690    // Build call graph. Always build without impl_only filter first so we can
691    // record the unfiltered caller count before discarding those edges.
692    CallGraph::build_from_results(
693        analysis_results,
694        all_impl_traits,
695        false, // filter applied below after counting
696    )
697    .map_err(std::convert::Into::into)
698}
699
700/// Phase 3: Resolve symbol and apply `impl_only` filter.
701/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
702/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
703/// then compute `impl_trait_caller_count`.
704fn resolve_symbol(
705    graph: &mut CallGraph,
706    params: &InternalFocusedParams,
707) -> Result<(String, usize, usize), AnalyzeError> {
708    // Resolve symbol name using the requested match mode.
709    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
710        let exists = graph.definitions.contains_key(&params.focus)
711            || graph.callers.contains_key(&params.focus)
712            || graph.callees.contains_key(&params.focus);
713        if exists {
714            params.focus.clone()
715        } else {
716            return Err(crate::graph::GraphError::SymbolNotFound {
717                symbol: params.focus.clone(),
718                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
719            }
720            .into());
721        }
722    } else {
723        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
724    };
725
726    // Count unique callers for the focus symbol before applying impl_only filter.
727    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
728        edges
729            .iter()
730            .map(|e| &e.neighbor_name)
731            .collect::<std::collections::HashSet<_>>()
732            .len()
733    });
734
735    // Apply impl_only filter now if requested, then count filtered callers.
736    // Filter all caller adjacency lists so traversal and formatting are consistently
737    // restricted to impl-trait edges regardless of follow_depth.
738    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
739        for edges in graph.callers.values_mut() {
740            edges.retain(|e| e.is_impl_trait);
741        }
742        graph.callers.get(&resolved_focus).map_or(0, |edges| {
743            edges
744                .iter()
745                .map(|e| &e.neighbor_name)
746                .collect::<std::collections::HashSet<_>>()
747                .len()
748        })
749    } else {
750        unfiltered_caller_count
751    };
752
753    Ok((
754        resolved_focus,
755        unfiltered_caller_count,
756        impl_trait_caller_count,
757    ))
758}
759
/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
///
/// `prod_chains` and `test_chains` together contain every incoming chain exactly once.
type ChainComputeResult = (
    String,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    usize,
);
768
769/// Helper function to convert InternalCallChain data to CallChainEntry vec.
770/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
771/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
772fn chains_to_entries(
773    chains: &[InternalCallChain],
774    root: Option<&std::path::Path>,
775) -> Option<Vec<CallChainEntry>> {
776    if chains.is_empty() {
777        return None;
778    }
779    let entries: Vec<CallChainEntry> = chains
780        .iter()
781        .take(10)
782        .filter_map(|chain| {
783            let (symbol, path, line) = chain.chain.first()?;
784            let file = match root {
785                Some(root) => path
786                    .strip_prefix(root)
787                    .unwrap_or(path.as_path())
788                    .to_string_lossy()
789                    .into_owned(),
790                None => path.to_string_lossy().into_owned(),
791            };
792            Some(CallChainEntry {
793                symbol: symbol.clone(),
794                file,
795                line: *line,
796            })
797        })
798        .collect();
799    if entries.is_empty() {
800        None
801    } else {
802        Some(entries)
803    }
804}
805
806/// Phase 4: Compute chains and format output.
807fn compute_chains(
808    graph: &CallGraph,
809    resolved_focus: &str,
810    root: &Path,
811    params: &InternalFocusedParams,
812    unfiltered_caller_count: usize,
813    impl_trait_caller_count: usize,
814    def_use_sites: &[crate::types::DefUseSite],
815) -> Result<ChainComputeResult, AnalyzeError> {
816    // Compute chain data for pagination (always, regardless of summary mode)
817    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
818    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
819    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
820
821    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
822        incoming_chains.iter().cloned().partition(|chain| {
823            chain
824                .chain
825                .first()
826                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
827        });
828
829    // Format output with pre-computed chains
830    let mut formatted = if params.use_summary {
831        format_focused_summary_internal(
832            graph,
833            resolved_focus,
834            params.follow_depth,
835            Some(root),
836            Some(&incoming_chains),
837            Some(&outgoing_chains),
838            def_use_sites,
839        )?
840    } else {
841        format_focused_internal(
842            graph,
843            resolved_focus,
844            params.follow_depth,
845            Some(root),
846            Some(&incoming_chains),
847            Some(&outgoing_chains),
848            def_use_sites,
849        )?
850    };
851
852    // Add FILTER header if impl_only filter was applied
853    if params.impl_only.unwrap_or(false) {
854        let filter_header = format!(
855            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
856        );
857        formatted = format!("{filter_header}{formatted}");
858    }
859
860    Ok((
861        formatted,
862        prod_chains,
863        test_chains,
864        outgoing_chains,
865        def_count,
866    ))
867}
868
869/// Analyze a symbol's call graph across a directory with progress tracking.
870// public API; callers expect owned semantics
871#[allow(clippy::needless_pass_by_value)]
872pub fn analyze_focused_with_progress(
873    root: &Path,
874    params: &FocusedAnalysisConfig,
875    progress: Arc<AtomicUsize>,
876    ct: CancellationToken,
877) -> Result<FocusedAnalysisOutput, AnalyzeError> {
878    let entries = walk_directory(root, params.max_depth)?;
879    let internal_params = InternalFocusedParams {
880        focus: params.focus.clone(),
881        match_mode: params.match_mode.clone(),
882        follow_depth: params.follow_depth,
883        ast_recursion_limit: params.ast_recursion_limit,
884        use_summary: params.use_summary,
885        impl_only: params.impl_only,
886        def_use: params.def_use,
887        parse_timeout_micros: params.parse_timeout_micros,
888    };
889    analyze_focused_with_progress_with_entries_internal(
890        root,
891        params.max_depth,
892        &progress,
893        &ct,
894        &internal_params,
895        &entries,
896    )
897}
898
899/// Internal implementation of focused analysis using pre-walked entries and params struct.
900#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
901fn analyze_focused_with_progress_with_entries_internal(
902    root: &Path,
903    _max_depth: Option<u32>,
904    progress: &Arc<AtomicUsize>,
905    ct: &CancellationToken,
906    params: &InternalFocusedParams,
907    entries: &[WalkEntry],
908) -> Result<FocusedAnalysisOutput, AnalyzeError> {
909    // Check if already cancelled
910    if ct.is_cancelled() {
911        return Err(AnalyzeError::Cancelled);
912    }
913
914    // Check if path is a file (hint to use directory)
915    if root.is_file() {
916        let formatted =
917            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
918                .to_string();
919        return Ok(FocusedAnalysisOutput {
920            formatted,
921            next_cursor: None,
922            prod_chains: vec![],
923            test_chains: vec![],
924            outgoing_chains: vec![],
925            def_count: 0,
926            unfiltered_caller_count: 0,
927            impl_trait_caller_count: 0,
928            callers: None,
929            test_callers: None,
930            callees: None,
931            def_use_sites: vec![],
932        });
933    }
934
935    // Phase 1: Collect file analysis
936    let (analysis_results, all_impl_traits) = collect_file_analysis(
937        entries,
938        progress,
939        ct,
940        params.ast_recursion_limit,
941        params.parse_timeout_micros,
942    )?;
943
944    // Check for cancellation before building the call graph (phase 2)
945    if ct.is_cancelled() {
946        return Err(AnalyzeError::Cancelled);
947    }
948
949    // Phase 2: Build call graph
950    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
951
952    // Check for cancellation before resolving the symbol (phase 3)
953    if ct.is_cancelled() {
954        return Err(AnalyzeError::Cancelled);
955    }
956
957    // Phase 3: Resolve symbol and apply impl_only filter.
958    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
959    // fall through to def-use extraction instead of returning SymbolNotFound.
960    let resolve_result = resolve_symbol(&mut graph, params);
961    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
962        &resolve_result
963    {
964        // Deliberately not collapsed: resolve_result must stay alive past this block
965        // so that the `?` below can propagate non-SymbolNotFound errors.
966        if params.def_use {
967            let def_use_sites =
968                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
969            if def_use_sites.is_empty() {
970                // Symbol not found anywhere (neither in call graph nor as def/use site).
971                // Propagate the original SymbolNotFound error instead of returning an
972                // empty success response.
973                return Err(resolve_result.unwrap_err());
974            }
975            use std::fmt::Write as _;
976            let mut formatted = String::new();
977            let _ = writeln!(
978                formatted,
979                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
980                params.focus
981            );
982            {
983                let writes = def_use_sites
984                    .iter()
985                    .filter(|s| {
986                        matches!(
987                            s.kind,
988                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
989                        )
990                    })
991                    .count();
992                let reads = def_use_sites
993                    .iter()
994                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
995                    .count();
996                let _ = writeln!(
997                    formatted,
998                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
999                    params.focus,
1000                    def_use_sites.len(),
1001                    writes,
1002                    reads
1003                );
1004            }
1005            return Ok(FocusedAnalysisOutput {
1006                formatted,
1007                next_cursor: None,
1008                callers: None,
1009                test_callers: None,
1010                callees: None,
1011                prod_chains: vec![],
1012                test_chains: vec![],
1013                outgoing_chains: vec![],
1014                def_count: 0,
1015                unfiltered_caller_count: 0,
1016                impl_trait_caller_count: 0,
1017                def_use_sites,
1018            });
1019        }
1020    }
1021    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
1022
1023    // Check for cancellation before computing chains (phase 4)
1024    if ct.is_cancelled() {
1025        return Err(AnalyzeError::Cancelled);
1026    }
1027
1028    // Phase 5 (optional, before formatting): Def-use site extraction.
1029    // Use params.focus (the raw user-supplied string) rather than resolved_focus
1030    // so that variable/field names that are not in the call graph still work.
1031    let def_use_sites = if params.def_use {
1032        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
1033    } else {
1034        Vec::new()
1035    };
1036
1037    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
1038    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
1039        &graph,
1040        &resolved_focus,
1041        root,
1042        params,
1043        unfiltered_caller_count,
1044        impl_trait_caller_count,
1045        &def_use_sites,
1046    )?;
1047
1048    // Compute depth-1 chains for structured output fields (always direct relationships only,
1049    // regardless of `follow_depth` used for the text-formatted output).
1050    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
1051        // Chains already at depth 1; reuse the partitioned vecs.
1052        let callers = chains_to_entries(&prod_chains, Some(root));
1053        let test_callers = chains_to_entries(&test_chains, Some(root));
1054        let callees = chains_to_entries(&outgoing_chains, Some(root));
1055        (callers, test_callers, callees)
1056    } else {
1057        // follow_depth > 1: re-query at depth 1 to get only direct edges.
1058        let incoming1 = graph
1059            .find_incoming_chains(&resolved_focus, 1)
1060            .unwrap_or_default();
1061        let outgoing1 = graph
1062            .find_outgoing_chains(&resolved_focus, 1)
1063            .unwrap_or_default();
1064        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
1065            chain
1066                .chain
1067                .first()
1068                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
1069        });
1070        let callers = chains_to_entries(&prod1, Some(root));
1071        let test_callers = chains_to_entries(&test1, Some(root));
1072        let callees = chains_to_entries(&outgoing1, Some(root));
1073        (callers, test_callers, callees)
1074    };
1075
1076    Ok(FocusedAnalysisOutput {
1077        formatted,
1078        next_cursor: None,
1079        callers: depth1_callers,
1080        test_callers: depth1_test_callers,
1081        callees: depth1_callees,
1082        prod_chains,
1083        test_chains,
1084        outgoing_chains,
1085        def_count,
1086        unfiltered_caller_count,
1087        impl_trait_caller_count,
1088        def_use_sites,
1089    })
1090}
1091
1092/// Phase 5: Extract def-use sites for `symbol` across all entries.
1093/// Writes go before reads; within each kind ordered by file, line, then column.
1094fn collect_def_use_sites(
1095    entries: &[WalkEntry],
1096    symbol: &str,
1097    ast_recursion_limit: Option<usize>,
1098    root: &std::path::Path,
1099    ct: &CancellationToken,
1100) -> Vec<crate::types::DefUseSite> {
1101    use crate::parser::SemanticExtractor;
1102
1103    let file_entries: Vec<&WalkEntry> = entries
1104        .iter()
1105        .filter(|e| !e.is_dir && !e.is_symlink)
1106        .collect();
1107
1108    let mut sites: Vec<crate::types::DefUseSite> = file_entries
1109        .par_iter()
1110        .filter_map(|entry| {
1111            if ct.is_cancelled() {
1112                return None;
1113            }
1114
1115            // Check file size before reading
1116            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1117                tracing::debug!("skipping large file: {}", entry.path.display());
1118                return None;
1119            }
1120
1121            let Ok(source) = std::fs::read_to_string(&entry.path) else {
1122                return None;
1123            };
1124            let ext = entry
1125                .path
1126                .extension()
1127                .and_then(|e| e.to_str())
1128                .unwrap_or("");
1129            let lang = crate::lang::language_for_extension(ext)?;
1130            let file_path = entry
1131                .path
1132                .strip_prefix(root)
1133                .unwrap_or(&entry.path)
1134                .display()
1135                .to_string();
1136            let sites = SemanticExtractor::extract_def_use_for_file(
1137                &source,
1138                lang,
1139                symbol,
1140                &file_path,
1141                ast_recursion_limit,
1142            );
1143            if sites.is_empty() { None } else { Some(sites) }
1144        })
1145        .flatten()
1146        .collect();
1147
1148    // Writes before reads; within each kind: file, line, then column for deterministic order
1149    sites.sort_by(|a, b| {
1150        use crate::types::DefUseKind;
1151        let kind_ord = |k: &DefUseKind| match k {
1152            DefUseKind::Write | DefUseKind::WriteRead => 0,
1153            DefUseKind::Read => 1,
1154        };
1155        kind_ord(&a.kind)
1156            .cmp(&kind_ord(&b.kind))
1157            .then_with(|| a.file.cmp(&b.file))
1158            .then_with(|| a.line.cmp(&b.line))
1159            .then_with(|| a.column.cmp(&b.column))
1160    });
1161
1162    sites
1163}
1164
1165/// Analyze a symbol's call graph using pre-walked directory entries.
1166pub fn analyze_focused_with_progress_with_entries(
1167    root: &Path,
1168    params: &FocusedAnalysisConfig,
1169    progress: &Arc<AtomicUsize>,
1170    ct: &CancellationToken,
1171    entries: &[WalkEntry],
1172) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1173    let internal_params = InternalFocusedParams {
1174        focus: params.focus.clone(),
1175        match_mode: params.match_mode.clone(),
1176        follow_depth: params.follow_depth,
1177        ast_recursion_limit: params.ast_recursion_limit,
1178        use_summary: params.use_summary,
1179        impl_only: params.impl_only,
1180        def_use: params.def_use,
1181        parse_timeout_micros: params.parse_timeout_micros,
1182    };
1183    analyze_focused_with_progress_with_entries_internal(
1184        root,
1185        params.max_depth,
1186        progress,
1187        ct,
1188        &internal_params,
1189        entries,
1190    )
1191}
1192
1193#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1194pub fn analyze_focused(
1195    root: &Path,
1196    focus: &str,
1197    follow_depth: u32,
1198    max_depth: Option<u32>,
1199    ast_recursion_limit: Option<usize>,
1200) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1201    let entries = walk_directory(root, max_depth)?;
1202    let counter = Arc::new(AtomicUsize::new(0));
1203    let ct = CancellationToken::new();
1204    let params = FocusedAnalysisConfig {
1205        focus: focus.to_string(),
1206        match_mode: SymbolMatchMode::Exact,
1207        follow_depth,
1208        max_depth,
1209        ast_recursion_limit,
1210        use_summary: false,
1211        impl_only: None,
1212        def_use: false,
1213        parse_timeout_micros: None,
1214    };
1215    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1216}
1217
1218/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1219/// functions, imports) for lightweight code understanding.
1220#[instrument(skip_all, fields(path))]
1221pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1222    // Check file size before reading
1223    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1224        tracing::debug!("skipping large file: {}", path);
1225        return Err(AnalyzeError::Parser(
1226            crate::parser::ParserError::ParseError("file too large".to_string()),
1227        ));
1228    }
1229
1230    let source = std::fs::read_to_string(path)
1231        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1232
1233    let file_path = Path::new(path);
1234    let name = file_path
1235        .file_name()
1236        .and_then(|s| s.to_str())
1237        .unwrap_or("unknown")
1238        .to_string();
1239
1240    let line_count = source.lines().count();
1241
1242    let language = file_path
1243        .extension()
1244        .and_then(|e| e.to_str())
1245        .and_then(language_for_extension)
1246        .ok_or_else(|| {
1247            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1248                "unsupported or missing file extension".to_string(),
1249            ))
1250        })?;
1251
1252    let mut module_info = SemanticExtractor::extract_module_info(&source, language, None)?;
1253    module_info.name = name;
1254    module_info.line_count = line_count;
1255
1256    Ok(module_info)
1257}
1258
1259/// Scan a directory for files that import a given module path.
1260///
1261/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1262/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1263/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1264pub fn analyze_import_lookup(
1265    root: &Path,
1266    module: &str,
1267    entries: &[WalkEntry],
1268    ast_recursion_limit: Option<usize>,
1269) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1270    let matches: Vec<(PathBuf, usize)> = entries
1271        .par_iter()
1272        .filter_map(|entry| {
1273            if entry.is_dir || entry.is_symlink {
1274                tracing::debug!("skipping symlink: {}", entry.path.display());
1275                return None;
1276            }
1277            let ext = entry
1278                .path
1279                .extension()
1280                .and_then(|e| e.to_str())
1281                .and_then(crate::lang::language_for_extension)?;
1282            let source = std::fs::read_to_string(&entry.path).ok()?;
1283            let semantic =
1284                SemanticExtractor::extract(&source, ext, ast_recursion_limit, None).ok()?;
1285            for import in &semantic.imports {
1286                if import.module == module || import.items.iter().any(|item| item == module) {
1287                    return Some((entry.path.clone(), import.line));
1288                }
1289            }
1290            None
1291        })
1292        .collect();
1293
1294    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1295    text.push_str(&format!("ROOT: {}\n", root.display()));
1296    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1297    for (path, line) in &matches {
1298        let rel = path.strip_prefix(root).unwrap_or(path);
1299        text.push_str(&format!("  {}:{line}\n", rel.display()));
1300    }
1301
1302    Ok(FocusedAnalysisOutput {
1303        formatted: text,
1304        next_cursor: None,
1305        prod_chains: vec![],
1306        test_chains: vec![],
1307        outgoing_chains: vec![],
1308        def_count: 0,
1309        unfiltered_caller_count: 0,
1310        impl_trait_caller_count: 0,
1311        callers: None,
1312        test_callers: None,
1313        callees: None,
1314        def_use_sites: vec![],
1315    })
1316}
1317
1318/// Resolve Python wildcard imports to actual symbol names.
1319///
1320/// For each import with items=`["*"]`, this function:
1321/// 1. Parses the relative dots (if any) and climbs the directory tree
1322/// 2. Finds the target .py file or __init__.py
1323/// 3. Extracts symbols (functions and classes) from the target
1324/// 4. Honors __all__ if defined, otherwise uses function+class names
1325///
1326/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1327fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1328    use std::collections::HashMap;
1329
1330    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1331    let Ok(file_path_canonical) = file_path.canonicalize() else {
1332        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1333        return;
1334    };
1335
1336    for import in imports.iter_mut() {
1337        if import.items != ["*"] {
1338            continue;
1339        }
1340        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1341    }
1342}
1343
1344/// Validate and canonicalize a wildcard target path, checking for self-references.
1345/// Returns the canonical path if valid, or None if validation fails.
1346fn validate_wildcard_target(
1347    target_to_read: &Path,
1348    file_path_canonical: &Path,
1349    module: &str,
1350) -> Option<PathBuf> {
1351    let Ok(canonical) = target_to_read.canonicalize() else {
1352        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1353        return None;
1354    };
1355
1356    if canonical == file_path_canonical {
1357        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1358        return None;
1359    }
1360
1361    Some(canonical)
1362}
1363
1364/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1365fn resolve_single_wildcard(
1366    import: &mut ImportInfo,
1367    file_path: &Path,
1368    file_path_canonical: &Path,
1369    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1370) {
1371    let module = import.module.clone();
1372    let dot_count = module.chars().take_while(|c| *c == '.').count();
1373    if dot_count == 0 {
1374        return;
1375    }
1376    let module_path = module.trim_start_matches('.');
1377
1378    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1379    else {
1380        return;
1381    };
1382
1383    let Some(canonical) = validate_wildcard_target(&target_to_read, file_path_canonical, &module)
1384    else {
1385        return;
1386    };
1387
1388    if let Some(cached) = resolved_cache.get(&canonical) {
1389        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1390        import.items.clone_from(cached);
1391        return;
1392    }
1393
1394    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1395        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1396        import.items.clone_from(&symbols);
1397        resolved_cache.insert(canonical, symbols);
1398    }
1399}
1400
1401/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1402fn locate_target_file(
1403    file_path: &Path,
1404    dot_count: usize,
1405    module_path: &str,
1406    module: &str,
1407) -> Option<PathBuf> {
1408    let mut target_dir = file_path.parent()?.to_path_buf();
1409
1410    for _ in 1..dot_count {
1411        if !target_dir.pop() {
1412            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1413            return None;
1414        }
1415    }
1416
1417    let target_file = if module_path.is_empty() {
1418        target_dir.join("__init__.py")
1419    } else {
1420        let rel_path = module_path.replace('.', "/");
1421        target_dir.join(format!("{rel_path}.py"))
1422    };
1423
1424    if target_file.exists() {
1425        Some(target_file)
1426    } else if target_file.with_extension("").is_dir() {
1427        let init = target_file.with_extension("").join("__init__.py");
1428        if init.exists() { Some(init) } else { None }
1429    } else {
1430        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1431        None
1432    }
1433}
1434
1435/// Build a tree-sitter parser for Python and parse the source code.
1436fn build_parser_for_file(source: &str) -> Option<tree_sitter::Tree> {
1437    use tree_sitter::Parser;
1438
1439    let lang_info = crate::languages::get_language_info("python")?;
1440    let mut parser = Parser::new();
1441    if parser.set_language(&lang_info.language).is_err() {
1442        return None;
1443    }
1444    parser.parse(source, None)
1445}
1446
1447/// Extract all public symbols from a parsed tree (functions and classes).
1448fn extract_all_symbols(tree: &tree_sitter::Tree, source: &str) -> Vec<String> {
1449    let mut symbols = Vec::new();
1450    let root = tree.root_node();
1451    let mut cursor = root.walk();
1452    for child in root.children(&mut cursor) {
1453        if matches!(child.kind(), "function_definition" | "class_definition")
1454            && let Some(name_node) = child.child_by_field_name("name")
1455        {
1456            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1457            if !name.starts_with('_') {
1458                symbols.push(name);
1459            }
1460        }
1461    }
1462    symbols
1463}
1464
1465/// Try to resolve symbols from __all__ or fallback to function/class extraction.
1466fn resolve_symbols_from_tree(tree: &tree_sitter::Tree, source: &str, module: &str) -> Vec<String> {
1467    let mut symbols = Vec::new();
1468    extract_all_from_tree(tree, source, &mut symbols);
1469    if !symbols.is_empty() {
1470        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1471        return symbols;
1472    }
1473
1474    // Fallback: extract functions/classes from the tree
1475    let symbols = extract_all_symbols(tree, source);
1476    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1477    symbols
1478}
1479
1480/// Read and parse a target .py file, returning its exported symbols.
1481fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1482    // Check file size before reading
1483    if target_path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1484        tracing::debug!("skipping large file: {}", target_path.display());
1485        return None;
1486    }
1487
1488    let source = match std::fs::read_to_string(target_path) {
1489        Ok(s) => s,
1490        Err(e) => {
1491            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1492            return None;
1493        }
1494    };
1495
1496    // Parse once with tree-sitter
1497    let tree = build_parser_for_file(&source)?;
1498
1499    // Try to extract __all__ or fallback to function/class extraction
1500    let symbols = resolve_symbols_from_tree(&tree, &source, module);
1501    Some(symbols)
1502}
1503
1504/// Extract __all__ from a tree-sitter tree.
1505fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1506    let root = tree.root_node();
1507    let mut cursor = root.walk();
1508    for child in root.children(&mut cursor) {
1509        if child.kind() == "simple_statement" {
1510            // simple_statement contains assignment and other statement types
1511            let mut simple_cursor = child.walk();
1512            for simple_child in child.children(&mut simple_cursor) {
1513                if simple_child.kind() == "assignment"
1514                    && let Some(left) = simple_child.child_by_field_name("left")
1515                {
1516                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1517                    if target_text == "__all__"
1518                        && let Some(right) = simple_child.child_by_field_name("right")
1519                    {
1520                        extract_string_list_from_list_node(&right, source, result);
1521                    }
1522                }
1523            }
1524        } else if child.kind() == "expression_statement" {
1525            // Fallback for older Python AST structures
1526            let mut stmt_cursor = child.walk();
1527            for stmt_child in child.children(&mut stmt_cursor) {
1528                if stmt_child.kind() == "assignment"
1529                    && let Some(left) = stmt_child.child_by_field_name("left")
1530                {
1531                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1532                    if target_text == "__all__"
1533                        && let Some(right) = stmt_child.child_by_field_name("right")
1534                    {
1535                        extract_string_list_from_list_node(&right, source, result);
1536                    }
1537                }
1538            }
1539        }
1540    }
1541}
1542
1543/// Extract string literals from a Python list node.
1544fn extract_string_list_from_list_node(
1545    list_node: &tree_sitter::Node,
1546    source: &str,
1547    result: &mut Vec<String>,
1548) {
1549    let mut cursor = list_node.walk();
1550    for child in list_node.named_children(&mut cursor) {
1551        if child.kind() == "string" {
1552            let raw = source[child.start_byte()..child.end_byte()].trim();
1553            // Strip quotes: "name" -> name
1554            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1555            if !unquoted.is_empty() {
1556                result.push(unquoted);
1557            }
1558        }
1559    }
1560}
1561
/// Unit tests for the string-analysis and focused-analysis entry points.
1563#[cfg(test)]
1564mod tests {
1565    use super::*;
1566    use crate::formatter::format_focused_paginated;
1567    use crate::graph::InternalCallChain;
1568    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1569    use std::fs;
1570    use std::path::PathBuf;
1571    use tempfile::TempDir;
1572
1573    #[cfg(feature = "lang-rust")]
1574    #[test]
1575    fn analyze_str_rust_happy_path() {
1576        let source = "fn hello() -> i32 { 42 }";
1577        let result = analyze_str(source, "rs", None);
1578        assert!(result.is_ok());
1579    }
1580
1581    #[cfg(feature = "lang-python")]
1582    #[test]
1583    fn analyze_str_python_happy_path() {
1584        let source = "def greet(name):\n    return f'Hello {name}'";
1585        let result = analyze_str(source, "py", None);
1586        assert!(result.is_ok());
1587    }
1588
1589    #[cfg(feature = "lang-rust")]
1590    #[test]
1591    fn analyze_str_rust_by_language_name() {
1592        let source = "fn hello() -> i32 { 42 }";
1593        let result = analyze_str(source, "rust", None);
1594        assert!(result.is_ok());
1595    }
1596
1597    #[cfg(feature = "lang-python")]
1598    #[test]
1599    fn analyze_str_python_by_language_name() {
1600        let source = "def greet(name):\n    return f'Hello {name}'";
1601        let result = analyze_str(source, "python", None);
1602        assert!(result.is_ok());
1603    }
1604
1605    #[cfg(feature = "lang-rust")]
1606    #[test]
1607    fn analyze_str_rust_mixed_case() {
1608        let source = "fn hello() -> i32 { 42 }";
1609        let result = analyze_str(source, "RuSt", None);
1610        assert!(result.is_ok());
1611    }
1612
1613    #[cfg(feature = "lang-python")]
1614    #[test]
1615    fn analyze_str_python_mixed_case() {
1616        let source = "def greet(name):\n    return f'Hello {name}'";
1617        let result = analyze_str(source, "PyThOn", None);
1618        assert!(result.is_ok());
1619    }
1620
1621    #[test]
1622    fn analyze_str_unsupported_language() {
1623        let result = analyze_str("code", "brainfuck", None);
1624        assert!(
1625            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1626        );
1627    }
1628
1629    #[cfg(feature = "lang-rust")]
1630    #[test]
1631    fn test_symbol_focus_callers_pagination_first_page() {
1632        let temp_dir = TempDir::new().unwrap();
1633
1634        // Create a file with many callers of `target`
1635        let mut code = String::from("fn target() {}\n");
1636        for i in 0..15 {
1637            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1638        }
1639        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1640
1641        // Act
1642        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1643
1644        // Paginate prod callers with page_size=5
1645        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1646            .expect("paginate failed");
1647        assert!(
1648            paginated.total >= 5,
1649            "should have enough callers to paginate"
1650        );
1651        assert!(
1652            paginated.next_cursor.is_some(),
1653            "should have next_cursor for page 1"
1654        );
1655
1656        // Verify cursor encodes callers mode
1657        assert_eq!(paginated.items.len(), 5);
1658    }
1659
1660    #[test]
1661    fn test_symbol_focus_callers_pagination_second_page() {
1662        let temp_dir = TempDir::new().unwrap();
1663
1664        let mut code = String::from("fn target() {}\n");
1665        for i in 0..12 {
1666            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1667        }
1668        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1669
1670        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1671        let total_prod = output.prod_chains.len();
1672
1673        if total_prod > 5 {
1674            // Get page 1 cursor
1675            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1676                .expect("paginate failed");
1677            assert!(p1.next_cursor.is_some());
1678
1679            let cursor_str = p1.next_cursor.unwrap();
1680            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1681
1682            // Get page 2
1683            let p2 = paginate_slice(
1684                &output.prod_chains,
1685                cursor_data.offset,
1686                5,
1687                PaginationMode::Callers,
1688            )
1689            .expect("paginate failed");
1690
1691            // Format paginated output
1692            let formatted = format_focused_paginated(
1693                &p2.items,
1694                total_prod,
1695                PaginationMode::Callers,
1696                "target",
1697                &output.prod_chains,
1698                &output.test_chains,
1699                &output.outgoing_chains,
1700                output.def_count,
1701                cursor_data.offset,
1702                Some(temp_dir.path()),
1703                true,
1704            );
1705
1706            // Assert: header shows correct range for page 2
1707            let expected_start = cursor_data.offset + 1;
1708            assert!(
1709                formatted.contains(&format!("CALLERS ({}", expected_start)),
1710                "header should show page 2 range, got: {}",
1711                formatted
1712            );
1713        }
1714    }
1715
1716    #[test]
1717    fn test_chains_to_entries_empty_returns_none() {
1718        // Arrange
1719        let chains: Vec<InternalCallChain> = vec![];
1720
1721        // Act
1722        let result = chains_to_entries(&chains, None);
1723
1724        // Assert
1725        assert!(result.is_none());
1726    }
1727
1728    #[test]
1729    fn test_chains_to_entries_with_data_returns_entries() {
1730        // Arrange
1731        let chains = vec![
1732            InternalCallChain {
1733                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1734            },
1735            InternalCallChain {
1736                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1737            },
1738        ];
1739        let root = PathBuf::from("/root");
1740
1741        // Act
1742        let result = chains_to_entries(&chains, Some(root.as_path()));
1743
1744        // Assert
1745        assert!(result.is_some());
1746        let entries = result.unwrap();
1747        assert_eq!(entries.len(), 2);
1748        assert_eq!(entries[0].symbol, "caller1");
1749        assert_eq!(entries[0].file, "lib.rs");
1750        assert_eq!(entries[0].line, 10);
1751        assert_eq!(entries[1].symbol, "caller2");
1752        assert_eq!(entries[1].file, "other.rs");
1753        assert_eq!(entries[1].line, 20);
1754    }
1755
1756    #[test]
1757    fn test_symbol_focus_callees_pagination() {
1758        let temp_dir = TempDir::new().unwrap();
1759
1760        // target calls many functions
1761        let mut code = String::from("fn target() {\n");
1762        for i in 0..10 {
1763            code.push_str(&format!("    callee_{:02}();\n", i));
1764        }
1765        code.push_str("}\n");
1766        for i in 0..10 {
1767            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1768        }
1769        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1770
1771        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1772        let total_callees = output.outgoing_chains.len();
1773
1774        if total_callees > 3 {
1775            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1776                .expect("paginate failed");
1777
1778            let formatted = format_focused_paginated(
1779                &paginated.items,
1780                total_callees,
1781                PaginationMode::Callees,
1782                "target",
1783                &output.prod_chains,
1784                &output.test_chains,
1785                &output.outgoing_chains,
1786                output.def_count,
1787                0,
1788                Some(temp_dir.path()),
1789                true,
1790            );
1791
1792            assert!(
1793                formatted.contains(&format!(
1794                    "CALLEES (1-{} of {})",
1795                    paginated.items.len(),
1796                    total_callees
1797                )),
1798                "header should show callees range, got: {}",
1799                formatted
1800            );
1801        }
1802    }
1803
1804    #[test]
1805    fn test_symbol_focus_empty_prod_callers() {
1806        let temp_dir = TempDir::new().unwrap();
1807
1808        // target is only called from test functions
1809        let code = r#"
1810fn target() {}
1811
1812#[cfg(test)]
1813mod tests {
1814    use super::*;
1815    #[test]
1816    fn test_something() { target(); }
1817}
1818"#;
1819        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1820
1821        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1822
1823        // prod_chains may be empty; pagination should handle it gracefully
1824        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1825            .expect("paginate failed");
1826        assert_eq!(paginated.items.len(), output.prod_chains.len());
1827        assert!(
1828            paginated.next_cursor.is_none(),
1829            "no next_cursor for empty or single-page prod_chains"
1830        );
1831    }
1832
1833    #[test]
1834    fn test_impl_only_filter_header_correct_counts() {
1835        let temp_dir = TempDir::new().unwrap();
1836
1837        // Create a Rust fixture with:
1838        // - A trait definition
1839        // - An impl Trait for SomeType block that calls the focus symbol
1840        // - A regular (non-trait-impl) function that also calls the focus symbol
1841        let code = r#"
1842trait MyTrait {
1843    fn focus_symbol();
1844}
1845
1846struct SomeType;
1847
1848impl MyTrait for SomeType {
1849    fn focus_symbol() {}
1850}
1851
1852fn impl_caller() {
1853    SomeType::focus_symbol();
1854}
1855
1856fn regular_caller() {
1857    SomeType::focus_symbol();
1858}
1859"#;
1860        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1861
1862        // Call analyze_focused with impl_only=Some(true)
1863        let params = FocusedAnalysisConfig {
1864            focus: "focus_symbol".to_string(),
1865            match_mode: SymbolMatchMode::Insensitive,
1866            follow_depth: 1,
1867            max_depth: None,
1868            ast_recursion_limit: None,
1869            use_summary: false,
1870            impl_only: Some(true),
1871            def_use: false,
1872            parse_timeout_micros: None,
1873        };
1874        let output = analyze_focused_with_progress(
1875            temp_dir.path(),
1876            &params,
1877            Arc::new(AtomicUsize::new(0)),
1878            CancellationToken::new(),
1879        )
1880        .unwrap();
1881
1882        // Assert the result contains "FILTER: impl_only=true"
1883        assert!(
1884            output.formatted.contains("FILTER: impl_only=true"),
1885            "formatted output should contain FILTER header for impl_only=true, got: {}",
1886            output.formatted
1887        );
1888
1889        // Assert the retained count N < total count M
1890        assert!(
1891            output.impl_trait_caller_count < output.unfiltered_caller_count,
1892            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1893            output.impl_trait_caller_count,
1894            output.unfiltered_caller_count
1895        );
1896
1897        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1898        let filter_line = output
1899            .formatted
1900            .lines()
1901            .find(|line| line.contains("FILTER: impl_only=true"))
1902            .expect("should find FILTER line");
1903        assert!(
1904            filter_line.contains(&format!(
1905                "({} of {} callers shown)",
1906                output.impl_trait_caller_count, output.unfiltered_caller_count
1907            )),
1908            "FILTER line should show correct N of M counts, got: {}",
1909            filter_line
1910        );
1911    }
1912
1913    #[test]
1914    fn test_callers_count_matches_formatted_output() {
1915        let temp_dir = TempDir::new().unwrap();
1916
1917        // Create a file with multiple callers of `target`
1918        let code = r#"
1919fn target() {}
1920fn caller_a() { target(); }
1921fn caller_b() { target(); }
1922fn caller_c() { target(); }
1923"#;
1924        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1925
1926        // Analyze the symbol
1927        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1928
1929        // Extract CALLERS count from formatted output
1930        let formatted = &output.formatted;
1931        let callers_count_from_output = formatted
1932            .lines()
1933            .find(|line| line.contains("FOCUS:"))
1934            .and_then(|line| {
1935                line.split(',')
1936                    .find(|part| part.contains("callers"))
1937                    .and_then(|part| {
1938                        part.trim()
1939                            .split_whitespace()
1940                            .next()
1941                            .and_then(|s| s.parse::<usize>().ok())
1942                    })
1943            })
1944            .expect("should find CALLERS count in formatted output");
1945
1946        // Compute expected count from prod_chains (unique first-caller names)
1947        let expected_callers_count = output
1948            .prod_chains
1949            .iter()
1950            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1951            .collect::<std::collections::HashSet<_>>()
1952            .len();
1953
1954        assert_eq!(
1955            callers_count_from_output, expected_callers_count,
1956            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1957        );
1958    }
1959
1960    #[cfg(feature = "lang-rust")]
1961    #[test]
1962    fn test_def_use_focused_analysis() {
1963        let temp_dir = TempDir::new().unwrap();
1964        fs::write(
1965            temp_dir.path().join("lib.rs"),
1966            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1967        )
1968        .unwrap();
1969
1970        let entries = walk_directory(temp_dir.path(), None).unwrap();
1971        let counter = Arc::new(AtomicUsize::new(0));
1972        let ct = CancellationToken::new();
1973        let params = FocusedAnalysisConfig {
1974            focus: "x".to_string(),
1975            match_mode: SymbolMatchMode::Exact,
1976            follow_depth: 1,
1977            max_depth: None,
1978            ast_recursion_limit: None,
1979            use_summary: false,
1980            impl_only: None,
1981            def_use: true,
1982            parse_timeout_micros: None,
1983        };
1984
1985        let output = analyze_focused_with_progress_with_entries(
1986            temp_dir.path(),
1987            &params,
1988            &counter,
1989            &ct,
1990            &entries,
1991        )
1992        .expect("def_use analysis should succeed");
1993
1994        assert!(
1995            !output.def_use_sites.is_empty(),
1996            "should find def-use sites for x"
1997        );
1998        assert!(
1999            output
2000                .def_use_sites
2001                .iter()
2002                .any(|s| s.kind == crate::types::DefUseKind::Write),
2003            "should have at least one Write site",
2004        );
2005        // No location appears as both write and read
2006        let write_locs: std::collections::HashSet<_> = output
2007            .def_use_sites
2008            .iter()
2009            .filter(|s| {
2010                matches!(
2011                    s.kind,
2012                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
2013                )
2014            })
2015            .map(|s| (&s.file, s.line, s.column))
2016            .collect();
2017        assert!(
2018            output
2019                .def_use_sites
2020                .iter()
2021                .filter(|s| s.kind == crate::types::DefUseKind::Read)
2022                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
2023            "no location should appear as both write and read",
2024        );
2025        assert!(
2026            output.formatted.contains("DEF-USE SITES"),
2027            "formatted output should contain DEF-USE SITES"
2028        );
2029    }
2030
2031    fn make_temp_file(content: &str) -> tempfile::NamedTempFile {
2032        let mut f = tempfile::NamedTempFile::new().unwrap();
2033        use std::io::Write;
2034        f.write_all(content.as_bytes()).unwrap();
2035        f.flush().unwrap();
2036        f
2037    }
2038}
aptu_coder_core/analyze.rs

aptu_coder_core/
analyze.rs