aptu_coder_core/
analyze.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Maximum size, in bytes, of a file that will be read for analysis.
///
/// The directory-wide passes in this module skip files larger than this limit, and
/// `analyze_file` rejects them with a parser error instead of reading them.
pub const MAX_FILE_SIZE_BYTES: u64 = 10_000_000;
32
/// Errors returned by the analysis entry points in this module.
///
/// Variants marked `#[from]` are produced automatically when the corresponding
/// lower-level error is propagated with `?`.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Error raised by the directory traversal layer.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Error raised by the parser; `analyze_file` also uses it for oversized or unreadable files.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Error raised while building or querying the call graph (e.g. symbol not found).
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Error raised by the output formatter.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The cancellation token was triggered before or during the analysis.
    #[error("Analysis cancelled")]
    Cancelled,
    /// The supplied language identifier could not be resolved (see `analyze_str`).
    #[error("unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Underlying I/O failure.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// A requested line range whose `start` is greater than its `end`.
    #[error("invalid range: start ({start}) > end ({end}); file has {total} lines")]
    InvalidRange {
        start: usize,
        end: usize,
        total: usize,
    },
    /// A directory was supplied where a file path was required.
    #[error("path is a directory, not a file: {0}")]
    NotAFile(PathBuf),
    /// A file of `total_lines` lines was requested without a line range.
    // NOTE(review): the line-count threshold that triggers this is not visible in this part of the file.
    #[error(
        "file has {total_lines} lines; provide start_line and end_line, or call analyze_module first to locate the range"
    )]
    RangelessLargeFile { total_lines: usize },
    /// Parsing `path` timed out; `micros` is the value carried by the parser's timeout error.
    #[error("parse timeout exceeded for {path}: {micros} microseconds")]
    ParseTimeout { path: PathBuf, micros: u64 },
}
65
/// Result of directory analysis containing both formatted output and file data.
// Only `formatted`, `files` and (when present) `next_cursor` are serialized; `entries` and
// `subtree_counts` are skipped by serde and, with the `schemars` feature, by the schema.
// `analyze_directory_with_progress` always leaves `next_cursor` and `subtree_counts` as `None`;
// presumably callers fill them in afterwards — TODO confirm.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct AnalysisOutput {
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "List of files analyzed in the directory")
    )]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub entries: Vec<WalkEntry>,
    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    // Omitted from the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
98
/// Result of file-level semantic analysis.
// Produced by `analyze_file` and `analyze_str`, both of which set `next_cursor` to `None`.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FileAnalysisOutput {
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Semantic analysis data including functions, classes, and imports")
    )]
    pub semantic: SemanticAnalysis,
    // The description and the custom integer schema are attached through two separate
    // `cfg_attr`s; both apply only when the `schemars` feature is enabled.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Total line count of the analyzed file")
    )]
    #[cfg_attr(
        feature = "schemars",
        schemars(schema_with = "crate::schema_helpers::integer_schema")
    )]
    pub line_count: usize,
    // Omitted from the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
132
impl FileAnalysisOutput {
    /// Create a new `FileAnalysisOutput`.
    ///
    /// Stores the four arguments in the fields of the same name without any validation.
    /// The struct is `#[non_exhaustive]`, so this is presumably the intended way for other
    /// crates to construct it — confirm against downstream callers.
    #[must_use]
    pub fn new(
        formatted: String,
        semantic: SemanticAnalysis,
        line_count: usize,
        next_cursor: Option<String>,
    ) -> Self {
        Self {
            formatted,
            semantic,
            line_count,
            next_cursor,
        }
    }
}
150#[instrument(skip_all, fields(path = %root.display()))]
151// public API; callers expect owned semantics
152#[allow(clippy::needless_pass_by_value)]
153pub fn analyze_directory_with_progress(
154    root: &Path,
155    entries: Vec<WalkEntry>,
156    progress: Arc<AtomicUsize>,
157    ct: CancellationToken,
158) -> Result<AnalysisOutput, AnalyzeError> {
159    // Check if already cancelled
160    if ct.is_cancelled() {
161        return Err(AnalyzeError::Cancelled);
162    }
163
164    // Detect language from file extension
165    let file_entries: Vec<&WalkEntry> = entries
166        .iter()
167        .filter(|e| !e.is_dir && !e.is_symlink)
168        .collect();
169
170    let start = Instant::now();
171    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
172
173    let _parse_span = tracing::info_span!("ast.parse_batch", count = file_entries.len()).entered();
174
175    // Parallel analysis of files
176    let analysis_results: Vec<FileInfo> = file_entries
177        .par_iter()
178        .filter_map(|entry| {
179            // Check cancellation per file
180            if ct.is_cancelled() {
181                return None;
182            }
183
184            let path_str = entry.path.display().to_string();
185
186            // Detect language from extension
187            let ext = entry.path.extension().and_then(|e| e.to_str());
188
189            // Check file size before reading
190            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
191                tracing::debug!("skipping large file: {}", entry.path.display());
192                progress.fetch_add(1, Ordering::Relaxed);
193                return None;
194            }
195
196            // Try to read file content; skip binary or unreadable files
197            let Ok(source) = std::fs::read_to_string(&entry.path) else {
198                progress.fetch_add(1, Ordering::Relaxed);
199                return None;
200            };
201
202            // Count lines
203            let line_count = source.lines().count();
204
205            // Detect language and extract counts
206            let (language, function_count, class_count) = if let Some(ext_str) = ext {
207                if let Some(lang) = language_for_extension(ext_str) {
208                    let lang_str = lang.to_string();
209                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
210                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
211                        Err(_) => (lang_str, 0, 0),
212                    }
213                } else {
214                    ("unknown".to_string(), 0, 0)
215                }
216            } else {
217                ("unknown".to_string(), 0, 0)
218            };
219
220            progress.fetch_add(1, Ordering::Relaxed);
221
222            let is_test = is_test_file(&entry.path);
223
224            Some(FileInfo {
225                path: path_str,
226                line_count,
227                function_count,
228                class_count,
229                language,
230                is_test,
231            })
232        })
233        .collect();
234
235    // Check if cancelled after parallel processing
236    if ct.is_cancelled() {
237        return Err(AnalyzeError::Cancelled);
238    }
239
240    tracing::debug!(
241        file_count = file_entries.len(),
242        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
243        "analysis complete"
244    );
245
246    let _format_span = tracing::info_span!("output.format").entered();
247
248    // Format output
249    let formatted = format_structure(&entries, &analysis_results, None);
250
251    Ok(AnalysisOutput {
252        formatted,
253        files: analysis_results,
254        entries,
255        next_cursor: None,
256        subtree_counts: None,
257    })
258}
259
260/// Analyze a directory structure and return formatted output and file data.
261#[instrument(skip_all, fields(path = %root.display()))]
262pub fn analyze_directory(
263    root: &Path,
264    max_depth: Option<u32>,
265) -> Result<AnalysisOutput, AnalyzeError> {
266    let entries = walk_directory(root, max_depth)?;
267    let counter = Arc::new(AtomicUsize::new(0));
268    let ct = CancellationToken::new();
269    analyze_directory_with_progress(root, entries, counter, ct)
270}
271
272/// Determine analysis mode based on parameters and path.
273#[must_use]
274pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
275    if focus.is_some() {
276        return AnalysisMode::SymbolFocus;
277    }
278
279    let path_obj = Path::new(path);
280    if path_obj.is_dir() {
281        AnalysisMode::Overview
282    } else {
283        AnalysisMode::FileDetails
284    }
285}
286
287/// Analyze a single file and return semantic analysis with formatted output.
288#[instrument(skip_all, fields(path))]
289pub fn analyze_file(
290    path: &str,
291    ast_recursion_limit: Option<usize>,
292) -> Result<FileAnalysisOutput, AnalyzeError> {
293    let start = Instant::now();
294
295    // Check file size before reading
296    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
297        tracing::debug!("skipping large file: {}", path);
298        return Err(AnalyzeError::Parser(
299            crate::parser::ParserError::ParseError("file too large".to_string()),
300        ));
301    }
302
303    let source = std::fs::read_to_string(path)
304        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
305
306    let line_count = source.lines().count();
307
308    // Detect language from extension
309    let ext = Path::new(path)
310        .extension()
311        .and_then(|e| e.to_str())
312        .and_then(language_for_extension)
313        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
314
315    // Extract semantic information
316    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit, None)?;
317
318    // Populate the file path on references now that the path is known
319    for r in &mut semantic.references {
320        r.location = path.to_string();
321    }
322
323    // Resolve Python wildcard imports
324    if ext == "python" {
325        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
326    }
327
328    // Detect if this is a test file
329    let is_test = is_test_file(Path::new(path));
330
331    // Extract parent directory for relative path display
332    let parent_dir = Path::new(path).parent();
333
334    // Format output
335    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
336
337    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
338
339    Ok(FileAnalysisOutput::new(
340        formatted, semantic, line_count, None,
341    ))
342}
343
344/// Analyze source code from a string buffer without filesystem access.
345///
346/// This function analyzes in-memory source code by language identifier. The `language`
347/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
348/// extension (e.g., `"rs"`, `"py"`).
349///
350/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
351/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
352/// a file extension to its supported language identifier.
353///
354/// # Arguments
355///
356/// * `source` - The source code to analyze
357/// * `language` - The language identifier (language name or extension)
358/// * `ast_recursion_limit` - Optional limit for AST traversal depth
359///
360/// # Returns
361///
362/// - `Ok(FileAnalysisOutput)` on success
363/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
364/// - `Err(AnalyzeError::Parser)` if parsing fails
365///
366/// # Notes
367///
368/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
369/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
370#[inline]
371pub fn analyze_str(
372    source: &str,
373    language: &str,
374    ast_recursion_limit: Option<usize>,
375) -> Result<FileAnalysisOutput, AnalyzeError> {
376    // Resolve language: first try as a file extension, then as a language name
377    // (case-insensitive match against supported_languages()).
378    let lang = language_for_extension(language).or_else(|| {
379        let lower = language.to_ascii_lowercase();
380        supported_languages()
381            .iter()
382            .find(|&&name| name == lower)
383            .copied()
384    });
385    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
386
387    // Extract semantic information
388    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit, None)?;
389
390    // Populate a stable in-memory sentinel on all reference locations
391    for r in &mut semantic.references {
392        r.location = "<memory>".to_string();
393    }
394
395    // Count lines in the source
396    let line_count = source.lines().count();
397
398    // Format output with empty path (no filesystem access)
399    let formatted = format_file_details("", &semantic, line_count, false, None);
400
401    Ok(FileAnalysisOutput::new(
402        formatted, semantic, line_count, None,
403    ))
404}
405
/// Single entry in a call chain (depth-1 direct caller or callee).
// Built by `chains_to_entries` from the first element of an `InternalCallChain`.
// `file` has the analysis root stripped when the path lies under it; otherwise (or when
// no root is supplied) the path is kept as-is.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
pub struct CallChainEntry {
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Symbol name of the caller or callee")
    )]
    pub symbol: String,
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "File path relative to the repository root")
    )]
    pub file: String,
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Line number of the definition or call site (1-indexed)",
            schema_with = "crate::schema_helpers::integer_schema"
        )
    )]
    pub line: usize,
}
430
/// Result of focused symbol analysis.
// Serialized fields: `formatted`, `next_cursor`, `callers`, `test_callers`, `callees`
// (each omitted when `None`) and `def_use_sites`. All other fields are skipped by serde.
#[derive(Debug, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FocusedAnalysisOutput {
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the call graph analysis")
    )]
    pub formatted: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub prod_chains: Vec<InternalCallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub test_chains: Vec<InternalCallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub outgoing_chains: Vec<InternalCallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub def_count: usize,
    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub unfiltered_caller_count: usize,
    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub impl_trait_caller_count: usize,
    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub test_callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callees: Option<Vec<CallChainEntry>>,
    /// Definition and use sites for the symbol.
    // NOTE(review): `serde(default)` only influences deserialization, and this struct derives
    // only `Serialize` — confirm whether the attribute is still needed.
    #[serde(default)]
    pub def_use_sites: Vec<crate::types::DefUseSite>,
}
487
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Name of the symbol to analyze.
    pub focus: String,
    /// How `focus` is matched against the symbols known to the call graph.
    pub match_mode: SymbolMatchMode,
    /// Depth handed to the incoming and outgoing call-chain searches.
    pub follow_depth: u32,
    /// Depth limit forwarded to `walk_directory`; `None` is passed through unchanged.
    pub max_depth: Option<u32>,
    /// Optional AST recursion limit forwarded to the semantic extractor.
    pub ast_recursion_limit: Option<usize>,
    /// Selects the summary formatter instead of the full focused formatter.
    pub use_summary: bool,
    /// When `Some(true)`, caller edges are restricted to impl-trait edges; `None` is
    /// treated as `false`.
    pub impl_only: Option<bool>,
    /// NOTE(review): copied into the internal params, but its consumer is not visible in
    /// this part of the file — presumably toggles collection of definition/use sites.
    pub def_use: bool,
    /// Optional parse timeout in microseconds, forwarded to each semantic extraction call.
    pub parse_timeout_micros: Option<u64>,
}
502
/// Internal parameters for focused analysis phases.
///
/// Mirrors [`FocusedAnalysisConfig`] field for field except for `max_depth`, which
/// `analyze_focused_with_progress` consumes itself when walking the directory.
#[derive(Clone)]
struct InternalFocusedParams {
    focus: String,
    match_mode: SymbolMatchMode,
    follow_depth: u32,
    ast_recursion_limit: Option<usize>,
    use_summary: bool,
    // `None` is treated as `false` wherever this is read.
    impl_only: Option<bool>,
    def_use: bool,
    parse_timeout_micros: Option<u64>,
}
515
/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
///
/// As produced by `collect_file_analysis`, the second element is a flattened copy of the
/// `impl_traits` of every analysis in the first element.
type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
518
519/// Phase 1: Collect semantic analysis for all files in parallel.
520fn collect_file_analysis(
521    entries: &[WalkEntry],
522    progress: &Arc<AtomicUsize>,
523    ct: &CancellationToken,
524    ast_recursion_limit: Option<usize>,
525    parse_timeout_micros: Option<u64>,
526) -> Result<FileAnalysisBatch, AnalyzeError> {
527    // Check if already cancelled
528    if ct.is_cancelled() {
529        return Err(AnalyzeError::Cancelled);
530    }
531
532    // Use pre-walked entries (passed by caller)
533    // Collect semantic analysis for all files in parallel
534    let file_entries: Vec<&WalkEntry> = entries
535        .iter()
536        .filter(|e| !e.is_dir && !e.is_symlink)
537        .collect();
538
539    // Collect per-file timeout events so they can be surfaced as AnalyzeError::ParseTimeout.
540    let timed_out: std::sync::Mutex<Vec<(PathBuf, u64)>> = std::sync::Mutex::new(Vec::new());
541
542    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
543        .par_iter()
544        .filter_map(|entry| {
545            // Check cancellation per file
546            if ct.is_cancelled() {
547                return None;
548            }
549
550            let ext = entry.path.extension().and_then(|e| e.to_str());
551
552            // Check file size before reading
553            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
554                tracing::debug!("skipping large file: {}", entry.path.display());
555                progress.fetch_add(1, Ordering::Relaxed);
556                return None;
557            }
558
559            // Try to read file content
560            let Ok(source) = std::fs::read_to_string(&entry.path) else {
561                progress.fetch_add(1, Ordering::Relaxed);
562                return None;
563            };
564
565            // Detect language and extract semantic information
566            let language = if let Some(ext_str) = ext {
567                language_for_extension(ext_str)
568                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
569            } else {
570                "unknown".to_string()
571            };
572
573            match SemanticExtractor::extract(
574                &source,
575                &language,
576                ast_recursion_limit,
577                parse_timeout_micros,
578            ) {
579                Ok(mut semantic) => {
580                    // Populate file path on references
581                    for r in &mut semantic.references {
582                        r.location = entry.path.display().to_string();
583                    }
584                    // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
585                    for trait_info in &mut semantic.impl_traits {
586                        trait_info.path.clone_from(&entry.path);
587                    }
588                    progress.fetch_add(1, Ordering::Relaxed);
589                    Some((entry.path.clone(), semantic))
590                }
591                Err(crate::parser::ParserError::Timeout(micros)) => {
592                    tracing::warn!(
593                        "parse timeout exceeded for {}: {} microseconds",
594                        entry.path.display(),
595                        micros
596                    );
597                    if let Ok(mut v) = timed_out.lock() {
598                        v.push((entry.path.clone(), micros));
599                    }
600                    progress.fetch_add(1, Ordering::Relaxed);
601                    None
602                }
603                Err(_) => {
604                    progress.fetch_add(1, Ordering::Relaxed);
605                    None
606                }
607            }
608        })
609        .collect();
610
611    // Check if cancelled after parallel processing
612    if ct.is_cancelled() {
613        return Err(AnalyzeError::Cancelled);
614    }
615
616    // Surface the first timeout as AnalyzeError::ParseTimeout so callers can detect it.
617    if let Ok(mut v) = timed_out.lock()
618        && let Some((path, micros)) = v.drain(..).next()
619    {
620        return Err(AnalyzeError::ParseTimeout { path, micros });
621    }
622
623    // Collect all impl-trait info from analysis results
624    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
625        .iter()
626        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
627        .collect();
628
629    Ok((analysis_results, all_impl_traits))
630}
631
632/// Phase 2: Build call graph from analysis results.
633fn build_call_graph(
634    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
635    all_impl_traits: &[ImplTraitInfo],
636) -> Result<CallGraph, AnalyzeError> {
637    // Build call graph. Always build without impl_only filter first so we can
638    // record the unfiltered caller count before discarding those edges.
639    CallGraph::build_from_results(
640        analysis_results,
641        all_impl_traits,
642        false, // filter applied below after counting
643    )
644    .map_err(std::convert::Into::into)
645}
646
647/// Phase 3: Resolve symbol and apply `impl_only` filter.
648/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
649/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
650/// then compute `impl_trait_caller_count`.
651fn resolve_symbol(
652    graph: &mut CallGraph,
653    params: &InternalFocusedParams,
654) -> Result<(String, usize, usize), AnalyzeError> {
655    // Resolve symbol name using the requested match mode.
656    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
657        let exists = graph.definitions.contains_key(&params.focus)
658            || graph.callers.contains_key(&params.focus)
659            || graph.callees.contains_key(&params.focus);
660        if exists {
661            params.focus.clone()
662        } else {
663            return Err(crate::graph::GraphError::SymbolNotFound {
664                symbol: params.focus.clone(),
665                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
666            }
667            .into());
668        }
669    } else {
670        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
671    };
672
673    // Count unique callers for the focus symbol before applying impl_only filter.
674    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
675        edges
676            .iter()
677            .map(|e| &e.neighbor_name)
678            .collect::<std::collections::HashSet<_>>()
679            .len()
680    });
681
682    // Apply impl_only filter now if requested, then count filtered callers.
683    // Filter all caller adjacency lists so traversal and formatting are consistently
684    // restricted to impl-trait edges regardless of follow_depth.
685    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
686        for edges in graph.callers.values_mut() {
687            edges.retain(|e| e.is_impl_trait);
688        }
689        graph.callers.get(&resolved_focus).map_or(0, |edges| {
690            edges
691                .iter()
692                .map(|e| &e.neighbor_name)
693                .collect::<std::collections::HashSet<_>>()
694                .len()
695        })
696    } else {
697        unfiltered_caller_count
698    };
699
700    Ok((
701        resolved_focus,
702        unfiltered_caller_count,
703        impl_trait_caller_count,
704    ))
705}
706
/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
///
/// `prod_chains` and `test_chains` together are a partition of the incoming chains.
type ChainComputeResult = (
    String,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    usize,
);
715
716/// Helper function to convert InternalCallChain data to CallChainEntry vec.
717/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
718/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
719fn chains_to_entries(
720    chains: &[InternalCallChain],
721    root: Option<&std::path::Path>,
722) -> Option<Vec<CallChainEntry>> {
723    if chains.is_empty() {
724        return None;
725    }
726    let entries: Vec<CallChainEntry> = chains
727        .iter()
728        .take(10)
729        .filter_map(|chain| {
730            let (symbol, path, line) = chain.chain.first()?;
731            let file = match root {
732                Some(root) => path
733                    .strip_prefix(root)
734                    .unwrap_or(path.as_path())
735                    .to_string_lossy()
736                    .into_owned(),
737                None => path.to_string_lossy().into_owned(),
738            };
739            Some(CallChainEntry {
740                symbol: symbol.clone(),
741                file,
742                line: *line,
743            })
744        })
745        .collect();
746    if entries.is_empty() {
747        None
748    } else {
749        Some(entries)
750    }
751}
752
753/// Phase 4: Compute chains and format output.
754fn compute_chains(
755    graph: &CallGraph,
756    resolved_focus: &str,
757    root: &Path,
758    params: &InternalFocusedParams,
759    unfiltered_caller_count: usize,
760    impl_trait_caller_count: usize,
761    def_use_sites: &[crate::types::DefUseSite],
762) -> Result<ChainComputeResult, AnalyzeError> {
763    // Compute chain data for pagination (always, regardless of summary mode)
764    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
765    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
766    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
767
768    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
769        incoming_chains.iter().cloned().partition(|chain| {
770            chain
771                .chain
772                .first()
773                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
774        });
775
776    // Format output with pre-computed chains
777    let mut formatted = if params.use_summary {
778        format_focused_summary_internal(
779            graph,
780            resolved_focus,
781            params.follow_depth,
782            Some(root),
783            Some(&incoming_chains),
784            Some(&outgoing_chains),
785            def_use_sites,
786        )?
787    } else {
788        format_focused_internal(
789            graph,
790            resolved_focus,
791            params.follow_depth,
792            Some(root),
793            Some(&incoming_chains),
794            Some(&outgoing_chains),
795            def_use_sites,
796        )?
797    };
798
799    // Add FILTER header if impl_only filter was applied
800    if params.impl_only.unwrap_or(false) {
801        let filter_header = format!(
802            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
803        );
804        formatted = format!("{filter_header}{formatted}");
805    }
806
807    Ok((
808        formatted,
809        prod_chains,
810        test_chains,
811        outgoing_chains,
812        def_count,
813    ))
814}
815
816/// Analyze a symbol's call graph across a directory with progress tracking.
817// public API; callers expect owned semantics
818#[allow(clippy::needless_pass_by_value)]
819pub fn analyze_focused_with_progress(
820    root: &Path,
821    params: &FocusedAnalysisConfig,
822    progress: Arc<AtomicUsize>,
823    ct: CancellationToken,
824) -> Result<FocusedAnalysisOutput, AnalyzeError> {
825    let entries = walk_directory(root, params.max_depth)?;
826    let internal_params = InternalFocusedParams {
827        focus: params.focus.clone(),
828        match_mode: params.match_mode.clone(),
829        follow_depth: params.follow_depth,
830        ast_recursion_limit: params.ast_recursion_limit,
831        use_summary: params.use_summary,
832        impl_only: params.impl_only,
833        def_use: params.def_use,
834        parse_timeout_micros: params.parse_timeout_micros,
835    };
836    analyze_focused_with_progress_with_entries_internal(
837        root,
838        params.max_depth,
839        &progress,
840        &ct,
841        &internal_params,
842        &entries,
843    )
844}
845
846/// Internal implementation of focused analysis using pre-walked entries and params struct.
847#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
848fn analyze_focused_with_progress_with_entries_internal(
849    root: &Path,
850    _max_depth: Option<u32>,
851    progress: &Arc<AtomicUsize>,
852    ct: &CancellationToken,
853    params: &InternalFocusedParams,
854    entries: &[WalkEntry],
855) -> Result<FocusedAnalysisOutput, AnalyzeError> {
856    // Check if already cancelled
857    if ct.is_cancelled() {
858        return Err(AnalyzeError::Cancelled);
859    }
860
861    // Check if path is a file (hint to use directory)
862    if root.is_file() {
863        let formatted =
864            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
865                .to_string();
866        return Ok(FocusedAnalysisOutput {
867            formatted,
868            next_cursor: None,
869            prod_chains: vec![],
870            test_chains: vec![],
871            outgoing_chains: vec![],
872            def_count: 0,
873            unfiltered_caller_count: 0,
874            impl_trait_caller_count: 0,
875            callers: None,
876            test_callers: None,
877            callees: None,
878            def_use_sites: vec![],
879        });
880    }
881
882    // Phase 1: Collect file analysis
883    let (analysis_results, all_impl_traits) = collect_file_analysis(
884        entries,
885        progress,
886        ct,
887        params.ast_recursion_limit,
888        params.parse_timeout_micros,
889    )?;
890
891    // Check for cancellation before building the call graph (phase 2)
892    if ct.is_cancelled() {
893        return Err(AnalyzeError::Cancelled);
894    }
895
896    // Phase 2: Build call graph
897    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
898
899    // Check for cancellation before resolving the symbol (phase 3)
900    if ct.is_cancelled() {
901        return Err(AnalyzeError::Cancelled);
902    }
903
904    // Phase 3: Resolve symbol and apply impl_only filter.
905    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
906    // fall through to def-use extraction instead of returning SymbolNotFound.
907    let resolve_result = resolve_symbol(&mut graph, params);
908    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
909        &resolve_result
910    {
911        // Deliberately not collapsed: resolve_result must stay alive past this block
912        // so that the `?` below can propagate non-SymbolNotFound errors.
913        if params.def_use {
914            let def_use_sites =
915                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
916            if def_use_sites.is_empty() {
917                // Symbol not found anywhere (neither in call graph nor as def/use site).
918                // Propagate the original SymbolNotFound error instead of returning an
919                // empty success response.
920                return Err(resolve_result.unwrap_err());
921            }
922            use std::fmt::Write as _;
923            let mut formatted = String::new();
924            let _ = writeln!(
925                formatted,
926                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
927                params.focus
928            );
929            {
930                let writes = def_use_sites
931                    .iter()
932                    .filter(|s| {
933                        matches!(
934                            s.kind,
935                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
936                        )
937                    })
938                    .count();
939                let reads = def_use_sites
940                    .iter()
941                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
942                    .count();
943                let _ = writeln!(
944                    formatted,
945                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
946                    params.focus,
947                    def_use_sites.len(),
948                    writes,
949                    reads
950                );
951            }
952            return Ok(FocusedAnalysisOutput {
953                formatted,
954                next_cursor: None,
955                callers: None,
956                test_callers: None,
957                callees: None,
958                prod_chains: vec![],
959                test_chains: vec![],
960                outgoing_chains: vec![],
961                def_count: 0,
962                unfiltered_caller_count: 0,
963                impl_trait_caller_count: 0,
964                def_use_sites,
965            });
966        }
967    }
968    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
969
970    // Check for cancellation before computing chains (phase 4)
971    if ct.is_cancelled() {
972        return Err(AnalyzeError::Cancelled);
973    }
974
975    // Phase 5 (optional, before formatting): Def-use site extraction.
976    // Use params.focus (the raw user-supplied string) rather than resolved_focus
977    // so that variable/field names that are not in the call graph still work.
978    let def_use_sites = if params.def_use {
979        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
980    } else {
981        Vec::new()
982    };
983
984    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
985    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
986        &graph,
987        &resolved_focus,
988        root,
989        params,
990        unfiltered_caller_count,
991        impl_trait_caller_count,
992        &def_use_sites,
993    )?;
994
995    // Compute depth-1 chains for structured output fields (always direct relationships only,
996    // regardless of `follow_depth` used for the text-formatted output).
997    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
998        // Chains already at depth 1; reuse the partitioned vecs.
999        let callers = chains_to_entries(&prod_chains, Some(root));
1000        let test_callers = chains_to_entries(&test_chains, Some(root));
1001        let callees = chains_to_entries(&outgoing_chains, Some(root));
1002        (callers, test_callers, callees)
1003    } else {
1004        // follow_depth > 1: re-query at depth 1 to get only direct edges.
1005        let incoming1 = graph
1006            .find_incoming_chains(&resolved_focus, 1)
1007            .unwrap_or_default();
1008        let outgoing1 = graph
1009            .find_outgoing_chains(&resolved_focus, 1)
1010            .unwrap_or_default();
1011        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
1012            chain
1013                .chain
1014                .first()
1015                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
1016        });
1017        let callers = chains_to_entries(&prod1, Some(root));
1018        let test_callers = chains_to_entries(&test1, Some(root));
1019        let callees = chains_to_entries(&outgoing1, Some(root));
1020        (callers, test_callers, callees)
1021    };
1022
1023    Ok(FocusedAnalysisOutput {
1024        formatted,
1025        next_cursor: None,
1026        callers: depth1_callers,
1027        test_callers: depth1_test_callers,
1028        callees: depth1_callees,
1029        prod_chains,
1030        test_chains,
1031        outgoing_chains,
1032        def_count,
1033        unfiltered_caller_count,
1034        impl_trait_caller_count,
1035        def_use_sites,
1036    })
1037}
1038
1039/// Phase 5: Extract def-use sites for `symbol` across all entries.
1040/// Writes go before reads; within each kind ordered by file, line, then column.
1041fn collect_def_use_sites(
1042    entries: &[WalkEntry],
1043    symbol: &str,
1044    ast_recursion_limit: Option<usize>,
1045    root: &std::path::Path,
1046    ct: &CancellationToken,
1047) -> Vec<crate::types::DefUseSite> {
1048    use crate::parser::SemanticExtractor;
1049
1050    let file_entries: Vec<&WalkEntry> = entries
1051        .iter()
1052        .filter(|e| !e.is_dir && !e.is_symlink)
1053        .collect();
1054
1055    let mut sites: Vec<crate::types::DefUseSite> = file_entries
1056        .par_iter()
1057        .filter_map(|entry| {
1058            if ct.is_cancelled() {
1059                return None;
1060            }
1061
1062            // Check file size before reading
1063            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1064                tracing::debug!("skipping large file: {}", entry.path.display());
1065                return None;
1066            }
1067
1068            let Ok(source) = std::fs::read_to_string(&entry.path) else {
1069                return None;
1070            };
1071            let ext = entry
1072                .path
1073                .extension()
1074                .and_then(|e| e.to_str())
1075                .unwrap_or("");
1076            let lang = crate::lang::language_for_extension(ext)?;
1077            let file_path = entry
1078                .path
1079                .strip_prefix(root)
1080                .unwrap_or(&entry.path)
1081                .display()
1082                .to_string();
1083            let sites = SemanticExtractor::extract_def_use_for_file(
1084                &source,
1085                lang,
1086                symbol,
1087                &file_path,
1088                ast_recursion_limit,
1089            );
1090            if sites.is_empty() { None } else { Some(sites) }
1091        })
1092        .flatten()
1093        .collect();
1094
1095    // Writes before reads; within each kind: file, line, then column for deterministic order
1096    sites.sort_by(|a, b| {
1097        use crate::types::DefUseKind;
1098        let kind_ord = |k: &DefUseKind| match k {
1099            DefUseKind::Write | DefUseKind::WriteRead => 0,
1100            DefUseKind::Read => 1,
1101        };
1102        kind_ord(&a.kind)
1103            .cmp(&kind_ord(&b.kind))
1104            .then_with(|| a.file.cmp(&b.file))
1105            .then_with(|| a.line.cmp(&b.line))
1106            .then_with(|| a.column.cmp(&b.column))
1107    });
1108
1109    sites
1110}
1111
1112/// Analyze a symbol's call graph using pre-walked directory entries.
1113pub fn analyze_focused_with_progress_with_entries(
1114    root: &Path,
1115    params: &FocusedAnalysisConfig,
1116    progress: &Arc<AtomicUsize>,
1117    ct: &CancellationToken,
1118    entries: &[WalkEntry],
1119) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1120    let internal_params = InternalFocusedParams {
1121        focus: params.focus.clone(),
1122        match_mode: params.match_mode.clone(),
1123        follow_depth: params.follow_depth,
1124        ast_recursion_limit: params.ast_recursion_limit,
1125        use_summary: params.use_summary,
1126        impl_only: params.impl_only,
1127        def_use: params.def_use,
1128        parse_timeout_micros: params.parse_timeout_micros,
1129    };
1130    analyze_focused_with_progress_with_entries_internal(
1131        root,
1132        params.max_depth,
1133        progress,
1134        ct,
1135        &internal_params,
1136        entries,
1137    )
1138}
1139
1140#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1141pub fn analyze_focused(
1142    root: &Path,
1143    focus: &str,
1144    follow_depth: u32,
1145    max_depth: Option<u32>,
1146    ast_recursion_limit: Option<usize>,
1147) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1148    let entries = walk_directory(root, max_depth)?;
1149    let counter = Arc::new(AtomicUsize::new(0));
1150    let ct = CancellationToken::new();
1151    let params = FocusedAnalysisConfig {
1152        focus: focus.to_string(),
1153        match_mode: SymbolMatchMode::Exact,
1154        follow_depth,
1155        max_depth,
1156        ast_recursion_limit,
1157        use_summary: false,
1158        impl_only: None,
1159        def_use: false,
1160        parse_timeout_micros: None,
1161    };
1162    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1163}
1164
1165/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1166/// functions, imports) for lightweight code understanding.
1167#[instrument(skip_all, fields(path))]
1168pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1169    // Check file size before reading
1170    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1171        tracing::debug!("skipping large file: {}", path);
1172        return Err(AnalyzeError::Parser(
1173            crate::parser::ParserError::ParseError("file too large".to_string()),
1174        ));
1175    }
1176
1177    let source = std::fs::read_to_string(path)
1178        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1179
1180    let file_path = Path::new(path);
1181    let name = file_path
1182        .file_name()
1183        .and_then(|s| s.to_str())
1184        .unwrap_or("unknown")
1185        .to_string();
1186
1187    let line_count = source.lines().count();
1188
1189    let language = file_path
1190        .extension()
1191        .and_then(|e| e.to_str())
1192        .and_then(language_for_extension)
1193        .ok_or_else(|| {
1194            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1195                "unsupported or missing file extension".to_string(),
1196            ))
1197        })?;
1198
1199    let semantic = SemanticExtractor::extract(&source, language, None, None)?;
1200
1201    let functions = semantic
1202        .functions
1203        .into_iter()
1204        .map(|f| crate::types::ModuleFunctionInfo {
1205            name: f.name,
1206            line: f.line,
1207        })
1208        .collect();
1209
1210    let imports = semantic
1211        .imports
1212        .into_iter()
1213        .map(|i| crate::types::ModuleImportInfo {
1214            module: i.module,
1215            items: i.items,
1216        })
1217        .collect();
1218
1219    Ok(crate::types::ModuleInfo {
1220        name,
1221        line_count,
1222        language: language.to_string(),
1223        functions,
1224        imports,
1225    })
1226}
1227
1228/// Scan a directory for files that import a given module path.
1229///
1230/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1231/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1232/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1233pub fn analyze_import_lookup(
1234    root: &Path,
1235    module: &str,
1236    entries: &[WalkEntry],
1237    ast_recursion_limit: Option<usize>,
1238) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1239    let matches: Vec<(PathBuf, usize)> = entries
1240        .par_iter()
1241        .filter_map(|entry| {
1242            if entry.is_dir || entry.is_symlink {
1243                tracing::debug!("skipping symlink: {}", entry.path.display());
1244                return None;
1245            }
1246            let ext = entry
1247                .path
1248                .extension()
1249                .and_then(|e| e.to_str())
1250                .and_then(crate::lang::language_for_extension)?;
1251            let source = std::fs::read_to_string(&entry.path).ok()?;
1252            let semantic =
1253                SemanticExtractor::extract(&source, ext, ast_recursion_limit, None).ok()?;
1254            for import in &semantic.imports {
1255                if import.module == module || import.items.iter().any(|item| item == module) {
1256                    return Some((entry.path.clone(), import.line));
1257                }
1258            }
1259            None
1260        })
1261        .collect();
1262
1263    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1264    text.push_str(&format!("ROOT: {}\n", root.display()));
1265    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1266    for (path, line) in &matches {
1267        let rel = path.strip_prefix(root).unwrap_or(path);
1268        text.push_str(&format!("  {}:{line}\n", rel.display()));
1269    }
1270
1271    Ok(FocusedAnalysisOutput {
1272        formatted: text,
1273        next_cursor: None,
1274        prod_chains: vec![],
1275        test_chains: vec![],
1276        outgoing_chains: vec![],
1277        def_count: 0,
1278        unfiltered_caller_count: 0,
1279        impl_trait_caller_count: 0,
1280        callers: None,
1281        test_callers: None,
1282        callees: None,
1283        def_use_sites: vec![],
1284    })
1285}
1286
1287/// Resolve Python wildcard imports to actual symbol names.
1288///
1289/// For each import with items=`["*"]`, this function:
1290/// 1. Parses the relative dots (if any) and climbs the directory tree
1291/// 2. Finds the target .py file or __init__.py
1292/// 3. Extracts symbols (functions and classes) from the target
1293/// 4. Honors __all__ if defined, otherwise uses function+class names
1294///
1295/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1296fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1297    use std::collections::HashMap;
1298
1299    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1300    let Ok(file_path_canonical) = file_path.canonicalize() else {
1301        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1302        return;
1303    };
1304
1305    for import in imports.iter_mut() {
1306        if import.items != ["*"] {
1307            continue;
1308        }
1309        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1310    }
1311}
1312
1313/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1314fn resolve_single_wildcard(
1315    import: &mut ImportInfo,
1316    file_path: &Path,
1317    file_path_canonical: &Path,
1318    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1319) {
1320    let module = import.module.clone();
1321    let dot_count = module.chars().take_while(|c| *c == '.').count();
1322    if dot_count == 0 {
1323        return;
1324    }
1325    let module_path = module.trim_start_matches('.');
1326
1327    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1328    else {
1329        return;
1330    };
1331
1332    let Ok(canonical) = target_to_read.canonicalize() else {
1333        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1334        return;
1335    };
1336
1337    if canonical == file_path_canonical {
1338        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1339        return;
1340    }
1341
1342    if let Some(cached) = resolved_cache.get(&canonical) {
1343        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1344        import.items.clone_from(cached);
1345        return;
1346    }
1347
1348    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1349        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1350        import.items.clone_from(&symbols);
1351        resolved_cache.insert(canonical, symbols);
1352    }
1353}
1354
1355/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1356fn locate_target_file(
1357    file_path: &Path,
1358    dot_count: usize,
1359    module_path: &str,
1360    module: &str,
1361) -> Option<PathBuf> {
1362    let mut target_dir = file_path.parent()?.to_path_buf();
1363
1364    for _ in 1..dot_count {
1365        if !target_dir.pop() {
1366            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1367            return None;
1368        }
1369    }
1370
1371    let target_file = if module_path.is_empty() {
1372        target_dir.join("__init__.py")
1373    } else {
1374        let rel_path = module_path.replace('.', "/");
1375        target_dir.join(format!("{rel_path}.py"))
1376    };
1377
1378    if target_file.exists() {
1379        Some(target_file)
1380    } else if target_file.with_extension("").is_dir() {
1381        let init = target_file.with_extension("").join("__init__.py");
1382        if init.exists() { Some(init) } else { None }
1383    } else {
1384        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1385        None
1386    }
1387}
1388
1389/// Read and parse a target .py file, returning its exported symbols.
1390fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1391    use tree_sitter::Parser;
1392
1393    // Check file size before reading
1394    if target_path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1395        tracing::debug!("skipping large file: {}", target_path.display());
1396        return None;
1397    }
1398
1399    let source = match std::fs::read_to_string(target_path) {
1400        Ok(s) => s,
1401        Err(e) => {
1402            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1403            return None;
1404        }
1405    };
1406
1407    // Parse once with tree-sitter
1408    let lang_info = crate::languages::get_language_info("python")?;
1409    let mut parser = Parser::new();
1410    if parser.set_language(&lang_info.language).is_err() {
1411        return None;
1412    }
1413    let tree = parser.parse(&source, None)?;
1414
1415    // First, try to extract __all__ from the same tree
1416    let mut symbols = Vec::new();
1417    extract_all_from_tree(&tree, &source, &mut symbols);
1418    if !symbols.is_empty() {
1419        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1420        return Some(symbols);
1421    }
1422
1423    // Fallback: extract functions/classes from the tree
1424    let root = tree.root_node();
1425    let mut cursor = root.walk();
1426    for child in root.children(&mut cursor) {
1427        if matches!(child.kind(), "function_definition" | "class_definition")
1428            && let Some(name_node) = child.child_by_field_name("name")
1429        {
1430            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1431            if !name.starts_with('_') {
1432                symbols.push(name);
1433            }
1434        }
1435    }
1436    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1437    Some(symbols)
1438}
1439
1440/// Extract __all__ from a tree-sitter tree.
1441fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1442    let root = tree.root_node();
1443    let mut cursor = root.walk();
1444    for child in root.children(&mut cursor) {
1445        if child.kind() == "simple_statement" {
1446            // simple_statement contains assignment and other statement types
1447            let mut simple_cursor = child.walk();
1448            for simple_child in child.children(&mut simple_cursor) {
1449                if simple_child.kind() == "assignment"
1450                    && let Some(left) = simple_child.child_by_field_name("left")
1451                {
1452                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1453                    if target_text == "__all__"
1454                        && let Some(right) = simple_child.child_by_field_name("right")
1455                    {
1456                        extract_string_list_from_list_node(&right, source, result);
1457                    }
1458                }
1459            }
1460        } else if child.kind() == "expression_statement" {
1461            // Fallback for older Python AST structures
1462            let mut stmt_cursor = child.walk();
1463            for stmt_child in child.children(&mut stmt_cursor) {
1464                if stmt_child.kind() == "assignment"
1465                    && let Some(left) = stmt_child.child_by_field_name("left")
1466                {
1467                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1468                    if target_text == "__all__"
1469                        && let Some(right) = stmt_child.child_by_field_name("right")
1470                    {
1471                        extract_string_list_from_list_node(&right, source, result);
1472                    }
1473                }
1474            }
1475        }
1476    }
1477}
1478
1479/// Extract string literals from a Python list node.
1480fn extract_string_list_from_list_node(
1481    list_node: &tree_sitter::Node,
1482    source: &str,
1483    result: &mut Vec<String>,
1484) {
1485    let mut cursor = list_node.walk();
1486    for child in list_node.named_children(&mut cursor) {
1487        if child.kind() == "string" {
1488            let raw = source[child.start_byte()..child.end_byte()].trim();
1489            // Strip quotes: "name" -> name
1490            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1491            if !unquoted.is_empty() {
1492                result.push(unquoted);
1493            }
1494        }
1495    }
1496}
1497
/// Unit tests for the analysis entry points and helpers defined in this module.
1499#[cfg(test)]
1500mod tests {
1501    use super::*;
1502    use crate::formatter::format_focused_paginated;
1503    use crate::graph::InternalCallChain;
1504    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1505    use std::fs;
1506    use std::path::PathBuf;
1507    use tempfile::TempDir;
1508
1509    #[cfg(feature = "lang-rust")]
1510    #[test]
1511    fn analyze_str_rust_happy_path() {
1512        let source = "fn hello() -> i32 { 42 }";
1513        let result = analyze_str(source, "rs", None);
1514        assert!(result.is_ok());
1515    }
1516
1517    #[cfg(feature = "lang-python")]
1518    #[test]
1519    fn analyze_str_python_happy_path() {
1520        let source = "def greet(name):\n    return f'Hello {name}'";
1521        let result = analyze_str(source, "py", None);
1522        assert!(result.is_ok());
1523    }
1524
1525    #[cfg(feature = "lang-rust")]
1526    #[test]
1527    fn analyze_str_rust_by_language_name() {
1528        let source = "fn hello() -> i32 { 42 }";
1529        let result = analyze_str(source, "rust", None);
1530        assert!(result.is_ok());
1531    }
1532
1533    #[cfg(feature = "lang-python")]
1534    #[test]
1535    fn analyze_str_python_by_language_name() {
1536        let source = "def greet(name):\n    return f'Hello {name}'";
1537        let result = analyze_str(source, "python", None);
1538        assert!(result.is_ok());
1539    }
1540
1541    #[cfg(feature = "lang-rust")]
1542    #[test]
1543    fn analyze_str_rust_mixed_case() {
1544        let source = "fn hello() -> i32 { 42 }";
1545        let result = analyze_str(source, "RuSt", None);
1546        assert!(result.is_ok());
1547    }
1548
1549    #[cfg(feature = "lang-python")]
1550    #[test]
1551    fn analyze_str_python_mixed_case() {
1552        let source = "def greet(name):\n    return f'Hello {name}'";
1553        let result = analyze_str(source, "PyThOn", None);
1554        assert!(result.is_ok());
1555    }
1556
1557    #[test]
1558    fn analyze_str_unsupported_language() {
1559        let result = analyze_str("code", "brainfuck", None);
1560        assert!(
1561            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1562        );
1563    }
1564
1565    #[cfg(feature = "lang-rust")]
1566    #[test]
1567    fn test_symbol_focus_callers_pagination_first_page() {
1568        let temp_dir = TempDir::new().unwrap();
1569
1570        // Create a file with many callers of `target`
1571        let mut code = String::from("fn target() {}\n");
1572        for i in 0..15 {
1573            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1574        }
1575        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1576
1577        // Act
1578        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1579
1580        // Paginate prod callers with page_size=5
1581        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1582            .expect("paginate failed");
1583        assert!(
1584            paginated.total >= 5,
1585            "should have enough callers to paginate"
1586        );
1587        assert!(
1588            paginated.next_cursor.is_some(),
1589            "should have next_cursor for page 1"
1590        );
1591
1592        // Verify cursor encodes callers mode
1593        assert_eq!(paginated.items.len(), 5);
1594    }
1595
1596    #[test]
1597    fn test_symbol_focus_callers_pagination_second_page() {
1598        let temp_dir = TempDir::new().unwrap();
1599
1600        let mut code = String::from("fn target() {}\n");
1601        for i in 0..12 {
1602            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1603        }
1604        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1605
1606        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1607        let total_prod = output.prod_chains.len();
1608
1609        if total_prod > 5 {
1610            // Get page 1 cursor
1611            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1612                .expect("paginate failed");
1613            assert!(p1.next_cursor.is_some());
1614
1615            let cursor_str = p1.next_cursor.unwrap();
1616            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1617
1618            // Get page 2
1619            let p2 = paginate_slice(
1620                &output.prod_chains,
1621                cursor_data.offset,
1622                5,
1623                PaginationMode::Callers,
1624            )
1625            .expect("paginate failed");
1626
1627            // Format paginated output
1628            let formatted = format_focused_paginated(
1629                &p2.items,
1630                total_prod,
1631                PaginationMode::Callers,
1632                "target",
1633                &output.prod_chains,
1634                &output.test_chains,
1635                &output.outgoing_chains,
1636                output.def_count,
1637                cursor_data.offset,
1638                Some(temp_dir.path()),
1639                true,
1640            );
1641
1642            // Assert: header shows correct range for page 2
1643            let expected_start = cursor_data.offset + 1;
1644            assert!(
1645                formatted.contains(&format!("CALLERS ({}", expected_start)),
1646                "header should show page 2 range, got: {}",
1647                formatted
1648            );
1649        }
1650    }
1651
1652    #[test]
1653    fn test_chains_to_entries_empty_returns_none() {
1654        // Arrange
1655        let chains: Vec<InternalCallChain> = vec![];
1656
1657        // Act
1658        let result = chains_to_entries(&chains, None);
1659
1660        // Assert
1661        assert!(result.is_none());
1662    }
1663
1664    #[test]
1665    fn test_chains_to_entries_with_data_returns_entries() {
1666        // Arrange
1667        let chains = vec![
1668            InternalCallChain {
1669                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1670            },
1671            InternalCallChain {
1672                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1673            },
1674        ];
1675        let root = PathBuf::from("/root");
1676
1677        // Act
1678        let result = chains_to_entries(&chains, Some(root.as_path()));
1679
1680        // Assert
1681        assert!(result.is_some());
1682        let entries = result.unwrap();
1683        assert_eq!(entries.len(), 2);
1684        assert_eq!(entries[0].symbol, "caller1");
1685        assert_eq!(entries[0].file, "lib.rs");
1686        assert_eq!(entries[0].line, 10);
1687        assert_eq!(entries[1].symbol, "caller2");
1688        assert_eq!(entries[1].file, "other.rs");
1689        assert_eq!(entries[1].line, 20);
1690    }
1691
1692    #[test]
1693    fn test_symbol_focus_callees_pagination() {
1694        let temp_dir = TempDir::new().unwrap();
1695
1696        // target calls many functions
1697        let mut code = String::from("fn target() {\n");
1698        for i in 0..10 {
1699            code.push_str(&format!("    callee_{:02}();\n", i));
1700        }
1701        code.push_str("}\n");
1702        for i in 0..10 {
1703            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1704        }
1705        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1706
1707        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1708        let total_callees = output.outgoing_chains.len();
1709
1710        if total_callees > 3 {
1711            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1712                .expect("paginate failed");
1713
1714            let formatted = format_focused_paginated(
1715                &paginated.items,
1716                total_callees,
1717                PaginationMode::Callees,
1718                "target",
1719                &output.prod_chains,
1720                &output.test_chains,
1721                &output.outgoing_chains,
1722                output.def_count,
1723                0,
1724                Some(temp_dir.path()),
1725                true,
1726            );
1727
1728            assert!(
1729                formatted.contains(&format!(
1730                    "CALLEES (1-{} of {})",
1731                    paginated.items.len(),
1732                    total_callees
1733                )),
1734                "header should show callees range, got: {}",
1735                formatted
1736            );
1737        }
1738    }
1739
1740    #[test]
1741    fn test_symbol_focus_empty_prod_callers() {
1742        let temp_dir = TempDir::new().unwrap();
1743
1744        // target is only called from test functions
1745        let code = r#"
1746fn target() {}
1747
1748#[cfg(test)]
1749mod tests {
1750    use super::*;
1751    #[test]
1752    fn test_something() { target(); }
1753}
1754"#;
1755        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1756
1757        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1758
1759        // prod_chains may be empty; pagination should handle it gracefully
1760        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1761            .expect("paginate failed");
1762        assert_eq!(paginated.items.len(), output.prod_chains.len());
1763        assert!(
1764            paginated.next_cursor.is_none(),
1765            "no next_cursor for empty or single-page prod_chains"
1766        );
1767    }
1768
1769    #[test]
1770    fn test_impl_only_filter_header_correct_counts() {
1771        let temp_dir = TempDir::new().unwrap();
1772
1773        // Create a Rust fixture with:
1774        // - A trait definition
1775        // - An impl Trait for SomeType block that calls the focus symbol
1776        // - A regular (non-trait-impl) function that also calls the focus symbol
1777        let code = r#"
1778trait MyTrait {
1779    fn focus_symbol();
1780}
1781
1782struct SomeType;
1783
1784impl MyTrait for SomeType {
1785    fn focus_symbol() {}
1786}
1787
1788fn impl_caller() {
1789    SomeType::focus_symbol();
1790}
1791
1792fn regular_caller() {
1793    SomeType::focus_symbol();
1794}
1795"#;
1796        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1797
1798        // Call analyze_focused with impl_only=Some(true)
1799        let params = FocusedAnalysisConfig {
1800            focus: "focus_symbol".to_string(),
1801            match_mode: SymbolMatchMode::Insensitive,
1802            follow_depth: 1,
1803            max_depth: None,
1804            ast_recursion_limit: None,
1805            use_summary: false,
1806            impl_only: Some(true),
1807            def_use: false,
1808            parse_timeout_micros: None,
1809        };
1810        let output = analyze_focused_with_progress(
1811            temp_dir.path(),
1812            &params,
1813            Arc::new(AtomicUsize::new(0)),
1814            CancellationToken::new(),
1815        )
1816        .unwrap();
1817
1818        // Assert the result contains "FILTER: impl_only=true"
1819        assert!(
1820            output.formatted.contains("FILTER: impl_only=true"),
1821            "formatted output should contain FILTER header for impl_only=true, got: {}",
1822            output.formatted
1823        );
1824
1825        // Assert the retained count N < total count M
1826        assert!(
1827            output.impl_trait_caller_count < output.unfiltered_caller_count,
1828            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1829            output.impl_trait_caller_count,
1830            output.unfiltered_caller_count
1831        );
1832
1833        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1834        let filter_line = output
1835            .formatted
1836            .lines()
1837            .find(|line| line.contains("FILTER: impl_only=true"))
1838            .expect("should find FILTER line");
1839        assert!(
1840            filter_line.contains(&format!(
1841                "({} of {} callers shown)",
1842                output.impl_trait_caller_count, output.unfiltered_caller_count
1843            )),
1844            "FILTER line should show correct N of M counts, got: {}",
1845            filter_line
1846        );
1847    }
1848
1849    #[test]
1850    fn test_callers_count_matches_formatted_output() {
1851        let temp_dir = TempDir::new().unwrap();
1852
1853        // Create a file with multiple callers of `target`
1854        let code = r#"
1855fn target() {}
1856fn caller_a() { target(); }
1857fn caller_b() { target(); }
1858fn caller_c() { target(); }
1859"#;
1860        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1861
1862        // Analyze the symbol
1863        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1864
1865        // Extract CALLERS count from formatted output
1866        let formatted = &output.formatted;
1867        let callers_count_from_output = formatted
1868            .lines()
1869            .find(|line| line.contains("FOCUS:"))
1870            .and_then(|line| {
1871                line.split(',')
1872                    .find(|part| part.contains("callers"))
1873                    .and_then(|part| {
1874                        part.trim()
1875                            .split_whitespace()
1876                            .next()
1877                            .and_then(|s| s.parse::<usize>().ok())
1878                    })
1879            })
1880            .expect("should find CALLERS count in formatted output");
1881
1882        // Compute expected count from prod_chains (unique first-caller names)
1883        let expected_callers_count = output
1884            .prod_chains
1885            .iter()
1886            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1887            .collect::<std::collections::HashSet<_>>()
1888            .len();
1889
1890        assert_eq!(
1891            callers_count_from_output, expected_callers_count,
1892            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1893        );
1894    }
1895
1896    #[cfg(feature = "lang-rust")]
1897    #[test]
1898    fn test_def_use_focused_analysis() {
1899        let temp_dir = TempDir::new().unwrap();
1900        fs::write(
1901            temp_dir.path().join("lib.rs"),
1902            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1903        )
1904        .unwrap();
1905
1906        let entries = walk_directory(temp_dir.path(), None).unwrap();
1907        let counter = Arc::new(AtomicUsize::new(0));
1908        let ct = CancellationToken::new();
1909        let params = FocusedAnalysisConfig {
1910            focus: "x".to_string(),
1911            match_mode: SymbolMatchMode::Exact,
1912            follow_depth: 1,
1913            max_depth: None,
1914            ast_recursion_limit: None,
1915            use_summary: false,
1916            impl_only: None,
1917            def_use: true,
1918            parse_timeout_micros: None,
1919        };
1920
1921        let output = analyze_focused_with_progress_with_entries(
1922            temp_dir.path(),
1923            &params,
1924            &counter,
1925            &ct,
1926            &entries,
1927        )
1928        .expect("def_use analysis should succeed");
1929
1930        assert!(
1931            !output.def_use_sites.is_empty(),
1932            "should find def-use sites for x"
1933        );
1934        assert!(
1935            output
1936                .def_use_sites
1937                .iter()
1938                .any(|s| s.kind == crate::types::DefUseKind::Write),
1939            "should have at least one Write site",
1940        );
1941        // No location appears as both write and read
1942        let write_locs: std::collections::HashSet<_> = output
1943            .def_use_sites
1944            .iter()
1945            .filter(|s| {
1946                matches!(
1947                    s.kind,
1948                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
1949                )
1950            })
1951            .map(|s| (&s.file, s.line, s.column))
1952            .collect();
1953        assert!(
1954            output
1955                .def_use_sites
1956                .iter()
1957                .filter(|s| s.kind == crate::types::DefUseKind::Read)
1958                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
1959            "no location should appear as both write and read",
1960        );
1961        assert!(
1962            output.formatted.contains("DEF-USE SITES"),
1963            "formatted output should contain DEF-USE SITES"
1964        );
1965    }
1966
1967    fn make_temp_file(content: &str) -> tempfile::NamedTempFile {
1968        let mut f = tempfile::NamedTempFile::new().unwrap();
1969        use std::io::Write;
1970        f.write_all(content.as_bytes()).unwrap();
1971        f.flush().unwrap();
1972        f
1973    }
1974}
aptu_coder_core/analyze.rs

aptu_coder_core/
analyze.rs