aptu_coder_core/
analyze.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Largest on-disk file size, in bytes, that the analysis functions in this module will read.
///
/// Files strictly larger than this are skipped by the directory-level passes and
/// rejected with a parser error by [`analyze_file`].
pub const MAX_FILE_SIZE_BYTES: u64 = 10_000_000;
32
/// Errors returned by the analysis entry points in this module.
///
/// Variants marked `#[from]` wrap an error type from another module and are produced
/// through the generated `From` conversion (for example by the `?` operator).
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Wraps a [`crate::traversal::TraversalError`].
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Wraps a [`crate::parser::ParserError`]. [`analyze_file`] also uses this variant
    /// for oversized files and for failures while reading the file.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Wraps a [`crate::graph::GraphError`], e.g. when a focus symbol cannot be resolved.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Wraps a [`crate::formatter::FormatterError`].
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The cancellation token was observed cancelled before or after the parallel pass.
    #[error("Analysis cancelled")]
    Cancelled,
    /// The language identifier passed to [`analyze_str`] could not be resolved; the
    /// payload is the identifier exactly as supplied by the caller.
    #[error("unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Wraps a [`std::io::Error`].
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// A line range whose `start` is greater than its `end`; `total` is reported as the
    /// number of lines in the file. Not constructed in this part of the file.
    #[error("invalid range: start ({start}) > end ({end}); file has {total} lines")]
    InvalidRange {
        start: usize,
        end: usize,
        total: usize,
    },
    /// The given path refers to a directory where a file was required.
    /// Not constructed in this part of the file.
    #[error("path is a directory, not a file: {0}")]
    NotAFile(PathBuf),
    /// A file with `total_lines` lines was requested without a line range.
    /// Not constructed in this part of the file.
    #[error(
        "file has {total_lines} lines; provide start_line and end_line, or call analyze_module first to locate the range"
    )]
    RangelessLargeFile { total_lines: usize },
    /// Parsing `path` hit the configured parse timeout; `micros` is the value carried by
    /// the parser's timeout error.
    #[error("parse timeout exceeded for {path}: {micros} microseconds")]
    ParseTimeout { path: PathBuf, micros: u64 },
}
65
// NOTE(review): with the `schemars` feature enabled, `///` doc comments on this type are
// presumably picked up as JSON-schema descriptions, so the extra notes below are plain
// `//` comments and the existing doc text is left untouched — confirm before rewording.
//
// Produced by `analyze_directory_with_progress`, which fills `formatted` from
// `format_structure` and leaves `next_cursor` and `subtree_counts` as `None`.
/// Result of directory analysis containing both formatted output and file data.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct AnalysisOutput {
    // Human-readable rendering of the directory structure.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    // One record per file that was successfully read; oversized and unreadable files
    // are not represented here.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "List of files analyzed in the directory")
    )]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub entries: Vec<WalkEntry>,
    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    // Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
98
// NOTE(review): with the `schemars` feature enabled, `///` doc comments on this type are
// presumably picked up as JSON-schema descriptions, so the extra notes below are plain
// `//` comments and the existing doc text is left untouched — confirm before rewording.
//
// Produced by `analyze_file` and `analyze_str`, both of which pass `None` for
// `next_cursor`.
/// Result of file-level semantic analysis.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FileAnalysisOutput {
    // Output of `format_file_details` for the analyzed source.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    // Raw extraction result; its reference locations are filled in by the caller
    // (file path for `analyze_file`, "<memory>" for `analyze_str`).
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Semantic analysis data including functions, classes, and imports")
    )]
    pub semantic: SemanticAnalysis,
    // Computed as `source.lines().count()` by the functions that build this value.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Total line count of the analyzed file")
    )]
    #[cfg_attr(
        feature = "schemars",
        schemars(schema_with = "crate::schema_helpers::integer_schema")
    )]
    pub line_count: usize,
    // Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
132
impl FileAnalysisOutput {
    /// Create a new `FileAnalysisOutput`.
    ///
    /// Each argument is stored in the field of the same name without further
    /// processing. The struct is `#[non_exhaustive]`, so code outside this crate has to
    /// go through this constructor rather than a struct literal.
    #[must_use]
    pub fn new(
        formatted: String,
        semantic: SemanticAnalysis,
        line_count: usize,
        next_cursor: Option<String>,
    ) -> Self {
        Self {
            formatted,
            semantic,
            line_count,
            next_cursor,
        }
    }
}
150#[instrument(skip_all, fields(path = %root.display()))]
151// public API; callers expect owned semantics
152#[allow(clippy::needless_pass_by_value)]
153pub fn analyze_directory_with_progress(
154    root: &Path,
155    entries: Vec<WalkEntry>,
156    progress: Arc<AtomicUsize>,
157    ct: CancellationToken,
158) -> Result<AnalysisOutput, AnalyzeError> {
159    // Check if already cancelled
160    if ct.is_cancelled() {
161        return Err(AnalyzeError::Cancelled);
162    }
163
164    // Detect language from file extension
165    let file_entries: Vec<&WalkEntry> = entries
166        .iter()
167        .filter(|e| !e.is_dir && !e.is_symlink)
168        .collect();
169
170    let start = Instant::now();
171    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
172
173    // Parallel analysis of files
174    let analysis_results: Vec<FileInfo> = file_entries
175        .par_iter()
176        .filter_map(|entry| {
177            // Check cancellation per file
178            if ct.is_cancelled() {
179                return None;
180            }
181
182            let path_str = entry.path.display().to_string();
183
184            // Detect language from extension
185            let ext = entry.path.extension().and_then(|e| e.to_str());
186
187            // Check file size before reading
188            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
189                tracing::debug!("skipping large file: {}", entry.path.display());
190                progress.fetch_add(1, Ordering::Relaxed);
191                return None;
192            }
193
194            // Try to read file content; skip binary or unreadable files
195            let Ok(source) = std::fs::read_to_string(&entry.path) else {
196                progress.fetch_add(1, Ordering::Relaxed);
197                return None;
198            };
199
200            // Count lines
201            let line_count = source.lines().count();
202
203            // Detect language and extract counts
204            let (language, function_count, class_count) = if let Some(ext_str) = ext {
205                if let Some(lang) = language_for_extension(ext_str) {
206                    let lang_str = lang.to_string();
207                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
208                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
209                        Err(_) => (lang_str, 0, 0),
210                    }
211                } else {
212                    ("unknown".to_string(), 0, 0)
213                }
214            } else {
215                ("unknown".to_string(), 0, 0)
216            };
217
218            progress.fetch_add(1, Ordering::Relaxed);
219
220            let is_test = is_test_file(&entry.path);
221
222            Some(FileInfo {
223                path: path_str,
224                line_count,
225                function_count,
226                class_count,
227                language,
228                is_test,
229            })
230        })
231        .collect();
232
233    // Check if cancelled after parallel processing
234    if ct.is_cancelled() {
235        return Err(AnalyzeError::Cancelled);
236    }
237
238    tracing::debug!(
239        file_count = file_entries.len(),
240        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
241        "analysis complete"
242    );
243
244    // Format output
245    let formatted = format_structure(&entries, &analysis_results, None);
246
247    Ok(AnalysisOutput {
248        formatted,
249        files: analysis_results,
250        entries,
251        next_cursor: None,
252        subtree_counts: None,
253    })
254}
255
256/// Analyze a directory structure and return formatted output and file data.
257#[instrument(skip_all, fields(path = %root.display()))]
258pub fn analyze_directory(
259    root: &Path,
260    max_depth: Option<u32>,
261) -> Result<AnalysisOutput, AnalyzeError> {
262    let entries = walk_directory(root, max_depth)?;
263    let counter = Arc::new(AtomicUsize::new(0));
264    let ct = CancellationToken::new();
265    analyze_directory_with_progress(root, entries, counter, ct)
266}
267
268/// Determine analysis mode based on parameters and path.
269#[must_use]
270pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
271    if focus.is_some() {
272        return AnalysisMode::SymbolFocus;
273    }
274
275    let path_obj = Path::new(path);
276    if path_obj.is_dir() {
277        AnalysisMode::Overview
278    } else {
279        AnalysisMode::FileDetails
280    }
281}
282
283/// Analyze a single file and return semantic analysis with formatted output.
284#[instrument(skip_all, fields(path))]
285pub fn analyze_file(
286    path: &str,
287    ast_recursion_limit: Option<usize>,
288) -> Result<FileAnalysisOutput, AnalyzeError> {
289    let start = Instant::now();
290
291    // Check file size before reading
292    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
293        tracing::debug!("skipping large file: {}", path);
294        return Err(AnalyzeError::Parser(
295            crate::parser::ParserError::ParseError("file too large".to_string()),
296        ));
297    }
298
299    let source = std::fs::read_to_string(path)
300        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
301
302    let line_count = source.lines().count();
303
304    // Detect language from extension
305    let ext = Path::new(path)
306        .extension()
307        .and_then(|e| e.to_str())
308        .and_then(language_for_extension)
309        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
310
311    // Extract semantic information
312    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit, None)?;
313
314    // Populate the file path on references now that the path is known
315    for r in &mut semantic.references {
316        r.location = path.to_string();
317    }
318
319    // Resolve Python wildcard imports
320    if ext == "python" {
321        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
322    }
323
324    // Detect if this is a test file
325    let is_test = is_test_file(Path::new(path));
326
327    // Extract parent directory for relative path display
328    let parent_dir = Path::new(path).parent();
329
330    // Format output
331    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
332
333    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
334
335    Ok(FileAnalysisOutput::new(
336        formatted, semantic, line_count, None,
337    ))
338}
339
340/// Analyze source code from a string buffer without filesystem access.
341///
342/// This function analyzes in-memory source code by language identifier. The `language`
343/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
344/// extension (e.g., `"rs"`, `"py"`).
345///
346/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
347/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
348/// a file extension to its supported language identifier.
349///
350/// # Arguments
351///
352/// * `source` - The source code to analyze
353/// * `language` - The language identifier (language name or extension)
354/// * `ast_recursion_limit` - Optional limit for AST traversal depth
355///
356/// # Returns
357///
358/// - `Ok(FileAnalysisOutput)` on success
359/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
360/// - `Err(AnalyzeError::Parser)` if parsing fails
361///
362/// # Notes
363///
364/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
365/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
366#[inline]
367pub fn analyze_str(
368    source: &str,
369    language: &str,
370    ast_recursion_limit: Option<usize>,
371) -> Result<FileAnalysisOutput, AnalyzeError> {
372    // Resolve language: first try as a file extension, then as a language name
373    // (case-insensitive match against supported_languages()).
374    let lang = language_for_extension(language).or_else(|| {
375        let lower = language.to_ascii_lowercase();
376        supported_languages()
377            .iter()
378            .find(|&&name| name == lower)
379            .copied()
380    });
381    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
382
383    // Extract semantic information
384    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit, None)?;
385
386    // Populate a stable in-memory sentinel on all reference locations
387    for r in &mut semantic.references {
388        r.location = "<memory>".to_string();
389    }
390
391    // Count lines in the source
392    let line_count = source.lines().count();
393
394    // Format output with empty path (no filesystem access)
395    let formatted = format_file_details("", &semantic, line_count, false, None);
396
397    Ok(FileAnalysisOutput::new(
398        formatted, semantic, line_count, None,
399    ))
400}
401
// NOTE(review): with the `schemars` feature enabled, `///` doc comments on this type are
// presumably picked up as JSON-schema descriptions, so the extra notes below are plain
// `//` comments and the existing doc text is left untouched — confirm before rewording.
//
// Built by `chains_to_entries` from the first element of each call chain.
/// Single entry in a call chain (depth-1 direct caller or callee).
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
pub struct CallChainEntry {
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Symbol name of the caller or callee")
    )]
    pub symbol: String,
    // `chains_to_entries` strips the analysis root when it is a prefix of the path;
    // otherwise (or when no root is given) the path is stored unchanged.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "File path relative to the repository root")
    )]
    pub file: String,
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Line number of the definition or call site (1-indexed)",
            schema_with = "crate::schema_helpers::integer_schema"
        )
    )]
    pub line: usize,
}
426
// NOTE(review): with the `schemars` feature enabled, `///` doc comments on this type are
// presumably picked up as JSON-schema descriptions, so the extra notes below are plain
// `//` comments and the existing doc text is left untouched — confirm before rewording.
//
// Only `formatted`, `next_cursor`, `callers`, `test_callers`, `callees` and
// `def_use_sites` are serialized; every other field is marked `#[serde(skip)]` and
// exists for in-process consumers.
/// Result of focused symbol analysis.
#[derive(Debug, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FocusedAnalysisOutput {
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the call graph analysis")
    )]
    pub formatted: String,
    // Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub prod_chains: Vec<InternalCallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub test_chains: Vec<InternalCallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub outgoing_chains: Vec<InternalCallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub def_count: usize,
    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub unfiltered_caller_count: usize,
    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub impl_trait_caller_count: usize,
    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub test_callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callees: Option<Vec<CallChainEntry>>,
    // NOTE(review): `#[serde(default)]` is a deserialization attribute, but this struct
    // only derives `Serialize` — it presumably has no effect here; confirm and consider
    // removing it.
    /// Definition and use sites for the symbol.
    #[serde(default)]
    pub def_use_sites: Vec<crate::types::DefUseSite>,
}
483
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
///
/// Consumed by [`analyze_focused_with_progress`], which copies every field except
/// `max_depth` into the internal parameter struct and uses `max_depth` for the
/// directory walk.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Name of the symbol to analyze; resolved against the call graph.
    pub focus: String,
    /// How `focus` is matched against known symbols. `Exact` requires the name to be
    /// present in the graph as written; other modes go through indexed resolution.
    pub match_mode: SymbolMatchMode,
    /// Depth passed to the incoming/outgoing chain searches and to the formatters.
    pub follow_depth: u32,
    /// Optional depth limit for the directory walk.
    pub max_depth: Option<u32>,
    /// Optional AST recursion limit forwarded to semantic extraction.
    pub ast_recursion_limit: Option<usize>,
    /// Selects the summary formatter instead of the full focused formatter.
    pub use_summary: bool,
    /// When `Some(true)`, caller edges are restricted to impl-trait edges and a
    /// `FILTER:` header is prepended to the output; `None` is treated as `false`.
    pub impl_only: Option<bool>,
    /// Forwarded to the internal parameters; its consumer is outside this part of the
    /// file. Presumably enables collection of definition/use sites — confirm.
    pub def_use: bool,
    /// Optional per-file parse timeout, in microseconds, forwarded to semantic
    /// extraction. A timeout surfaces as [`AnalyzeError::ParseTimeout`].
    pub parse_timeout_micros: Option<u64>,
}
498
/// Internal parameters for focused analysis phases.
///
/// A field-for-field copy of [`FocusedAnalysisConfig`] minus `max_depth`, which the
/// public entry point uses for the directory walk and passes on separately.
#[derive(Clone)]
struct InternalFocusedParams {
    /// Symbol name to resolve in the call graph.
    focus: String,
    /// Matching strategy used by `resolve_symbol`.
    match_mode: SymbolMatchMode,
    /// Depth for chain discovery and formatting.
    follow_depth: u32,
    /// Optional AST recursion limit forwarded to semantic extraction.
    ast_recursion_limit: Option<usize>,
    /// Choose the summary formatter over the full one.
    use_summary: bool,
    /// Restrict caller edges to impl-trait edges when `Some(true)`.
    impl_only: Option<bool>,
    /// Copied from the public config; not read by the phases visible in this part of
    /// the file.
    def_use: bool,
    /// Optional per-file parse timeout in microseconds.
    parse_timeout_micros: Option<u64>,
}
511
/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
///
/// Returned by `collect_file_analysis`; the second element is a flattened copy of the
/// `impl_traits` found in every entry of the first.
type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
514
515/// Phase 1: Collect semantic analysis for all files in parallel.
516fn collect_file_analysis(
517    entries: &[WalkEntry],
518    progress: &Arc<AtomicUsize>,
519    ct: &CancellationToken,
520    ast_recursion_limit: Option<usize>,
521    parse_timeout_micros: Option<u64>,
522) -> Result<FileAnalysisBatch, AnalyzeError> {
523    // Check if already cancelled
524    if ct.is_cancelled() {
525        return Err(AnalyzeError::Cancelled);
526    }
527
528    // Use pre-walked entries (passed by caller)
529    // Collect semantic analysis for all files in parallel
530    let file_entries: Vec<&WalkEntry> = entries
531        .iter()
532        .filter(|e| !e.is_dir && !e.is_symlink)
533        .collect();
534
535    // Collect per-file timeout events so they can be surfaced as AnalyzeError::ParseTimeout.
536    let timed_out: std::sync::Mutex<Vec<(PathBuf, u64)>> = std::sync::Mutex::new(Vec::new());
537
538    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
539        .par_iter()
540        .filter_map(|entry| {
541            // Check cancellation per file
542            if ct.is_cancelled() {
543                return None;
544            }
545
546            let ext = entry.path.extension().and_then(|e| e.to_str());
547
548            // Check file size before reading
549            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
550                tracing::debug!("skipping large file: {}", entry.path.display());
551                progress.fetch_add(1, Ordering::Relaxed);
552                return None;
553            }
554
555            // Try to read file content
556            let Ok(source) = std::fs::read_to_string(&entry.path) else {
557                progress.fetch_add(1, Ordering::Relaxed);
558                return None;
559            };
560
561            // Detect language and extract semantic information
562            let language = if let Some(ext_str) = ext {
563                language_for_extension(ext_str)
564                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
565            } else {
566                "unknown".to_string()
567            };
568
569            match SemanticExtractor::extract(
570                &source,
571                &language,
572                ast_recursion_limit,
573                parse_timeout_micros,
574            ) {
575                Ok(mut semantic) => {
576                    // Populate file path on references
577                    for r in &mut semantic.references {
578                        r.location = entry.path.display().to_string();
579                    }
580                    // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
581                    for trait_info in &mut semantic.impl_traits {
582                        trait_info.path.clone_from(&entry.path);
583                    }
584                    progress.fetch_add(1, Ordering::Relaxed);
585                    Some((entry.path.clone(), semantic))
586                }
587                Err(crate::parser::ParserError::Timeout(micros)) => {
588                    tracing::warn!(
589                        "parse timeout exceeded for {}: {} microseconds",
590                        entry.path.display(),
591                        micros
592                    );
593                    if let Ok(mut v) = timed_out.lock() {
594                        v.push((entry.path.clone(), micros));
595                    }
596                    progress.fetch_add(1, Ordering::Relaxed);
597                    None
598                }
599                Err(_) => {
600                    progress.fetch_add(1, Ordering::Relaxed);
601                    None
602                }
603            }
604        })
605        .collect();
606
607    // Check if cancelled after parallel processing
608    if ct.is_cancelled() {
609        return Err(AnalyzeError::Cancelled);
610    }
611
612    // Surface the first timeout as AnalyzeError::ParseTimeout so callers can detect it.
613    if let Ok(mut v) = timed_out.lock()
614        && let Some((path, micros)) = v.drain(..).next()
615    {
616        return Err(AnalyzeError::ParseTimeout { path, micros });
617    }
618
619    // Collect all impl-trait info from analysis results
620    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
621        .iter()
622        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
623        .collect();
624
625    Ok((analysis_results, all_impl_traits))
626}
627
628/// Phase 2: Build call graph from analysis results.
629fn build_call_graph(
630    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
631    all_impl_traits: &[ImplTraitInfo],
632) -> Result<CallGraph, AnalyzeError> {
633    // Build call graph. Always build without impl_only filter first so we can
634    // record the unfiltered caller count before discarding those edges.
635    CallGraph::build_from_results(
636        analysis_results,
637        all_impl_traits,
638        false, // filter applied below after counting
639    )
640    .map_err(std::convert::Into::into)
641}
642
643/// Phase 3: Resolve symbol and apply `impl_only` filter.
644/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
645/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
646/// then compute `impl_trait_caller_count`.
647fn resolve_symbol(
648    graph: &mut CallGraph,
649    params: &InternalFocusedParams,
650) -> Result<(String, usize, usize), AnalyzeError> {
651    // Resolve symbol name using the requested match mode.
652    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
653        let exists = graph.definitions.contains_key(&params.focus)
654            || graph.callers.contains_key(&params.focus)
655            || graph.callees.contains_key(&params.focus);
656        if exists {
657            params.focus.clone()
658        } else {
659            return Err(crate::graph::GraphError::SymbolNotFound {
660                symbol: params.focus.clone(),
661                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
662            }
663            .into());
664        }
665    } else {
666        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
667    };
668
669    // Count unique callers for the focus symbol before applying impl_only filter.
670    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
671        edges
672            .iter()
673            .map(|e| &e.neighbor_name)
674            .collect::<std::collections::HashSet<_>>()
675            .len()
676    });
677
678    // Apply impl_only filter now if requested, then count filtered callers.
679    // Filter all caller adjacency lists so traversal and formatting are consistently
680    // restricted to impl-trait edges regardless of follow_depth.
681    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
682        for edges in graph.callers.values_mut() {
683            edges.retain(|e| e.is_impl_trait);
684        }
685        graph.callers.get(&resolved_focus).map_or(0, |edges| {
686            edges
687                .iter()
688                .map(|e| &e.neighbor_name)
689                .collect::<std::collections::HashSet<_>>()
690                .len()
691        })
692    } else {
693        unfiltered_caller_count
694    };
695
696    Ok((
697        resolved_focus,
698        unfiltered_caller_count,
699        impl_trait_caller_count,
700    ))
701}
702
/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
///
/// `prod_chains` and `test_chains` are the two halves of the incoming chains;
/// `def_count` is the number of definitions recorded for the focus symbol.
type ChainComputeResult = (
    String,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    usize,
);
711
712/// Helper function to convert InternalCallChain data to CallChainEntry vec.
713/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
714/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
715fn chains_to_entries(
716    chains: &[InternalCallChain],
717    root: Option<&std::path::Path>,
718) -> Option<Vec<CallChainEntry>> {
719    if chains.is_empty() {
720        return None;
721    }
722    let entries: Vec<CallChainEntry> = chains
723        .iter()
724        .take(10)
725        .filter_map(|chain| {
726            let (symbol, path, line) = chain.chain.first()?;
727            let file = match root {
728                Some(root) => path
729                    .strip_prefix(root)
730                    .unwrap_or(path.as_path())
731                    .to_string_lossy()
732                    .into_owned(),
733                None => path.to_string_lossy().into_owned(),
734            };
735            Some(CallChainEntry {
736                symbol: symbol.clone(),
737                file,
738                line: *line,
739            })
740        })
741        .collect();
742    if entries.is_empty() {
743        None
744    } else {
745        Some(entries)
746    }
747}
748
749/// Phase 4: Compute chains and format output.
750fn compute_chains(
751    graph: &CallGraph,
752    resolved_focus: &str,
753    root: &Path,
754    params: &InternalFocusedParams,
755    unfiltered_caller_count: usize,
756    impl_trait_caller_count: usize,
757    def_use_sites: &[crate::types::DefUseSite],
758) -> Result<ChainComputeResult, AnalyzeError> {
759    // Compute chain data for pagination (always, regardless of summary mode)
760    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
761    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
762    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
763
764    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
765        incoming_chains.iter().cloned().partition(|chain| {
766            chain
767                .chain
768                .first()
769                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
770        });
771
772    // Format output with pre-computed chains
773    let mut formatted = if params.use_summary {
774        format_focused_summary_internal(
775            graph,
776            resolved_focus,
777            params.follow_depth,
778            Some(root),
779            Some(&incoming_chains),
780            Some(&outgoing_chains),
781            def_use_sites,
782        )?
783    } else {
784        format_focused_internal(
785            graph,
786            resolved_focus,
787            params.follow_depth,
788            Some(root),
789            Some(&incoming_chains),
790            Some(&outgoing_chains),
791            def_use_sites,
792        )?
793    };
794
795    // Add FILTER header if impl_only filter was applied
796    if params.impl_only.unwrap_or(false) {
797        let filter_header = format!(
798            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
799        );
800        formatted = format!("{filter_header}{formatted}");
801    }
802
803    Ok((
804        formatted,
805        prod_chains,
806        test_chains,
807        outgoing_chains,
808        def_count,
809    ))
810}
811
812/// Analyze a symbol's call graph across a directory with progress tracking.
813// public API; callers expect owned semantics
814#[allow(clippy::needless_pass_by_value)]
815pub fn analyze_focused_with_progress(
816    root: &Path,
817    params: &FocusedAnalysisConfig,
818    progress: Arc<AtomicUsize>,
819    ct: CancellationToken,
820) -> Result<FocusedAnalysisOutput, AnalyzeError> {
821    let entries = walk_directory(root, params.max_depth)?;
822    let internal_params = InternalFocusedParams {
823        focus: params.focus.clone(),
824        match_mode: params.match_mode.clone(),
825        follow_depth: params.follow_depth,
826        ast_recursion_limit: params.ast_recursion_limit,
827        use_summary: params.use_summary,
828        impl_only: params.impl_only,
829        def_use: params.def_use,
830        parse_timeout_micros: params.parse_timeout_micros,
831    };
832    analyze_focused_with_progress_with_entries_internal(
833        root,
834        params.max_depth,
835        &progress,
836        &ct,
837        &internal_params,
838        &entries,
839    )
840}
841
842/// Internal implementation of focused analysis using pre-walked entries and params struct.
843#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
844fn analyze_focused_with_progress_with_entries_internal(
845    root: &Path,
846    _max_depth: Option<u32>,
847    progress: &Arc<AtomicUsize>,
848    ct: &CancellationToken,
849    params: &InternalFocusedParams,
850    entries: &[WalkEntry],
851) -> Result<FocusedAnalysisOutput, AnalyzeError> {
852    // Check if already cancelled
853    if ct.is_cancelled() {
854        return Err(AnalyzeError::Cancelled);
855    }
856
857    // Check if path is a file (hint to use directory)
858    if root.is_file() {
859        let formatted =
860            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
861                .to_string();
862        return Ok(FocusedAnalysisOutput {
863            formatted,
864            next_cursor: None,
865            prod_chains: vec![],
866            test_chains: vec![],
867            outgoing_chains: vec![],
868            def_count: 0,
869            unfiltered_caller_count: 0,
870            impl_trait_caller_count: 0,
871            callers: None,
872            test_callers: None,
873            callees: None,
874            def_use_sites: vec![],
875        });
876    }
877
878    // Phase 1: Collect file analysis
879    let (analysis_results, all_impl_traits) = collect_file_analysis(
880        entries,
881        progress,
882        ct,
883        params.ast_recursion_limit,
884        params.parse_timeout_micros,
885    )?;
886
887    // Check for cancellation before building the call graph (phase 2)
888    if ct.is_cancelled() {
889        return Err(AnalyzeError::Cancelled);
890    }
891
892    // Phase 2: Build call graph
893    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
894
895    // Check for cancellation before resolving the symbol (phase 3)
896    if ct.is_cancelled() {
897        return Err(AnalyzeError::Cancelled);
898    }
899
900    // Phase 3: Resolve symbol and apply impl_only filter.
901    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
902    // fall through to def-use extraction instead of returning SymbolNotFound.
903    let resolve_result = resolve_symbol(&mut graph, params);
904    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
905        &resolve_result
906    {
907        // Deliberately not collapsed: resolve_result must stay alive past this block
908        // so that the `?` below can propagate non-SymbolNotFound errors.
909        if params.def_use {
910            let def_use_sites =
911                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
912            if def_use_sites.is_empty() {
913                // Symbol not found anywhere (neither in call graph nor as def/use site).
914                // Propagate the original SymbolNotFound error instead of returning an
915                // empty success response.
916                return Err(resolve_result.unwrap_err());
917            }
918            use std::fmt::Write as _;
919            let mut formatted = String::new();
920            let _ = writeln!(
921                formatted,
922                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
923                params.focus
924            );
925            {
926                let writes = def_use_sites
927                    .iter()
928                    .filter(|s| {
929                        matches!(
930                            s.kind,
931                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
932                        )
933                    })
934                    .count();
935                let reads = def_use_sites
936                    .iter()
937                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
938                    .count();
939                let _ = writeln!(
940                    formatted,
941                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
942                    params.focus,
943                    def_use_sites.len(),
944                    writes,
945                    reads
946                );
947            }
948            return Ok(FocusedAnalysisOutput {
949                formatted,
950                next_cursor: None,
951                callers: None,
952                test_callers: None,
953                callees: None,
954                prod_chains: vec![],
955                test_chains: vec![],
956                outgoing_chains: vec![],
957                def_count: 0,
958                unfiltered_caller_count: 0,
959                impl_trait_caller_count: 0,
960                def_use_sites,
961            });
962        }
963    }
964    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
965
966    // Check for cancellation before computing chains (phase 4)
967    if ct.is_cancelled() {
968        return Err(AnalyzeError::Cancelled);
969    }
970
971    // Phase 5 (optional, before formatting): Def-use site extraction.
972    // Use params.focus (the raw user-supplied string) rather than resolved_focus
973    // so that variable/field names that are not in the call graph still work.
974    let def_use_sites = if params.def_use {
975        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
976    } else {
977        Vec::new()
978    };
979
980    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
981    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
982        &graph,
983        &resolved_focus,
984        root,
985        params,
986        unfiltered_caller_count,
987        impl_trait_caller_count,
988        &def_use_sites,
989    )?;
990
991    // Compute depth-1 chains for structured output fields (always direct relationships only,
992    // regardless of `follow_depth` used for the text-formatted output).
993    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
994        // Chains already at depth 1; reuse the partitioned vecs.
995        let callers = chains_to_entries(&prod_chains, Some(root));
996        let test_callers = chains_to_entries(&test_chains, Some(root));
997        let callees = chains_to_entries(&outgoing_chains, Some(root));
998        (callers, test_callers, callees)
999    } else {
1000        // follow_depth > 1: re-query at depth 1 to get only direct edges.
1001        let incoming1 = graph
1002            .find_incoming_chains(&resolved_focus, 1)
1003            .unwrap_or_default();
1004        let outgoing1 = graph
1005            .find_outgoing_chains(&resolved_focus, 1)
1006            .unwrap_or_default();
1007        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
1008            chain
1009                .chain
1010                .first()
1011                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
1012        });
1013        let callers = chains_to_entries(&prod1, Some(root));
1014        let test_callers = chains_to_entries(&test1, Some(root));
1015        let callees = chains_to_entries(&outgoing1, Some(root));
1016        (callers, test_callers, callees)
1017    };
1018
1019    Ok(FocusedAnalysisOutput {
1020        formatted,
1021        next_cursor: None,
1022        callers: depth1_callers,
1023        test_callers: depth1_test_callers,
1024        callees: depth1_callees,
1025        prod_chains,
1026        test_chains,
1027        outgoing_chains,
1028        def_count,
1029        unfiltered_caller_count,
1030        impl_trait_caller_count,
1031        def_use_sites,
1032    })
1033}
1034
1035/// Phase 5: Extract def-use sites for `symbol` across all entries.
1036/// Writes go before reads; within each kind ordered by file, line, then column.
1037fn collect_def_use_sites(
1038    entries: &[WalkEntry],
1039    symbol: &str,
1040    ast_recursion_limit: Option<usize>,
1041    root: &std::path::Path,
1042    ct: &CancellationToken,
1043) -> Vec<crate::types::DefUseSite> {
1044    use crate::parser::SemanticExtractor;
1045
1046    let file_entries: Vec<&WalkEntry> = entries
1047        .iter()
1048        .filter(|e| !e.is_dir && !e.is_symlink)
1049        .collect();
1050
1051    let mut sites: Vec<crate::types::DefUseSite> = file_entries
1052        .par_iter()
1053        .filter_map(|entry| {
1054            if ct.is_cancelled() {
1055                return None;
1056            }
1057
1058            // Check file size before reading
1059            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1060                tracing::debug!("skipping large file: {}", entry.path.display());
1061                return None;
1062            }
1063
1064            let Ok(source) = std::fs::read_to_string(&entry.path) else {
1065                return None;
1066            };
1067            let ext = entry
1068                .path
1069                .extension()
1070                .and_then(|e| e.to_str())
1071                .unwrap_or("");
1072            let lang = crate::lang::language_for_extension(ext)?;
1073            let file_path = entry
1074                .path
1075                .strip_prefix(root)
1076                .unwrap_or(&entry.path)
1077                .display()
1078                .to_string();
1079            let sites = SemanticExtractor::extract_def_use_for_file(
1080                &source,
1081                lang,
1082                symbol,
1083                &file_path,
1084                ast_recursion_limit,
1085            );
1086            if sites.is_empty() { None } else { Some(sites) }
1087        })
1088        .flatten()
1089        .collect();
1090
1091    // Writes before reads; within each kind: file, line, then column for deterministic order
1092    sites.sort_by(|a, b| {
1093        use crate::types::DefUseKind;
1094        let kind_ord = |k: &DefUseKind| match k {
1095            DefUseKind::Write | DefUseKind::WriteRead => 0,
1096            DefUseKind::Read => 1,
1097        };
1098        kind_ord(&a.kind)
1099            .cmp(&kind_ord(&b.kind))
1100            .then_with(|| a.file.cmp(&b.file))
1101            .then_with(|| a.line.cmp(&b.line))
1102            .then_with(|| a.column.cmp(&b.column))
1103    });
1104
1105    sites
1106}
1107
1108/// Analyze a symbol's call graph using pre-walked directory entries.
1109pub fn analyze_focused_with_progress_with_entries(
1110    root: &Path,
1111    params: &FocusedAnalysisConfig,
1112    progress: &Arc<AtomicUsize>,
1113    ct: &CancellationToken,
1114    entries: &[WalkEntry],
1115) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1116    let internal_params = InternalFocusedParams {
1117        focus: params.focus.clone(),
1118        match_mode: params.match_mode.clone(),
1119        follow_depth: params.follow_depth,
1120        ast_recursion_limit: params.ast_recursion_limit,
1121        use_summary: params.use_summary,
1122        impl_only: params.impl_only,
1123        def_use: params.def_use,
1124        parse_timeout_micros: params.parse_timeout_micros,
1125    };
1126    analyze_focused_with_progress_with_entries_internal(
1127        root,
1128        params.max_depth,
1129        progress,
1130        ct,
1131        &internal_params,
1132        entries,
1133    )
1134}
1135
1136#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1137pub fn analyze_focused(
1138    root: &Path,
1139    focus: &str,
1140    follow_depth: u32,
1141    max_depth: Option<u32>,
1142    ast_recursion_limit: Option<usize>,
1143) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1144    let entries = walk_directory(root, max_depth)?;
1145    let counter = Arc::new(AtomicUsize::new(0));
1146    let ct = CancellationToken::new();
1147    let params = FocusedAnalysisConfig {
1148        focus: focus.to_string(),
1149        match_mode: SymbolMatchMode::Exact,
1150        follow_depth,
1151        max_depth,
1152        ast_recursion_limit,
1153        use_summary: false,
1154        impl_only: None,
1155        def_use: false,
1156        parse_timeout_micros: None,
1157    };
1158    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1159}
1160
1161/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1162/// functions, imports) for lightweight code understanding.
1163#[instrument(skip_all, fields(path))]
1164pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1165    // Check file size before reading
1166    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1167        tracing::debug!("skipping large file: {}", path);
1168        return Err(AnalyzeError::Parser(
1169            crate::parser::ParserError::ParseError("file too large".to_string()),
1170        ));
1171    }
1172
1173    let source = std::fs::read_to_string(path)
1174        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1175
1176    let file_path = Path::new(path);
1177    let name = file_path
1178        .file_name()
1179        .and_then(|s| s.to_str())
1180        .unwrap_or("unknown")
1181        .to_string();
1182
1183    let line_count = source.lines().count();
1184
1185    let language = file_path
1186        .extension()
1187        .and_then(|e| e.to_str())
1188        .and_then(language_for_extension)
1189        .ok_or_else(|| {
1190            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1191                "unsupported or missing file extension".to_string(),
1192            ))
1193        })?;
1194
1195    let semantic = SemanticExtractor::extract(&source, language, None, None)?;
1196
1197    let functions = semantic
1198        .functions
1199        .into_iter()
1200        .map(|f| crate::types::ModuleFunctionInfo {
1201            name: f.name,
1202            line: f.line,
1203        })
1204        .collect();
1205
1206    let imports = semantic
1207        .imports
1208        .into_iter()
1209        .map(|i| crate::types::ModuleImportInfo {
1210            module: i.module,
1211            items: i.items,
1212        })
1213        .collect();
1214
1215    Ok(crate::types::ModuleInfo {
1216        name,
1217        line_count,
1218        language: language.to_string(),
1219        functions,
1220        imports,
1221    })
1222}
1223
1224/// Scan a directory for files that import a given module path.
1225///
1226/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1227/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1228/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1229pub fn analyze_import_lookup(
1230    root: &Path,
1231    module: &str,
1232    entries: &[WalkEntry],
1233    ast_recursion_limit: Option<usize>,
1234) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1235    let matches: Vec<(PathBuf, usize)> = entries
1236        .par_iter()
1237        .filter_map(|entry| {
1238            if entry.is_dir || entry.is_symlink {
1239                tracing::debug!("skipping symlink: {}", entry.path.display());
1240                return None;
1241            }
1242            let ext = entry
1243                .path
1244                .extension()
1245                .and_then(|e| e.to_str())
1246                .and_then(crate::lang::language_for_extension)?;
1247            let source = std::fs::read_to_string(&entry.path).ok()?;
1248            let semantic =
1249                SemanticExtractor::extract(&source, ext, ast_recursion_limit, None).ok()?;
1250            for import in &semantic.imports {
1251                if import.module == module || import.items.iter().any(|item| item == module) {
1252                    return Some((entry.path.clone(), import.line));
1253                }
1254            }
1255            None
1256        })
1257        .collect();
1258
1259    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1260    text.push_str(&format!("ROOT: {}\n", root.display()));
1261    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1262    for (path, line) in &matches {
1263        let rel = path.strip_prefix(root).unwrap_or(path);
1264        text.push_str(&format!("  {}:{line}\n", rel.display()));
1265    }
1266
1267    Ok(FocusedAnalysisOutput {
1268        formatted: text,
1269        next_cursor: None,
1270        prod_chains: vec![],
1271        test_chains: vec![],
1272        outgoing_chains: vec![],
1273        def_count: 0,
1274        unfiltered_caller_count: 0,
1275        impl_trait_caller_count: 0,
1276        callers: None,
1277        test_callers: None,
1278        callees: None,
1279        def_use_sites: vec![],
1280    })
1281}
1282
1283/// Resolve Python wildcard imports to actual symbol names.
1284///
1285/// For each import with items=`["*"]`, this function:
1286/// 1. Parses the relative dots (if any) and climbs the directory tree
1287/// 2. Finds the target .py file or __init__.py
1288/// 3. Extracts symbols (functions and classes) from the target
1289/// 4. Honors __all__ if defined, otherwise uses function+class names
1290///
1291/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1292fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1293    use std::collections::HashMap;
1294
1295    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1296    let Ok(file_path_canonical) = file_path.canonicalize() else {
1297        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1298        return;
1299    };
1300
1301    for import in imports.iter_mut() {
1302        if import.items != ["*"] {
1303            continue;
1304        }
1305        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1306    }
1307}
1308
1309/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1310fn resolve_single_wildcard(
1311    import: &mut ImportInfo,
1312    file_path: &Path,
1313    file_path_canonical: &Path,
1314    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1315) {
1316    let module = import.module.clone();
1317    let dot_count = module.chars().take_while(|c| *c == '.').count();
1318    if dot_count == 0 {
1319        return;
1320    }
1321    let module_path = module.trim_start_matches('.');
1322
1323    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1324    else {
1325        return;
1326    };
1327
1328    let Ok(canonical) = target_to_read.canonicalize() else {
1329        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1330        return;
1331    };
1332
1333    if canonical == file_path_canonical {
1334        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1335        return;
1336    }
1337
1338    if let Some(cached) = resolved_cache.get(&canonical) {
1339        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1340        import.items.clone_from(cached);
1341        return;
1342    }
1343
1344    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1345        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1346        import.items.clone_from(&symbols);
1347        resolved_cache.insert(canonical, symbols);
1348    }
1349}
1350
1351/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1352fn locate_target_file(
1353    file_path: &Path,
1354    dot_count: usize,
1355    module_path: &str,
1356    module: &str,
1357) -> Option<PathBuf> {
1358    let mut target_dir = file_path.parent()?.to_path_buf();
1359
1360    for _ in 1..dot_count {
1361        if !target_dir.pop() {
1362            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1363            return None;
1364        }
1365    }
1366
1367    let target_file = if module_path.is_empty() {
1368        target_dir.join("__init__.py")
1369    } else {
1370        let rel_path = module_path.replace('.', "/");
1371        target_dir.join(format!("{rel_path}.py"))
1372    };
1373
1374    if target_file.exists() {
1375        Some(target_file)
1376    } else if target_file.with_extension("").is_dir() {
1377        let init = target_file.with_extension("").join("__init__.py");
1378        if init.exists() { Some(init) } else { None }
1379    } else {
1380        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1381        None
1382    }
1383}
1384
1385/// Read and parse a target .py file, returning its exported symbols.
1386fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1387    use tree_sitter::Parser;
1388
1389    // Check file size before reading
1390    if target_path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1391        tracing::debug!("skipping large file: {}", target_path.display());
1392        return None;
1393    }
1394
1395    let source = match std::fs::read_to_string(target_path) {
1396        Ok(s) => s,
1397        Err(e) => {
1398            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1399            return None;
1400        }
1401    };
1402
1403    // Parse once with tree-sitter
1404    let lang_info = crate::languages::get_language_info("python")?;
1405    let mut parser = Parser::new();
1406    if parser.set_language(&lang_info.language).is_err() {
1407        return None;
1408    }
1409    let tree = parser.parse(&source, None)?;
1410
1411    // First, try to extract __all__ from the same tree
1412    let mut symbols = Vec::new();
1413    extract_all_from_tree(&tree, &source, &mut symbols);
1414    if !symbols.is_empty() {
1415        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1416        return Some(symbols);
1417    }
1418
1419    // Fallback: extract functions/classes from the tree
1420    let root = tree.root_node();
1421    let mut cursor = root.walk();
1422    for child in root.children(&mut cursor) {
1423        if matches!(child.kind(), "function_definition" | "class_definition")
1424            && let Some(name_node) = child.child_by_field_name("name")
1425        {
1426            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1427            if !name.starts_with('_') {
1428                symbols.push(name);
1429            }
1430        }
1431    }
1432    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1433    Some(symbols)
1434}
1435
1436/// Extract __all__ from a tree-sitter tree.
1437fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1438    let root = tree.root_node();
1439    let mut cursor = root.walk();
1440    for child in root.children(&mut cursor) {
1441        if child.kind() == "simple_statement" {
1442            // simple_statement contains assignment and other statement types
1443            let mut simple_cursor = child.walk();
1444            for simple_child in child.children(&mut simple_cursor) {
1445                if simple_child.kind() == "assignment"
1446                    && let Some(left) = simple_child.child_by_field_name("left")
1447                {
1448                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1449                    if target_text == "__all__"
1450                        && let Some(right) = simple_child.child_by_field_name("right")
1451                    {
1452                        extract_string_list_from_list_node(&right, source, result);
1453                    }
1454                }
1455            }
1456        } else if child.kind() == "expression_statement" {
1457            // Fallback for older Python AST structures
1458            let mut stmt_cursor = child.walk();
1459            for stmt_child in child.children(&mut stmt_cursor) {
1460                if stmt_child.kind() == "assignment"
1461                    && let Some(left) = stmt_child.child_by_field_name("left")
1462                {
1463                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1464                    if target_text == "__all__"
1465                        && let Some(right) = stmt_child.child_by_field_name("right")
1466                    {
1467                        extract_string_list_from_list_node(&right, source, result);
1468                    }
1469                }
1470            }
1471        }
1472    }
1473}
1474
1475/// Extract string literals from a Python list node.
1476fn extract_string_list_from_list_node(
1477    list_node: &tree_sitter::Node,
1478    source: &str,
1479    result: &mut Vec<String>,
1480) {
1481    let mut cursor = list_node.walk();
1482    for child in list_node.named_children(&mut cursor) {
1483        if child.kind() == "string" {
1484            let raw = source[child.start_byte()..child.end_byte()].trim();
1485            // Strip quotes: "name" -> name
1486            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1487            if !unquoted.is_empty() {
1488                result.push(unquoted);
1489            }
1490        }
1491    }
1492}
1493
/// Unit tests for the analysis entry points (`analyze_str`, focused analysis, and pagination helpers).
1495#[cfg(test)]
1496mod tests {
1497    use super::*;
1498    use crate::formatter::format_focused_paginated;
1499    use crate::graph::InternalCallChain;
1500    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1501    use std::fs;
1502    use std::path::PathBuf;
1503    use tempfile::TempDir;
1504
1505    #[cfg(feature = "lang-rust")]
1506    #[test]
1507    fn analyze_str_rust_happy_path() {
1508        let source = "fn hello() -> i32 { 42 }";
1509        let result = analyze_str(source, "rs", None);
1510        assert!(result.is_ok());
1511    }
1512
1513    #[cfg(feature = "lang-python")]
1514    #[test]
1515    fn analyze_str_python_happy_path() {
1516        let source = "def greet(name):\n    return f'Hello {name}'";
1517        let result = analyze_str(source, "py", None);
1518        assert!(result.is_ok());
1519    }
1520
1521    #[cfg(feature = "lang-rust")]
1522    #[test]
1523    fn analyze_str_rust_by_language_name() {
1524        let source = "fn hello() -> i32 { 42 }";
1525        let result = analyze_str(source, "rust", None);
1526        assert!(result.is_ok());
1527    }
1528
1529    #[cfg(feature = "lang-python")]
1530    #[test]
1531    fn analyze_str_python_by_language_name() {
1532        let source = "def greet(name):\n    return f'Hello {name}'";
1533        let result = analyze_str(source, "python", None);
1534        assert!(result.is_ok());
1535    }
1536
1537    #[cfg(feature = "lang-rust")]
1538    #[test]
1539    fn analyze_str_rust_mixed_case() {
1540        let source = "fn hello() -> i32 { 42 }";
1541        let result = analyze_str(source, "RuSt", None);
1542        assert!(result.is_ok());
1543    }
1544
1545    #[cfg(feature = "lang-python")]
1546    #[test]
1547    fn analyze_str_python_mixed_case() {
1548        let source = "def greet(name):\n    return f'Hello {name}'";
1549        let result = analyze_str(source, "PyThOn", None);
1550        assert!(result.is_ok());
1551    }
1552
1553    #[test]
1554    fn analyze_str_unsupported_language() {
1555        let result = analyze_str("code", "brainfuck", None);
1556        assert!(
1557            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1558        );
1559    }
1560
1561    #[cfg(feature = "lang-rust")]
1562    #[test]
1563    fn test_symbol_focus_callers_pagination_first_page() {
1564        let temp_dir = TempDir::new().unwrap();
1565
1566        // Create a file with many callers of `target`
1567        let mut code = String::from("fn target() {}\n");
1568        for i in 0..15 {
1569            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1570        }
1571        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1572
1573        // Act
1574        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1575
1576        // Paginate prod callers with page_size=5
1577        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1578            .expect("paginate failed");
1579        assert!(
1580            paginated.total >= 5,
1581            "should have enough callers to paginate"
1582        );
1583        assert!(
1584            paginated.next_cursor.is_some(),
1585            "should have next_cursor for page 1"
1586        );
1587
1588        // Verify cursor encodes callers mode
1589        assert_eq!(paginated.items.len(), 5);
1590    }
1591
1592    #[test]
1593    fn test_symbol_focus_callers_pagination_second_page() {
1594        let temp_dir = TempDir::new().unwrap();
1595
1596        let mut code = String::from("fn target() {}\n");
1597        for i in 0..12 {
1598            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1599        }
1600        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1601
1602        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1603        let total_prod = output.prod_chains.len();
1604
1605        if total_prod > 5 {
1606            // Get page 1 cursor
1607            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1608                .expect("paginate failed");
1609            assert!(p1.next_cursor.is_some());
1610
1611            let cursor_str = p1.next_cursor.unwrap();
1612            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1613
1614            // Get page 2
1615            let p2 = paginate_slice(
1616                &output.prod_chains,
1617                cursor_data.offset,
1618                5,
1619                PaginationMode::Callers,
1620            )
1621            .expect("paginate failed");
1622
1623            // Format paginated output
1624            let formatted = format_focused_paginated(
1625                &p2.items,
1626                total_prod,
1627                PaginationMode::Callers,
1628                "target",
1629                &output.prod_chains,
1630                &output.test_chains,
1631                &output.outgoing_chains,
1632                output.def_count,
1633                cursor_data.offset,
1634                Some(temp_dir.path()),
1635                true,
1636            );
1637
1638            // Assert: header shows correct range for page 2
1639            let expected_start = cursor_data.offset + 1;
1640            assert!(
1641                formatted.contains(&format!("CALLERS ({}", expected_start)),
1642                "header should show page 2 range, got: {}",
1643                formatted
1644            );
1645        }
1646    }
1647
1648    #[test]
1649    fn test_chains_to_entries_empty_returns_none() {
1650        // Arrange
1651        let chains: Vec<InternalCallChain> = vec![];
1652
1653        // Act
1654        let result = chains_to_entries(&chains, None);
1655
1656        // Assert
1657        assert!(result.is_none());
1658    }
1659
1660    #[test]
1661    fn test_chains_to_entries_with_data_returns_entries() {
1662        // Arrange
1663        let chains = vec![
1664            InternalCallChain {
1665                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1666            },
1667            InternalCallChain {
1668                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1669            },
1670        ];
1671        let root = PathBuf::from("/root");
1672
1673        // Act
1674        let result = chains_to_entries(&chains, Some(root.as_path()));
1675
1676        // Assert
1677        assert!(result.is_some());
1678        let entries = result.unwrap();
1679        assert_eq!(entries.len(), 2);
1680        assert_eq!(entries[0].symbol, "caller1");
1681        assert_eq!(entries[0].file, "lib.rs");
1682        assert_eq!(entries[0].line, 10);
1683        assert_eq!(entries[1].symbol, "caller2");
1684        assert_eq!(entries[1].file, "other.rs");
1685        assert_eq!(entries[1].line, 20);
1686    }
1687
1688    #[test]
1689    fn test_symbol_focus_callees_pagination() {
1690        let temp_dir = TempDir::new().unwrap();
1691
1692        // target calls many functions
1693        let mut code = String::from("fn target() {\n");
1694        for i in 0..10 {
1695            code.push_str(&format!("    callee_{:02}();\n", i));
1696        }
1697        code.push_str("}\n");
1698        for i in 0..10 {
1699            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1700        }
1701        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1702
1703        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1704        let total_callees = output.outgoing_chains.len();
1705
1706        if total_callees > 3 {
1707            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1708                .expect("paginate failed");
1709
1710            let formatted = format_focused_paginated(
1711                &paginated.items,
1712                total_callees,
1713                PaginationMode::Callees,
1714                "target",
1715                &output.prod_chains,
1716                &output.test_chains,
1717                &output.outgoing_chains,
1718                output.def_count,
1719                0,
1720                Some(temp_dir.path()),
1721                true,
1722            );
1723
1724            assert!(
1725                formatted.contains(&format!(
1726                    "CALLEES (1-{} of {})",
1727                    paginated.items.len(),
1728                    total_callees
1729                )),
1730                "header should show callees range, got: {}",
1731                formatted
1732            );
1733        }
1734    }
1735
1736    #[test]
1737    fn test_symbol_focus_empty_prod_callers() {
1738        let temp_dir = TempDir::new().unwrap();
1739
1740        // target is only called from test functions
1741        let code = r#"
1742fn target() {}
1743
1744#[cfg(test)]
1745mod tests {
1746    use super::*;
1747    #[test]
1748    fn test_something() { target(); }
1749}
1750"#;
1751        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1752
1753        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1754
1755        // prod_chains may be empty; pagination should handle it gracefully
1756        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1757            .expect("paginate failed");
1758        assert_eq!(paginated.items.len(), output.prod_chains.len());
1759        assert!(
1760            paginated.next_cursor.is_none(),
1761            "no next_cursor for empty or single-page prod_chains"
1762        );
1763    }
1764
1765    #[test]
1766    fn test_impl_only_filter_header_correct_counts() {
1767        let temp_dir = TempDir::new().unwrap();
1768
1769        // Create a Rust fixture with:
1770        // - A trait definition
1771        // - An impl Trait for SomeType block that calls the focus symbol
1772        // - A regular (non-trait-impl) function that also calls the focus symbol
1773        let code = r#"
1774trait MyTrait {
1775    fn focus_symbol();
1776}
1777
1778struct SomeType;
1779
1780impl MyTrait for SomeType {
1781    fn focus_symbol() {}
1782}
1783
1784fn impl_caller() {
1785    SomeType::focus_symbol();
1786}
1787
1788fn regular_caller() {
1789    SomeType::focus_symbol();
1790}
1791"#;
1792        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1793
1794        // Call analyze_focused with impl_only=Some(true)
1795        let params = FocusedAnalysisConfig {
1796            focus: "focus_symbol".to_string(),
1797            match_mode: SymbolMatchMode::Insensitive,
1798            follow_depth: 1,
1799            max_depth: None,
1800            ast_recursion_limit: None,
1801            use_summary: false,
1802            impl_only: Some(true),
1803            def_use: false,
1804            parse_timeout_micros: None,
1805        };
1806        let output = analyze_focused_with_progress(
1807            temp_dir.path(),
1808            &params,
1809            Arc::new(AtomicUsize::new(0)),
1810            CancellationToken::new(),
1811        )
1812        .unwrap();
1813
1814        // Assert the result contains "FILTER: impl_only=true"
1815        assert!(
1816            output.formatted.contains("FILTER: impl_only=true"),
1817            "formatted output should contain FILTER header for impl_only=true, got: {}",
1818            output.formatted
1819        );
1820
1821        // Assert the retained count N < total count M
1822        assert!(
1823            output.impl_trait_caller_count < output.unfiltered_caller_count,
1824            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1825            output.impl_trait_caller_count,
1826            output.unfiltered_caller_count
1827        );
1828
1829        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1830        let filter_line = output
1831            .formatted
1832            .lines()
1833            .find(|line| line.contains("FILTER: impl_only=true"))
1834            .expect("should find FILTER line");
1835        assert!(
1836            filter_line.contains(&format!(
1837                "({} of {} callers shown)",
1838                output.impl_trait_caller_count, output.unfiltered_caller_count
1839            )),
1840            "FILTER line should show correct N of M counts, got: {}",
1841            filter_line
1842        );
1843    }
1844
1845    #[test]
1846    fn test_callers_count_matches_formatted_output() {
1847        let temp_dir = TempDir::new().unwrap();
1848
1849        // Create a file with multiple callers of `target`
1850        let code = r#"
1851fn target() {}
1852fn caller_a() { target(); }
1853fn caller_b() { target(); }
1854fn caller_c() { target(); }
1855"#;
1856        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1857
1858        // Analyze the symbol
1859        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1860
1861        // Extract CALLERS count from formatted output
1862        let formatted = &output.formatted;
1863        let callers_count_from_output = formatted
1864            .lines()
1865            .find(|line| line.contains("FOCUS:"))
1866            .and_then(|line| {
1867                line.split(',')
1868                    .find(|part| part.contains("callers"))
1869                    .and_then(|part| {
1870                        part.trim()
1871                            .split_whitespace()
1872                            .next()
1873                            .and_then(|s| s.parse::<usize>().ok())
1874                    })
1875            })
1876            .expect("should find CALLERS count in formatted output");
1877
1878        // Compute expected count from prod_chains (unique first-caller names)
1879        let expected_callers_count = output
1880            .prod_chains
1881            .iter()
1882            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1883            .collect::<std::collections::HashSet<_>>()
1884            .len();
1885
1886        assert_eq!(
1887            callers_count_from_output, expected_callers_count,
1888            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1889        );
1890    }
1891
1892    #[cfg(feature = "lang-rust")]
1893    #[test]
1894    fn test_def_use_focused_analysis() {
1895        let temp_dir = TempDir::new().unwrap();
1896        fs::write(
1897            temp_dir.path().join("lib.rs"),
1898            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1899        )
1900        .unwrap();
1901
1902        let entries = walk_directory(temp_dir.path(), None).unwrap();
1903        let counter = Arc::new(AtomicUsize::new(0));
1904        let ct = CancellationToken::new();
1905        let params = FocusedAnalysisConfig {
1906            focus: "x".to_string(),
1907            match_mode: SymbolMatchMode::Exact,
1908            follow_depth: 1,
1909            max_depth: None,
1910            ast_recursion_limit: None,
1911            use_summary: false,
1912            impl_only: None,
1913            def_use: true,
1914            parse_timeout_micros: None,
1915        };
1916
1917        let output = analyze_focused_with_progress_with_entries(
1918            temp_dir.path(),
1919            &params,
1920            &counter,
1921            &ct,
1922            &entries,
1923        )
1924        .expect("def_use analysis should succeed");
1925
1926        assert!(
1927            !output.def_use_sites.is_empty(),
1928            "should find def-use sites for x"
1929        );
1930        assert!(
1931            output
1932                .def_use_sites
1933                .iter()
1934                .any(|s| s.kind == crate::types::DefUseKind::Write),
1935            "should have at least one Write site",
1936        );
1937        // No location appears as both write and read
1938        let write_locs: std::collections::HashSet<_> = output
1939            .def_use_sites
1940            .iter()
1941            .filter(|s| {
1942                matches!(
1943                    s.kind,
1944                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
1945                )
1946            })
1947            .map(|s| (&s.file, s.line, s.column))
1948            .collect();
1949        assert!(
1950            output
1951                .def_use_sites
1952                .iter()
1953                .filter(|s| s.kind == crate::types::DefUseKind::Read)
1954                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
1955            "no location should appear as both write and read",
1956        );
1957        assert!(
1958            output.formatted.contains("DEF-USE SITES"),
1959            "formatted output should contain DEF-USE SITES"
1960        );
1961    }
1962
1963    fn make_temp_file(content: &str) -> tempfile::NamedTempFile {
1964        let mut f = tempfile::NamedTempFile::new().unwrap();
1965        use std::io::Write;
1966        f.write_all(content.as_bytes()).unwrap();
1967        f.flush().unwrap();
1968        f
1969    }
1970}
aptu_coder_core/analyze.rs

aptu_coder_core/
analyze.rs