aptu_coder_core/
analyze.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the five MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), `analyze_module` (lightweight index), and `analyze_raw` (raw file read). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Upper bound, in bytes, on the size of a file the analysis functions will read.
///
/// The directory and focused analyses skip files whose metadata length exceeds this
/// value; `analyze_file` rejects them with a parser error instead.
31pub const MAX_FILE_SIZE_BYTES: u64 = 10_000_000;
32
/// Errors returned by the analysis entry points in this module.
///
/// Variants carrying `#[from]` wrap an error type from elsewhere, so `?` converts
/// those errors into `AnalyzeError` automatically.
33#[derive(Debug, Error)]
34#[non_exhaustive]
35pub enum AnalyzeError {
    /// Error produced by the `traversal` module (for example while walking a directory).
36    #[error("Traversal error: {0}")]
37    Traversal(#[from] crate::traversal::TraversalError),
    /// Error produced by the `parser` module. `analyze_file` also uses this variant
    /// for oversized files and for files that cannot be read.
38    #[error("Parser error: {0}")]
39    Parser(#[from] crate::parser::ParserError),
    /// Error produced by the `graph` module, including a focus symbol that is not found.
40    #[error("Graph error: {0}")]
41    Graph(#[from] crate::graph::GraphError),
    /// Error produced by the `formatter` module.
42    #[error("Formatter error: {0}")]
43    Formatter(#[from] crate::formatter::FormatterError),
    /// The `CancellationToken` supplied to an analysis function was cancelled.
44    #[error("Analysis cancelled")]
45    Cancelled,
    /// The language identifier (carried as the payload) was not recognized;
    /// returned by `analyze_str`.
46    #[error("unsupported language: {0}")]
47    UnsupportedLanguage(String),
    /// Wrapped standard-library I/O error.
48    #[error("I/O error: {0}")]
49    Io(#[from] std::io::Error),
    /// A requested line range has `start` greater than `end`; `total` is the number
    /// of lines in the file, reported for context in the message.
50    #[error("invalid range: start ({start}) > end ({end}); file has {total} lines")]
51    InvalidRange {
52        start: usize,
53        end: usize,
54        total: usize,
55    },
    /// A directory path was supplied where a file path was required.
56    #[error("path is a directory, not a file: {0}")]
57    NotAFile(PathBuf),
58}
59
60/// Result of directory analysis containing both formatted output and file data.
// Only `formatted`, `files` and (when present) `next_cursor` are serialized; the
// remaining fields are marked `#[serde(skip)]` and exist for in-process consumers.
// NOTE(review): `///` text on this type may feed the generated JSON schema when the
// `schemars` feature is enabled, so explanatory additions here use plain `//` comments.
61#[derive(Debug, Clone, Serialize)]
62#[cfg_attr(feature = "schemars", derive(JsonSchema))]
63#[non_exhaustive]
64pub struct AnalysisOutput {
    // Text produced by `format_structure` in `analyze_directory_with_progress`.
65    #[cfg_attr(
66        feature = "schemars",
67        schemars(description = "Formatted text representation of the analysis")
68    )]
69    pub formatted: String,
    // One `FileInfo` per file that was successfully read and analyzed.
70    #[cfg_attr(
71        feature = "schemars",
72        schemars(description = "List of files analyzed in the directory")
73    )]
74    pub files: Vec<FileInfo>,
75    /// Walk entries used internally for summary generation; not serialized.
76    #[serde(skip)]
77    #[cfg_attr(feature = "schemars", schemars(skip))]
78    pub entries: Vec<WalkEntry>,
79    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    // `analyze_directory_with_progress` always leaves this as `None`.
80    #[serde(skip)]
81    #[cfg_attr(feature = "schemars", schemars(skip))]
82    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    // Omitted from the serialized form when `None`; `analyze_directory_with_progress`
    // always sets it to `None`.
83    #[serde(skip_serializing_if = "Option::is_none")]
84    #[cfg_attr(
85        feature = "schemars",
86        schemars(
87            description = "Opaque cursor token for the next page of results (absent when no more results)"
88        )
89    )]
90    pub next_cursor: Option<String>,
91}
92
93/// Result of file-level semantic analysis.
// Produced by `analyze_file` (from a path) and `analyze_str` (from an in-memory buffer).
// NOTE(review): `///` text on this type may feed the generated JSON schema when the
// `schemars` feature is enabled, so explanatory additions here use plain `//` comments.
94#[derive(Debug, Clone, Serialize)]
95#[cfg_attr(feature = "schemars", derive(JsonSchema))]
96#[non_exhaustive]
97pub struct FileAnalysisOutput {
    // Text produced by `format_file_details`.
98    #[cfg_attr(
99        feature = "schemars",
100        schemars(description = "Formatted text representation of the analysis")
101    )]
102    pub formatted: String,
    // Extractor output; reference locations are filled in by the caller that built it.
103    #[cfg_attr(
104        feature = "schemars",
105        schemars(description = "Semantic analysis data including functions, classes, and imports")
106    )]
107    pub semantic: SemanticAnalysis,
    // Computed as `source.lines().count()`. The two schemars attributes below set the
    // description and the custom integer schema separately.
108    #[cfg_attr(
109        feature = "schemars",
110        schemars(description = "Total line count of the analyzed file")
111    )]
112    #[cfg_attr(
113        feature = "schemars",
114        schemars(schema_with = "crate::schema_helpers::integer_schema")
115    )]
116    pub line_count: usize,
    // Omitted from the serialized form when `None`; both constructors in this file pass `None`.
117    #[serde(skip_serializing_if = "Option::is_none")]
118    #[cfg_attr(
119        feature = "schemars",
120        schemars(
121            description = "Opaque cursor token for the next page of results (absent when no more results)"
122        )
123    )]
124    pub next_cursor: Option<String>,
125}
126
127impl FileAnalysisOutput {
128    /// Create a new `FileAnalysisOutput`.
129    #[must_use]
130    pub fn new(
131        formatted: String,
132        semantic: SemanticAnalysis,
133        line_count: usize,
134        next_cursor: Option<String>,
135    ) -> Self {
136        Self {
137            formatted,
138            semantic,
139            line_count,
140            next_cursor,
141        }
142    }
143}
144#[instrument(skip_all, fields(path = %root.display()))]
145// public API; callers expect owned semantics
146#[allow(clippy::needless_pass_by_value)]
147pub fn analyze_directory_with_progress(
148    root: &Path,
149    entries: Vec<WalkEntry>,
150    progress: Arc<AtomicUsize>,
151    ct: CancellationToken,
152) -> Result<AnalysisOutput, AnalyzeError> {
153    // Check if already cancelled
154    if ct.is_cancelled() {
155        return Err(AnalyzeError::Cancelled);
156    }
157
158    // Detect language from file extension
159    let file_entries: Vec<&WalkEntry> = entries
160        .iter()
161        .filter(|e| !e.is_dir && !e.is_symlink)
162        .collect();
163
164    let start = Instant::now();
165    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
166
167    // Parallel analysis of files
168    let analysis_results: Vec<FileInfo> = file_entries
169        .par_iter()
170        .filter_map(|entry| {
171            // Check cancellation per file
172            if ct.is_cancelled() {
173                return None;
174            }
175
176            let path_str = entry.path.display().to_string();
177
178            // Detect language from extension
179            let ext = entry.path.extension().and_then(|e| e.to_str());
180
181            // Check file size before reading
182            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
183                tracing::debug!("skipping large file: {}", entry.path.display());
184                progress.fetch_add(1, Ordering::Relaxed);
185                return None;
186            }
187
188            // Try to read file content; skip binary or unreadable files
189            let Ok(source) = std::fs::read_to_string(&entry.path) else {
190                progress.fetch_add(1, Ordering::Relaxed);
191                return None;
192            };
193
194            // Count lines
195            let line_count = source.lines().count();
196
197            // Detect language and extract counts
198            let (language, function_count, class_count) = if let Some(ext_str) = ext {
199                if let Some(lang) = language_for_extension(ext_str) {
200                    let lang_str = lang.to_string();
201                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
202                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
203                        Err(_) => (lang_str, 0, 0),
204                    }
205                } else {
206                    ("unknown".to_string(), 0, 0)
207                }
208            } else {
209                ("unknown".to_string(), 0, 0)
210            };
211
212            progress.fetch_add(1, Ordering::Relaxed);
213
214            let is_test = is_test_file(&entry.path);
215
216            Some(FileInfo {
217                path: path_str,
218                line_count,
219                function_count,
220                class_count,
221                language,
222                is_test,
223            })
224        })
225        .collect();
226
227    // Check if cancelled after parallel processing
228    if ct.is_cancelled() {
229        return Err(AnalyzeError::Cancelled);
230    }
231
232    tracing::debug!(
233        file_count = file_entries.len(),
234        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
235        "analysis complete"
236    );
237
238    // Format output
239    let formatted = format_structure(&entries, &analysis_results, None);
240
241    Ok(AnalysisOutput {
242        formatted,
243        files: analysis_results,
244        entries,
245        next_cursor: None,
246        subtree_counts: None,
247    })
248}
249
250/// Analyze a directory structure and return formatted output and file data.
251#[instrument(skip_all, fields(path = %root.display()))]
252pub fn analyze_directory(
253    root: &Path,
254    max_depth: Option<u32>,
255) -> Result<AnalysisOutput, AnalyzeError> {
256    let entries = walk_directory(root, max_depth)?;
257    let counter = Arc::new(AtomicUsize::new(0));
258    let ct = CancellationToken::new();
259    analyze_directory_with_progress(root, entries, counter, ct)
260}
261
262/// Determine analysis mode based on parameters and path.
263#[must_use]
264pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
265    if focus.is_some() {
266        return AnalysisMode::SymbolFocus;
267    }
268
269    let path_obj = Path::new(path);
270    if path_obj.is_dir() {
271        AnalysisMode::Overview
272    } else {
273        AnalysisMode::FileDetails
274    }
275}
276
277/// Analyze a single file and return semantic analysis with formatted output.
278#[instrument(skip_all, fields(path))]
279pub fn analyze_file(
280    path: &str,
281    ast_recursion_limit: Option<usize>,
282) -> Result<FileAnalysisOutput, AnalyzeError> {
283    let start = Instant::now();
284
285    // Check file size before reading
286    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
287        tracing::debug!("skipping large file: {}", path);
288        return Err(AnalyzeError::Parser(
289            crate::parser::ParserError::ParseError("file too large".to_string()),
290        ));
291    }
292
293    let source = std::fs::read_to_string(path)
294        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
295
296    let line_count = source.lines().count();
297
298    // Detect language from extension
299    let ext = Path::new(path)
300        .extension()
301        .and_then(|e| e.to_str())
302        .and_then(language_for_extension)
303        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
304
305    // Extract semantic information
306    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
307
308    // Populate the file path on references now that the path is known
309    for r in &mut semantic.references {
310        r.location = path.to_string();
311    }
312
313    // Resolve Python wildcard imports
314    if ext == "python" {
315        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
316    }
317
318    // Detect if this is a test file
319    let is_test = is_test_file(Path::new(path));
320
321    // Extract parent directory for relative path display
322    let parent_dir = Path::new(path).parent();
323
324    // Format output
325    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
326
327    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
328
329    Ok(FileAnalysisOutput::new(
330        formatted, semantic, line_count, None,
331    ))
332}
333
334/// Analyze source code from a string buffer without filesystem access.
335///
336/// This function analyzes in-memory source code by language identifier. The `language`
337/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
338/// extension (e.g., `"rs"`, `"py"`).
339///
340/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
341/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
342/// a file extension to its supported language identifier.
343///
344/// # Arguments
345///
346/// * `source` - The source code to analyze
347/// * `language` - The language identifier (language name or extension)
348/// * `ast_recursion_limit` - Optional limit for AST traversal depth
349///
350/// # Returns
351///
352/// - `Ok(FileAnalysisOutput)` on success
353/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
354/// - `Err(AnalyzeError::Parser)` if parsing fails
355///
356/// # Notes
357///
358/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
359/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
360#[inline]
361pub fn analyze_str(
362    source: &str,
363    language: &str,
364    ast_recursion_limit: Option<usize>,
365) -> Result<FileAnalysisOutput, AnalyzeError> {
366    // Resolve language: first try as a file extension, then as a language name
367    // (case-insensitive match against supported_languages()).
368    let lang = language_for_extension(language).or_else(|| {
369        let lower = language.to_ascii_lowercase();
370        supported_languages()
371            .iter()
372            .find(|&&name| name == lower)
373            .copied()
374    });
375    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
376
377    // Extract semantic information
378    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit)?;
379
380    // Populate a stable in-memory sentinel on all reference locations
381    for r in &mut semantic.references {
382        r.location = "<memory>".to_string();
383    }
384
385    // Count lines in the source
386    let line_count = source.lines().count();
387
388    // Format output with empty path (no filesystem access)
389    let formatted = format_file_details("", &semantic, line_count, false, None);
390
391    Ok(FileAnalysisOutput::new(
392        formatted, semantic, line_count, None,
393    ))
394}
395
396/// Single entry in a call chain (depth-1 direct caller or callee).
// Built by `chains_to_entries` from the first element of an `InternalCallChain`.
// NOTE(review): `///` text on this type may feed the generated JSON schema when the
// `schemars` feature is enabled, so explanatory additions here use plain `//` comments.
397#[derive(Debug, Clone, Serialize, Deserialize)]
398#[cfg_attr(feature = "schemars", derive(JsonSchema))]
399pub struct CallChainEntry {
400    #[cfg_attr(
401        feature = "schemars",
402        schemars(description = "Symbol name of the caller or callee")
403    )]
404    pub symbol: String,
    // `chains_to_entries` strips the root prefix when a root is given and the path
    // lies under it; otherwise the path is stored as-is.
405    #[cfg_attr(
406        feature = "schemars",
407        schemars(description = "File path relative to the repository root")
408    )]
409    pub file: String,
410    #[cfg_attr(
411        feature = "schemars",
412        schemars(
413            description = "Line number of the definition or call site (1-indexed)",
414            schema_with = "crate::schema_helpers::integer_schema"
415        )
416    )]
417    pub line: usize,
418}
419
420/// Result of focused symbol analysis.
// Serialized fields: `formatted`, `next_cursor`, `callers`, `test_callers`, `callees`
// and `def_use_sites`. Every field marked `#[serde(skip)]` is in-process state only.
// NOTE(review): `///` text on this type may feed the generated JSON schema when the
// `schemars` feature is enabled, so explanatory additions here use plain `//` comments.
421#[derive(Debug, Serialize)]
422#[cfg_attr(feature = "schemars", derive(JsonSchema))]
423#[non_exhaustive]
424pub struct FocusedAnalysisOutput {
425    #[cfg_attr(
426        feature = "schemars",
427        schemars(description = "Formatted text representation of the call graph analysis")
428    )]
429    pub formatted: String,
    // Omitted from the serialized form when `None`.
430    #[serde(skip_serializing_if = "Option::is_none")]
431    #[cfg_attr(
432        feature = "schemars",
433        schemars(
434            description = "Opaque cursor token for the next page of results (absent when no more results)"
435        )
436    )]
437    pub next_cursor: Option<String>,
438    /// Production caller chains (partitioned from incoming chains, excluding test callers).
439    /// Not serialized; used for pagination in lib.rs.
440    #[serde(skip)]
441    #[cfg_attr(feature = "schemars", schemars(skip))]
442    pub prod_chains: Vec<InternalCallChain>,
443    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
444    #[serde(skip)]
445    #[cfg_attr(feature = "schemars", schemars(skip))]
446    pub test_chains: Vec<InternalCallChain>,
447    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
448    #[serde(skip)]
449    #[cfg_attr(feature = "schemars", schemars(skip))]
450    pub outgoing_chains: Vec<InternalCallChain>,
451    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
452    #[serde(skip)]
453    #[cfg_attr(feature = "schemars", schemars(skip))]
454    pub def_count: usize,
455    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
456    #[serde(skip)]
457    #[cfg_attr(feature = "schemars", schemars(skip))]
458    pub unfiltered_caller_count: usize,
459    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    // Equals `unfiltered_caller_count` when the filter is off (see `resolve_symbol`).
460    #[serde(skip)]
461    #[cfg_attr(feature = "schemars", schemars(skip))]
462    pub impl_trait_caller_count: usize,
463    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
464    #[serde(skip_serializing_if = "Option::is_none")]
465    pub callers: Option<Vec<CallChainEntry>>,
466    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
467    #[serde(skip_serializing_if = "Option::is_none")]
468    pub test_callers: Option<Vec<CallChainEntry>>,
469    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
470    #[serde(skip_serializing_if = "Option::is_none")]
471    pub callees: Option<Vec<CallChainEntry>>,
472    /// Definition and use sites for the symbol.
    // NOTE(review): `#[serde(default)]` only influences deserialization, but this
    // struct derives `Serialize` alone — confirm whether the attribute is intended.
473    #[serde(default)]
474    pub def_use_sites: Vec<crate::types::DefUseSite>,
475}
476
477/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
478/// function signatures under clippy's default 7-argument threshold.
479#[derive(Clone)]
480pub struct FocusedAnalysisConfig {
    /// Name of the symbol to focus on.
481    pub focus: String,
    /// How `focus` is matched against symbols in the call graph.
482    pub match_mode: SymbolMatchMode,
    /// Depth passed to the incoming and outgoing chain searches.
483    pub follow_depth: u32,
    /// Depth limit passed to `walk_directory`; `None` is forwarded unchanged.
484    pub max_depth: Option<u32>,
    /// Optional AST recursion limit forwarded to `SemanticExtractor::extract`.
485    pub ast_recursion_limit: Option<usize>,
    /// Selects the summary formatter instead of the full focused formatter.
486    pub use_summary: bool,
    /// When `Some(true)`, caller edges are restricted to impl-trait edges;
    /// `None` behaves like `Some(false)`.
487    pub impl_only: Option<bool>,
    /// When `true`, a symbol missing from the call graph falls back to
    /// definition/use-site collection instead of failing immediately.
488    pub def_use: bool,
489}
490
491/// Internal parameters for focused analysis phases.
///
/// Mirrors [`FocusedAnalysisConfig`] minus `max_depth`, which is only needed for the
/// directory walk performed before the phases run.
492#[derive(Clone)]
493struct InternalFocusedParams {
    // Symbol name to resolve in the call graph.
494    focus: String,
    // Matching strategy used by `resolve_symbol`.
495    match_mode: SymbolMatchMode,
    // Depth passed to the chain searches in `compute_chains`.
496    follow_depth: u32,
    // Forwarded to `SemanticExtractor::extract` for every file.
497    ast_recursion_limit: Option<usize>,
    // Chooses between the summary and the full focused formatter.
498    use_summary: bool,
    // `Some(true)` restricts caller edges to impl-trait edges.
499    impl_only: Option<bool>,
    // Enables the def/use-site fallback when the symbol is not in the graph.
500    def_use: bool,
501}
502
503/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
///
/// This is the success type of `collect_file_analysis`; the second element is the
/// concatenation of every file's `impl_traits`.
504type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
505
506/// Phase 1: Collect semantic analysis for all files in parallel.
507fn collect_file_analysis(
508    entries: &[WalkEntry],
509    progress: &Arc<AtomicUsize>,
510    ct: &CancellationToken,
511    ast_recursion_limit: Option<usize>,
512) -> Result<FileAnalysisBatch, AnalyzeError> {
513    // Check if already cancelled
514    if ct.is_cancelled() {
515        return Err(AnalyzeError::Cancelled);
516    }
517
518    // Use pre-walked entries (passed by caller)
519    // Collect semantic analysis for all files in parallel
520    let file_entries: Vec<&WalkEntry> = entries
521        .iter()
522        .filter(|e| !e.is_dir && !e.is_symlink)
523        .collect();
524
525    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
526        .par_iter()
527        .filter_map(|entry| {
528            // Check cancellation per file
529            if ct.is_cancelled() {
530                return None;
531            }
532
533            let ext = entry.path.extension().and_then(|e| e.to_str());
534
535            // Check file size before reading
536            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
537                tracing::debug!("skipping large file: {}", entry.path.display());
538                progress.fetch_add(1, Ordering::Relaxed);
539                return None;
540            }
541
542            // Try to read file content
543            let Ok(source) = std::fs::read_to_string(&entry.path) else {
544                progress.fetch_add(1, Ordering::Relaxed);
545                return None;
546            };
547
548            // Detect language and extract semantic information
549            let language = if let Some(ext_str) = ext {
550                language_for_extension(ext_str)
551                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
552            } else {
553                "unknown".to_string()
554            };
555
556            if let Ok(mut semantic) =
557                SemanticExtractor::extract(&source, &language, ast_recursion_limit)
558            {
559                // Populate file path on references
560                for r in &mut semantic.references {
561                    r.location = entry.path.display().to_string();
562                }
563                // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
564                for trait_info in &mut semantic.impl_traits {
565                    trait_info.path.clone_from(&entry.path);
566                }
567                progress.fetch_add(1, Ordering::Relaxed);
568                Some((entry.path.clone(), semantic))
569            } else {
570                progress.fetch_add(1, Ordering::Relaxed);
571                None
572            }
573        })
574        .collect();
575
576    // Check if cancelled after parallel processing
577    if ct.is_cancelled() {
578        return Err(AnalyzeError::Cancelled);
579    }
580
581    // Collect all impl-trait info from analysis results
582    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
583        .iter()
584        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
585        .collect();
586
587    Ok((analysis_results, all_impl_traits))
588}
589
590/// Phase 2: Build call graph from analysis results.
591fn build_call_graph(
592    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
593    all_impl_traits: &[ImplTraitInfo],
594) -> Result<CallGraph, AnalyzeError> {
595    // Build call graph. Always build without impl_only filter first so we can
596    // record the unfiltered caller count before discarding those edges.
597    CallGraph::build_from_results(
598        analysis_results,
599        all_impl_traits,
600        false, // filter applied below after counting
601    )
602    .map_err(std::convert::Into::into)
603}
604
605/// Phase 3: Resolve symbol and apply `impl_only` filter.
606/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
607/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
608/// then compute `impl_trait_caller_count`.
609fn resolve_symbol(
610    graph: &mut CallGraph,
611    params: &InternalFocusedParams,
612) -> Result<(String, usize, usize), AnalyzeError> {
613    // Resolve symbol name using the requested match mode.
614    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
615        let exists = graph.definitions.contains_key(&params.focus)
616            || graph.callers.contains_key(&params.focus)
617            || graph.callees.contains_key(&params.focus);
618        if exists {
619            params.focus.clone()
620        } else {
621            return Err(crate::graph::GraphError::SymbolNotFound {
622                symbol: params.focus.clone(),
623                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
624            }
625            .into());
626        }
627    } else {
628        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
629    };
630
631    // Count unique callers for the focus symbol before applying impl_only filter.
632    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
633        edges
634            .iter()
635            .map(|e| &e.neighbor_name)
636            .collect::<std::collections::HashSet<_>>()
637            .len()
638    });
639
640    // Apply impl_only filter now if requested, then count filtered callers.
641    // Filter all caller adjacency lists so traversal and formatting are consistently
642    // restricted to impl-trait edges regardless of follow_depth.
643    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
644        for edges in graph.callers.values_mut() {
645            edges.retain(|e| e.is_impl_trait);
646        }
647        graph.callers.get(&resolved_focus).map_or(0, |edges| {
648            edges
649                .iter()
650                .map(|e| &e.neighbor_name)
651                .collect::<std::collections::HashSet<_>>()
652                .len()
653        })
654    } else {
655        unfiltered_caller_count
656    };
657
658    Ok((
659        resolved_focus,
660        unfiltered_caller_count,
661        impl_trait_caller_count,
662    ))
663}
664
665/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
///
/// `prod_chains` and `test_chains` together hold every incoming chain, split by
/// whether the chain's first element looks like test code.
666type ChainComputeResult = (
667    String,
668    Vec<InternalCallChain>,
669    Vec<InternalCallChain>,
670    Vec<InternalCallChain>,
671    usize,
672);
673
674/// Helper function to convert InternalCallChain data to CallChainEntry vec.
675/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
676/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
677fn chains_to_entries(
678    chains: &[InternalCallChain],
679    root: Option<&std::path::Path>,
680) -> Option<Vec<CallChainEntry>> {
681    if chains.is_empty() {
682        return None;
683    }
684    let entries: Vec<CallChainEntry> = chains
685        .iter()
686        .take(10)
687        .filter_map(|chain| {
688            let (symbol, path, line) = chain.chain.first()?;
689            let file = match root {
690                Some(root) => path
691                    .strip_prefix(root)
692                    .unwrap_or(path.as_path())
693                    .to_string_lossy()
694                    .into_owned(),
695                None => path.to_string_lossy().into_owned(),
696            };
697            Some(CallChainEntry {
698                symbol: symbol.clone(),
699                file,
700                line: *line,
701            })
702        })
703        .collect();
704    if entries.is_empty() {
705        None
706    } else {
707        Some(entries)
708    }
709}
710
711/// Phase 4: Compute chains and format output.
712fn compute_chains(
713    graph: &CallGraph,
714    resolved_focus: &str,
715    root: &Path,
716    params: &InternalFocusedParams,
717    unfiltered_caller_count: usize,
718    impl_trait_caller_count: usize,
719    def_use_sites: &[crate::types::DefUseSite],
720) -> Result<ChainComputeResult, AnalyzeError> {
721    // Compute chain data for pagination (always, regardless of summary mode)
722    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
723    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
724    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
725
726    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
727        incoming_chains.iter().cloned().partition(|chain| {
728            chain
729                .chain
730                .first()
731                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
732        });
733
734    // Format output with pre-computed chains
735    let mut formatted = if params.use_summary {
736        format_focused_summary_internal(
737            graph,
738            resolved_focus,
739            params.follow_depth,
740            Some(root),
741            Some(&incoming_chains),
742            Some(&outgoing_chains),
743            def_use_sites,
744        )?
745    } else {
746        format_focused_internal(
747            graph,
748            resolved_focus,
749            params.follow_depth,
750            Some(root),
751            Some(&incoming_chains),
752            Some(&outgoing_chains),
753            def_use_sites,
754        )?
755    };
756
757    // Add FILTER header if impl_only filter was applied
758    if params.impl_only.unwrap_or(false) {
759        let filter_header = format!(
760            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
761        );
762        formatted = format!("{filter_header}{formatted}");
763    }
764
765    Ok((
766        formatted,
767        prod_chains,
768        test_chains,
769        outgoing_chains,
770        def_count,
771    ))
772}
773
774/// Analyze a symbol's call graph across a directory with progress tracking.
775// public API; callers expect owned semantics
776#[allow(clippy::needless_pass_by_value)]
777pub fn analyze_focused_with_progress(
778    root: &Path,
779    params: &FocusedAnalysisConfig,
780    progress: Arc<AtomicUsize>,
781    ct: CancellationToken,
782) -> Result<FocusedAnalysisOutput, AnalyzeError> {
783    let entries = walk_directory(root, params.max_depth)?;
784    let internal_params = InternalFocusedParams {
785        focus: params.focus.clone(),
786        match_mode: params.match_mode.clone(),
787        follow_depth: params.follow_depth,
788        ast_recursion_limit: params.ast_recursion_limit,
789        use_summary: params.use_summary,
790        impl_only: params.impl_only,
791        def_use: params.def_use,
792    };
793    analyze_focused_with_progress_with_entries_internal(
794        root,
795        params.max_depth,
796        &progress,
797        &ct,
798        &internal_params,
799        &entries,
800    )
801}
802
803/// Internal implementation of focused analysis using pre-walked entries and params struct.
804#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
805fn analyze_focused_with_progress_with_entries_internal(
806    root: &Path,
807    _max_depth: Option<u32>,
808    progress: &Arc<AtomicUsize>,
809    ct: &CancellationToken,
810    params: &InternalFocusedParams,
811    entries: &[WalkEntry],
812) -> Result<FocusedAnalysisOutput, AnalyzeError> {
813    // Check if already cancelled
814    if ct.is_cancelled() {
815        return Err(AnalyzeError::Cancelled);
816    }
817
818    // Check if path is a file (hint to use directory)
819    if root.is_file() {
820        let formatted =
821            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
822                .to_string();
823        return Ok(FocusedAnalysisOutput {
824            formatted,
825            next_cursor: None,
826            prod_chains: vec![],
827            test_chains: vec![],
828            outgoing_chains: vec![],
829            def_count: 0,
830            unfiltered_caller_count: 0,
831            impl_trait_caller_count: 0,
832            callers: None,
833            test_callers: None,
834            callees: None,
835            def_use_sites: vec![],
836        });
837    }
838
839    // Phase 1: Collect file analysis
840    let (analysis_results, all_impl_traits) =
841        collect_file_analysis(entries, progress, ct, params.ast_recursion_limit)?;
842
843    // Check for cancellation before building the call graph (phase 2)
844    if ct.is_cancelled() {
845        return Err(AnalyzeError::Cancelled);
846    }
847
848    // Phase 2: Build call graph
849    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
850
851    // Check for cancellation before resolving the symbol (phase 3)
852    if ct.is_cancelled() {
853        return Err(AnalyzeError::Cancelled);
854    }
855
856    // Phase 3: Resolve symbol and apply impl_only filter.
857    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
858    // fall through to def-use extraction instead of returning SymbolNotFound.
859    let resolve_result = resolve_symbol(&mut graph, params);
860    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
861        &resolve_result
862    {
863        // Deliberately not collapsed: resolve_result must stay alive past this block
864        // so that the `?` below can propagate non-SymbolNotFound errors.
865        if params.def_use {
866            let def_use_sites =
867                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
868            if def_use_sites.is_empty() {
869                // Symbol not found anywhere (neither in call graph nor as def/use site).
870                // Propagate the original SymbolNotFound error instead of returning an
871                // empty success response.
872                return Err(resolve_result.unwrap_err());
873            }
874            use std::fmt::Write as _;
875            let mut formatted = String::new();
876            let _ = writeln!(
877                formatted,
878                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
879                params.focus
880            );
881            {
882                let writes = def_use_sites
883                    .iter()
884                    .filter(|s| {
885                        matches!(
886                            s.kind,
887                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
888                        )
889                    })
890                    .count();
891                let reads = def_use_sites
892                    .iter()
893                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
894                    .count();
895                let _ = writeln!(
896                    formatted,
897                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
898                    params.focus,
899                    def_use_sites.len(),
900                    writes,
901                    reads
902                );
903            }
904            return Ok(FocusedAnalysisOutput {
905                formatted,
906                next_cursor: None,
907                callers: None,
908                test_callers: None,
909                callees: None,
910                prod_chains: vec![],
911                test_chains: vec![],
912                outgoing_chains: vec![],
913                def_count: 0,
914                unfiltered_caller_count: 0,
915                impl_trait_caller_count: 0,
916                def_use_sites,
917            });
918        }
919    }
920    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
921
922    // Check for cancellation before computing chains (phase 4)
923    if ct.is_cancelled() {
924        return Err(AnalyzeError::Cancelled);
925    }
926
927    // Phase 5 (optional, before formatting): Def-use site extraction.
928    // Use params.focus (the raw user-supplied string) rather than resolved_focus
929    // so that variable/field names that are not in the call graph still work.
930    let def_use_sites = if params.def_use {
931        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
932    } else {
933        Vec::new()
934    };
935
936    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
937    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
938        &graph,
939        &resolved_focus,
940        root,
941        params,
942        unfiltered_caller_count,
943        impl_trait_caller_count,
944        &def_use_sites,
945    )?;
946
947    // Compute depth-1 chains for structured output fields (always direct relationships only,
948    // regardless of `follow_depth` used for the text-formatted output).
949    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
950        // Chains already at depth 1; reuse the partitioned vecs.
951        let callers = chains_to_entries(&prod_chains, Some(root));
952        let test_callers = chains_to_entries(&test_chains, Some(root));
953        let callees = chains_to_entries(&outgoing_chains, Some(root));
954        (callers, test_callers, callees)
955    } else {
956        // follow_depth > 1: re-query at depth 1 to get only direct edges.
957        let incoming1 = graph
958            .find_incoming_chains(&resolved_focus, 1)
959            .unwrap_or_default();
960        let outgoing1 = graph
961            .find_outgoing_chains(&resolved_focus, 1)
962            .unwrap_or_default();
963        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
964            chain
965                .chain
966                .first()
967                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
968        });
969        let callers = chains_to_entries(&prod1, Some(root));
970        let test_callers = chains_to_entries(&test1, Some(root));
971        let callees = chains_to_entries(&outgoing1, Some(root));
972        (callers, test_callers, callees)
973    };
974
975    Ok(FocusedAnalysisOutput {
976        formatted,
977        next_cursor: None,
978        callers: depth1_callers,
979        test_callers: depth1_test_callers,
980        callees: depth1_callees,
981        prod_chains,
982        test_chains,
983        outgoing_chains,
984        def_count,
985        unfiltered_caller_count,
986        impl_trait_caller_count,
987        def_use_sites,
988    })
989}
990
991/// Phase 5: Extract def-use sites for `symbol` across all entries.
992/// Writes go before reads; within each kind ordered by file, line, then column.
993fn collect_def_use_sites(
994    entries: &[WalkEntry],
995    symbol: &str,
996    ast_recursion_limit: Option<usize>,
997    root: &std::path::Path,
998    ct: &CancellationToken,
999) -> Vec<crate::types::DefUseSite> {
1000    use crate::parser::SemanticExtractor;
1001
1002    let file_entries: Vec<&WalkEntry> = entries
1003        .iter()
1004        .filter(|e| !e.is_dir && !e.is_symlink)
1005        .collect();
1006
1007    let mut sites: Vec<crate::types::DefUseSite> = file_entries
1008        .par_iter()
1009        .filter_map(|entry| {
1010            if ct.is_cancelled() {
1011                return None;
1012            }
1013
1014            // Check file size before reading
1015            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1016                tracing::debug!("skipping large file: {}", entry.path.display());
1017                return None;
1018            }
1019
1020            let Ok(source) = std::fs::read_to_string(&entry.path) else {
1021                return None;
1022            };
1023            let ext = entry
1024                .path
1025                .extension()
1026                .and_then(|e| e.to_str())
1027                .unwrap_or("");
1028            let lang = crate::lang::language_for_extension(ext)?;
1029            let file_path = entry
1030                .path
1031                .strip_prefix(root)
1032                .unwrap_or(&entry.path)
1033                .display()
1034                .to_string();
1035            let sites = SemanticExtractor::extract_def_use_for_file(
1036                &source,
1037                lang,
1038                symbol,
1039                &file_path,
1040                ast_recursion_limit,
1041            );
1042            if sites.is_empty() { None } else { Some(sites) }
1043        })
1044        .flatten()
1045        .collect();
1046
1047    // Writes before reads; within each kind: file, line, then column for deterministic order
1048    sites.sort_by(|a, b| {
1049        use crate::types::DefUseKind;
1050        let kind_ord = |k: &DefUseKind| match k {
1051            DefUseKind::Write | DefUseKind::WriteRead => 0,
1052            DefUseKind::Read => 1,
1053        };
1054        kind_ord(&a.kind)
1055            .cmp(&kind_ord(&b.kind))
1056            .then_with(|| a.file.cmp(&b.file))
1057            .then_with(|| a.line.cmp(&b.line))
1058            .then_with(|| a.column.cmp(&b.column))
1059    });
1060
1061    sites
1062}
1063
1064/// Analyze a symbol's call graph using pre-walked directory entries.
1065pub fn analyze_focused_with_progress_with_entries(
1066    root: &Path,
1067    params: &FocusedAnalysisConfig,
1068    progress: &Arc<AtomicUsize>,
1069    ct: &CancellationToken,
1070    entries: &[WalkEntry],
1071) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1072    let internal_params = InternalFocusedParams {
1073        focus: params.focus.clone(),
1074        match_mode: params.match_mode.clone(),
1075        follow_depth: params.follow_depth,
1076        ast_recursion_limit: params.ast_recursion_limit,
1077        use_summary: params.use_summary,
1078        impl_only: params.impl_only,
1079        def_use: params.def_use,
1080    };
1081    analyze_focused_with_progress_with_entries_internal(
1082        root,
1083        params.max_depth,
1084        progress,
1085        ct,
1086        &internal_params,
1087        entries,
1088    )
1089}
1090
1091#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1092pub fn analyze_focused(
1093    root: &Path,
1094    focus: &str,
1095    follow_depth: u32,
1096    max_depth: Option<u32>,
1097    ast_recursion_limit: Option<usize>,
1098) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1099    let entries = walk_directory(root, max_depth)?;
1100    let counter = Arc::new(AtomicUsize::new(0));
1101    let ct = CancellationToken::new();
1102    let params = FocusedAnalysisConfig {
1103        focus: focus.to_string(),
1104        match_mode: SymbolMatchMode::Exact,
1105        follow_depth,
1106        max_depth,
1107        ast_recursion_limit,
1108        use_summary: false,
1109        impl_only: None,
1110        def_use: false,
1111    };
1112    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1113}
1114
1115/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1116/// functions, imports) for lightweight code understanding.
1117#[instrument(skip_all, fields(path))]
1118pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1119    // Check file size before reading
1120    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1121        tracing::debug!("skipping large file: {}", path);
1122        return Err(AnalyzeError::Parser(
1123            crate::parser::ParserError::ParseError("file too large".to_string()),
1124        ));
1125    }
1126
1127    let source = std::fs::read_to_string(path)
1128        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1129
1130    let file_path = Path::new(path);
1131    let name = file_path
1132        .file_name()
1133        .and_then(|s| s.to_str())
1134        .unwrap_or("unknown")
1135        .to_string();
1136
1137    let line_count = source.lines().count();
1138
1139    let language = file_path
1140        .extension()
1141        .and_then(|e| e.to_str())
1142        .and_then(language_for_extension)
1143        .ok_or_else(|| {
1144            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1145                "unsupported or missing file extension".to_string(),
1146            ))
1147        })?;
1148
1149    let semantic = SemanticExtractor::extract(&source, language, None)?;
1150
1151    let functions = semantic
1152        .functions
1153        .into_iter()
1154        .map(|f| crate::types::ModuleFunctionInfo {
1155            name: f.name,
1156            line: f.line,
1157        })
1158        .collect();
1159
1160    let imports = semantic
1161        .imports
1162        .into_iter()
1163        .map(|i| crate::types::ModuleImportInfo {
1164            module: i.module,
1165            items: i.items,
1166        })
1167        .collect();
1168
1169    Ok(crate::types::ModuleInfo {
1170        name,
1171        line_count,
1172        language: language.to_string(),
1173        functions,
1174        imports,
1175    })
1176}
1177
1178/// Scan a directory for files that import a given module path.
1179///
1180/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1181/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1182/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1183pub fn analyze_import_lookup(
1184    root: &Path,
1185    module: &str,
1186    entries: &[WalkEntry],
1187    ast_recursion_limit: Option<usize>,
1188) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1189    let matches: Vec<(PathBuf, usize)> = entries
1190        .par_iter()
1191        .filter_map(|entry| {
1192            if entry.is_dir || entry.is_symlink {
1193                tracing::debug!("skipping symlink: {}", entry.path.display());
1194                return None;
1195            }
1196            let ext = entry
1197                .path
1198                .extension()
1199                .and_then(|e| e.to_str())
1200                .and_then(crate::lang::language_for_extension)?;
1201            let source = std::fs::read_to_string(&entry.path).ok()?;
1202            let semantic = SemanticExtractor::extract(&source, ext, ast_recursion_limit).ok()?;
1203            for import in &semantic.imports {
1204                if import.module == module || import.items.iter().any(|item| item == module) {
1205                    return Some((entry.path.clone(), import.line));
1206                }
1207            }
1208            None
1209        })
1210        .collect();
1211
1212    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1213    text.push_str(&format!("ROOT: {}\n", root.display()));
1214    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1215    for (path, line) in &matches {
1216        let rel = path.strip_prefix(root).unwrap_or(path);
1217        text.push_str(&format!("  {}:{line}\n", rel.display()));
1218    }
1219
1220    Ok(FocusedAnalysisOutput {
1221        formatted: text,
1222        next_cursor: None,
1223        prod_chains: vec![],
1224        test_chains: vec![],
1225        outgoing_chains: vec![],
1226        def_count: 0,
1227        unfiltered_caller_count: 0,
1228        impl_trait_caller_count: 0,
1229        callers: None,
1230        test_callers: None,
1231        callees: None,
1232        def_use_sites: vec![],
1233    })
1234}
1235
1236/// Resolve Python wildcard imports to actual symbol names.
1237///
1238/// For each import with items=`["*"]`, this function:
1239/// 1. Parses the relative dots (if any) and climbs the directory tree
1240/// 2. Finds the target .py file or __init__.py
1241/// 3. Extracts symbols (functions and classes) from the target
1242/// 4. Honors __all__ if defined, otherwise uses function+class names
1243///
1244/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1245fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1246    use std::collections::HashMap;
1247
1248    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1249    let Ok(file_path_canonical) = file_path.canonicalize() else {
1250        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1251        return;
1252    };
1253
1254    for import in imports.iter_mut() {
1255        if import.items != ["*"] {
1256            continue;
1257        }
1258        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1259    }
1260}
1261
1262/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1263fn resolve_single_wildcard(
1264    import: &mut ImportInfo,
1265    file_path: &Path,
1266    file_path_canonical: &Path,
1267    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1268) {
1269    let module = import.module.clone();
1270    let dot_count = module.chars().take_while(|c| *c == '.').count();
1271    if dot_count == 0 {
1272        return;
1273    }
1274    let module_path = module.trim_start_matches('.');
1275
1276    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1277    else {
1278        return;
1279    };
1280
1281    let Ok(canonical) = target_to_read.canonicalize() else {
1282        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1283        return;
1284    };
1285
1286    if canonical == file_path_canonical {
1287        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1288        return;
1289    }
1290
1291    if let Some(cached) = resolved_cache.get(&canonical) {
1292        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1293        import.items.clone_from(cached);
1294        return;
1295    }
1296
1297    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1298        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1299        import.items.clone_from(&symbols);
1300        resolved_cache.insert(canonical, symbols);
1301    }
1302}
1303
1304/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1305fn locate_target_file(
1306    file_path: &Path,
1307    dot_count: usize,
1308    module_path: &str,
1309    module: &str,
1310) -> Option<PathBuf> {
1311    let mut target_dir = file_path.parent()?.to_path_buf();
1312
1313    for _ in 1..dot_count {
1314        if !target_dir.pop() {
1315            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1316            return None;
1317        }
1318    }
1319
1320    let target_file = if module_path.is_empty() {
1321        target_dir.join("__init__.py")
1322    } else {
1323        let rel_path = module_path.replace('.', "/");
1324        target_dir.join(format!("{rel_path}.py"))
1325    };
1326
1327    if target_file.exists() {
1328        Some(target_file)
1329    } else if target_file.with_extension("").is_dir() {
1330        let init = target_file.with_extension("").join("__init__.py");
1331        if init.exists() { Some(init) } else { None }
1332    } else {
1333        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1334        None
1335    }
1336}
1337
1338/// Read and parse a target .py file, returning its exported symbols.
1339fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1340    use tree_sitter::Parser;
1341
1342    // Check file size before reading
1343    if target_path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1344        tracing::debug!("skipping large file: {}", target_path.display());
1345        return None;
1346    }
1347
1348    let source = match std::fs::read_to_string(target_path) {
1349        Ok(s) => s,
1350        Err(e) => {
1351            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1352            return None;
1353        }
1354    };
1355
1356    // Parse once with tree-sitter
1357    let lang_info = crate::languages::get_language_info("python")?;
1358    let mut parser = Parser::new();
1359    if parser.set_language(&lang_info.language).is_err() {
1360        return None;
1361    }
1362    let tree = parser.parse(&source, None)?;
1363
1364    // First, try to extract __all__ from the same tree
1365    let mut symbols = Vec::new();
1366    extract_all_from_tree(&tree, &source, &mut symbols);
1367    if !symbols.is_empty() {
1368        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1369        return Some(symbols);
1370    }
1371
1372    // Fallback: extract functions/classes from the tree
1373    let root = tree.root_node();
1374    let mut cursor = root.walk();
1375    for child in root.children(&mut cursor) {
1376        if matches!(child.kind(), "function_definition" | "class_definition")
1377            && let Some(name_node) = child.child_by_field_name("name")
1378        {
1379            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1380            if !name.starts_with('_') {
1381                symbols.push(name);
1382            }
1383        }
1384    }
1385    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1386    Some(symbols)
1387}
1388
1389/// Extract __all__ from a tree-sitter tree.
1390fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1391    let root = tree.root_node();
1392    let mut cursor = root.walk();
1393    for child in root.children(&mut cursor) {
1394        if child.kind() == "simple_statement" {
1395            // simple_statement contains assignment and other statement types
1396            let mut simple_cursor = child.walk();
1397            for simple_child in child.children(&mut simple_cursor) {
1398                if simple_child.kind() == "assignment"
1399                    && let Some(left) = simple_child.child_by_field_name("left")
1400                {
1401                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1402                    if target_text == "__all__"
1403                        && let Some(right) = simple_child.child_by_field_name("right")
1404                    {
1405                        extract_string_list_from_list_node(&right, source, result);
1406                    }
1407                }
1408            }
1409        } else if child.kind() == "expression_statement" {
1410            // Fallback for older Python AST structures
1411            let mut stmt_cursor = child.walk();
1412            for stmt_child in child.children(&mut stmt_cursor) {
1413                if stmt_child.kind() == "assignment"
1414                    && let Some(left) = stmt_child.child_by_field_name("left")
1415                {
1416                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1417                    if target_text == "__all__"
1418                        && let Some(right) = stmt_child.child_by_field_name("right")
1419                    {
1420                        extract_string_list_from_list_node(&right, source, result);
1421                    }
1422                }
1423            }
1424        }
1425    }
1426}
1427
1428/// Extract string literals from a Python list node.
1429fn extract_string_list_from_list_node(
1430    list_node: &tree_sitter::Node,
1431    source: &str,
1432    result: &mut Vec<String>,
1433) {
1434    let mut cursor = list_node.walk();
1435    for child in list_node.named_children(&mut cursor) {
1436        if child.kind() == "string" {
1437            let raw = source[child.start_byte()..child.end_byte()].trim();
1438            // Strip quotes: "name" -> name
1439            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1440            if !unquoted.is_empty() {
1441                result.push(unquoted);
1442            }
1443        }
1444    }
1445}
1446
1447/// Read a file and return its raw content with line numbers for a specified range.
1448///
1449/// # Arguments
1450/// - `path`: File path to read
1451/// - `start_line`: Starting line (1-indexed, optional; defaults to 1)
1452/// - `end_line`: Ending line (1-indexed, optional; defaults to total lines)
1453///
1454/// # Returns
1455/// - `Ok(AnalyzeRawOutput)` with formatted content and metadata
1456/// - `Err(AnalyzeError::NotAFile)` if path is a directory
1457/// - `Err(AnalyzeError::InvalidRange)` if start > end
1458/// - `Err(AnalyzeError::Io)` for file I/O errors
1459pub fn analyze_raw_range(
1460    path: &Path,
1461    start_line: Option<usize>,
1462    end_line: Option<usize>,
1463) -> Result<crate::types::AnalyzeRawOutput, AnalyzeError> {
1464    if path.is_dir() {
1465        return Err(AnalyzeError::NotAFile(path.to_path_buf()));
1466    }
1467
1468    // Check file size before reading
1469    if path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1470        tracing::debug!("skipping large file: {}", path.display());
1471        return Err(AnalyzeError::Parser(
1472            crate::parser::ParserError::ParseError("file too large".to_string()),
1473        ));
1474    }
1475
1476    let raw = std::fs::read_to_string(path)?;
1477    let lines: Vec<&str> = raw.lines().collect();
1478    let total = lines.len();
1479    if total == 0 {
1480        return Ok(crate::types::AnalyzeRawOutput {
1481            path: path.display().to_string(),
1482            total_lines: 0,
1483            start_line: 0,
1484            end_line: 0,
1485            content: String::new(),
1486        });
1487    }
1488    let start = start_line.unwrap_or(1).max(1).min(total.max(1));
1489    let end = end_line.unwrap_or(total).min(total).max(1);
1490    if start > end {
1491        return Err(AnalyzeError::InvalidRange { start, end, total });
1492    }
1493    let width = end.to_string().len();
1494    let content = lines[start - 1..end]
1495        .iter()
1496        .enumerate()
1497        .map(|(i, line)| format!("{:>width$}: {}", start + i, line, width = width))
1498        .collect::<Vec<_>>()
1499        .join("\n");
1500    Ok(crate::types::AnalyzeRawOutput {
1501        path: path.display().to_string(),
1502        total_lines: total,
1503        start_line: start,
1504        end_line: end,
1505        content,
1506    })
1507}
1508
1509#[cfg(test)]
1510mod tests {
1511    use super::*;
1512    use crate::formatter::format_focused_paginated;
1513    use crate::graph::InternalCallChain;
1514    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1515    use std::fs;
1516    use std::path::PathBuf;
1517    use tempfile::TempDir;
1518
1519    #[cfg(feature = "lang-rust")]
1520    #[test]
1521    fn analyze_str_rust_happy_path() {
1522        let source = "fn hello() -> i32 { 42 }";
1523        let result = analyze_str(source, "rs", None);
1524        assert!(result.is_ok());
1525    }
1526
1527    #[cfg(feature = "lang-python")]
1528    #[test]
1529    fn analyze_str_python_happy_path() {
1530        let source = "def greet(name):\n    return f'Hello {name}'";
1531        let result = analyze_str(source, "py", None);
1532        assert!(result.is_ok());
1533    }
1534
1535    #[cfg(feature = "lang-rust")]
1536    #[test]
1537    fn analyze_str_rust_by_language_name() {
1538        let source = "fn hello() -> i32 { 42 }";
1539        let result = analyze_str(source, "rust", None);
1540        assert!(result.is_ok());
1541    }
1542
1543    #[cfg(feature = "lang-python")]
1544    #[test]
1545    fn analyze_str_python_by_language_name() {
1546        let source = "def greet(name):\n    return f'Hello {name}'";
1547        let result = analyze_str(source, "python", None);
1548        assert!(result.is_ok());
1549    }
1550
1551    #[cfg(feature = "lang-rust")]
1552    #[test]
1553    fn analyze_str_rust_mixed_case() {
1554        let source = "fn hello() -> i32 { 42 }";
1555        let result = analyze_str(source, "RuSt", None);
1556        assert!(result.is_ok());
1557    }
1558
1559    #[cfg(feature = "lang-python")]
1560    #[test]
1561    fn analyze_str_python_mixed_case() {
1562        let source = "def greet(name):\n    return f'Hello {name}'";
1563        let result = analyze_str(source, "PyThOn", None);
1564        assert!(result.is_ok());
1565    }
1566
1567    #[test]
1568    fn analyze_str_unsupported_language() {
1569        let result = analyze_str("code", "brainfuck", None);
1570        assert!(
1571            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1572        );
1573    }
1574
1575    #[cfg(feature = "lang-rust")]
1576    #[test]
1577    fn test_symbol_focus_callers_pagination_first_page() {
1578        let temp_dir = TempDir::new().unwrap();
1579
1580        // Create a file with many callers of `target`
1581        let mut code = String::from("fn target() {}\n");
1582        for i in 0..15 {
1583            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1584        }
1585        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1586
1587        // Act
1588        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1589
1590        // Paginate prod callers with page_size=5
1591        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1592            .expect("paginate failed");
1593        assert!(
1594            paginated.total >= 5,
1595            "should have enough callers to paginate"
1596        );
1597        assert!(
1598            paginated.next_cursor.is_some(),
1599            "should have next_cursor for page 1"
1600        );
1601
1602        // Verify cursor encodes callers mode
1603        assert_eq!(paginated.items.len(), 5);
1604    }
1605
1606    #[test]
1607    fn test_symbol_focus_callers_pagination_second_page() {
1608        let temp_dir = TempDir::new().unwrap();
1609
1610        let mut code = String::from("fn target() {}\n");
1611        for i in 0..12 {
1612            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1613        }
1614        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1615
1616        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1617        let total_prod = output.prod_chains.len();
1618
1619        if total_prod > 5 {
1620            // Get page 1 cursor
1621            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1622                .expect("paginate failed");
1623            assert!(p1.next_cursor.is_some());
1624
1625            let cursor_str = p1.next_cursor.unwrap();
1626            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1627
1628            // Get page 2
1629            let p2 = paginate_slice(
1630                &output.prod_chains,
1631                cursor_data.offset,
1632                5,
1633                PaginationMode::Callers,
1634            )
1635            .expect("paginate failed");
1636
1637            // Format paginated output
1638            let formatted = format_focused_paginated(
1639                &p2.items,
1640                total_prod,
1641                PaginationMode::Callers,
1642                "target",
1643                &output.prod_chains,
1644                &output.test_chains,
1645                &output.outgoing_chains,
1646                output.def_count,
1647                cursor_data.offset,
1648                Some(temp_dir.path()),
1649                true,
1650            );
1651
1652            // Assert: header shows correct range for page 2
1653            let expected_start = cursor_data.offset + 1;
1654            assert!(
1655                formatted.contains(&format!("CALLERS ({}", expected_start)),
1656                "header should show page 2 range, got: {}",
1657                formatted
1658            );
1659        }
1660    }
1661
1662    #[test]
1663    fn test_chains_to_entries_empty_returns_none() {
1664        // Arrange
1665        let chains: Vec<InternalCallChain> = vec![];
1666
1667        // Act
1668        let result = chains_to_entries(&chains, None);
1669
1670        // Assert
1671        assert!(result.is_none());
1672    }
1673
1674    #[test]
1675    fn test_chains_to_entries_with_data_returns_entries() {
1676        // Arrange
1677        let chains = vec![
1678            InternalCallChain {
1679                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1680            },
1681            InternalCallChain {
1682                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1683            },
1684        ];
1685        let root = PathBuf::from("/root");
1686
1687        // Act
1688        let result = chains_to_entries(&chains, Some(root.as_path()));
1689
1690        // Assert
1691        assert!(result.is_some());
1692        let entries = result.unwrap();
1693        assert_eq!(entries.len(), 2);
1694        assert_eq!(entries[0].symbol, "caller1");
1695        assert_eq!(entries[0].file, "lib.rs");
1696        assert_eq!(entries[0].line, 10);
1697        assert_eq!(entries[1].symbol, "caller2");
1698        assert_eq!(entries[1].file, "other.rs");
1699        assert_eq!(entries[1].line, 20);
1700    }
1701
1702    #[test]
1703    fn test_symbol_focus_callees_pagination() {
1704        let temp_dir = TempDir::new().unwrap();
1705
1706        // target calls many functions
1707        let mut code = String::from("fn target() {\n");
1708        for i in 0..10 {
1709            code.push_str(&format!("    callee_{:02}();\n", i));
1710        }
1711        code.push_str("}\n");
1712        for i in 0..10 {
1713            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1714        }
1715        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1716
1717        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1718        let total_callees = output.outgoing_chains.len();
1719
1720        if total_callees > 3 {
1721            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1722                .expect("paginate failed");
1723
1724            let formatted = format_focused_paginated(
1725                &paginated.items,
1726                total_callees,
1727                PaginationMode::Callees,
1728                "target",
1729                &output.prod_chains,
1730                &output.test_chains,
1731                &output.outgoing_chains,
1732                output.def_count,
1733                0,
1734                Some(temp_dir.path()),
1735                true,
1736            );
1737
1738            assert!(
1739                formatted.contains(&format!(
1740                    "CALLEES (1-{} of {})",
1741                    paginated.items.len(),
1742                    total_callees
1743                )),
1744                "header should show callees range, got: {}",
1745                formatted
1746            );
1747        }
1748    }
1749
1750    #[test]
1751    fn test_symbol_focus_empty_prod_callers() {
1752        let temp_dir = TempDir::new().unwrap();
1753
1754        // target is only called from test functions
1755        let code = r#"
1756fn target() {}
1757
1758#[cfg(test)]
1759mod tests {
1760    use super::*;
1761    #[test]
1762    fn test_something() { target(); }
1763}
1764"#;
1765        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1766
1767        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1768
1769        // prod_chains may be empty; pagination should handle it gracefully
1770        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1771            .expect("paginate failed");
1772        assert_eq!(paginated.items.len(), output.prod_chains.len());
1773        assert!(
1774            paginated.next_cursor.is_none(),
1775            "no next_cursor for empty or single-page prod_chains"
1776        );
1777    }
1778
1779    #[test]
1780    fn test_impl_only_filter_header_correct_counts() {
1781        let temp_dir = TempDir::new().unwrap();
1782
1783        // Create a Rust fixture with:
1784        // - A trait definition
1785        // - An impl Trait for SomeType block that calls the focus symbol
1786        // - A regular (non-trait-impl) function that also calls the focus symbol
1787        let code = r#"
1788trait MyTrait {
1789    fn focus_symbol();
1790}
1791
1792struct SomeType;
1793
1794impl MyTrait for SomeType {
1795    fn focus_symbol() {}
1796}
1797
1798fn impl_caller() {
1799    SomeType::focus_symbol();
1800}
1801
1802fn regular_caller() {
1803    SomeType::focus_symbol();
1804}
1805"#;
1806        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1807
1808        // Call analyze_focused with impl_only=Some(true)
1809        let params = FocusedAnalysisConfig {
1810            focus: "focus_symbol".to_string(),
1811            match_mode: SymbolMatchMode::Insensitive,
1812            follow_depth: 1,
1813            max_depth: None,
1814            ast_recursion_limit: None,
1815            use_summary: false,
1816            impl_only: Some(true),
1817            def_use: false,
1818        };
1819        let output = analyze_focused_with_progress(
1820            temp_dir.path(),
1821            &params,
1822            Arc::new(AtomicUsize::new(0)),
1823            CancellationToken::new(),
1824        )
1825        .unwrap();
1826
1827        // Assert the result contains "FILTER: impl_only=true"
1828        assert!(
1829            output.formatted.contains("FILTER: impl_only=true"),
1830            "formatted output should contain FILTER header for impl_only=true, got: {}",
1831            output.formatted
1832        );
1833
1834        // Assert the retained count N < total count M
1835        assert!(
1836            output.impl_trait_caller_count < output.unfiltered_caller_count,
1837            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1838            output.impl_trait_caller_count,
1839            output.unfiltered_caller_count
1840        );
1841
1842        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1843        let filter_line = output
1844            .formatted
1845            .lines()
1846            .find(|line| line.contains("FILTER: impl_only=true"))
1847            .expect("should find FILTER line");
1848        assert!(
1849            filter_line.contains(&format!(
1850                "({} of {} callers shown)",
1851                output.impl_trait_caller_count, output.unfiltered_caller_count
1852            )),
1853            "FILTER line should show correct N of M counts, got: {}",
1854            filter_line
1855        );
1856    }
1857
1858    #[test]
1859    fn test_callers_count_matches_formatted_output() {
1860        let temp_dir = TempDir::new().unwrap();
1861
1862        // Create a file with multiple callers of `target`
1863        let code = r#"
1864fn target() {}
1865fn caller_a() { target(); }
1866fn caller_b() { target(); }
1867fn caller_c() { target(); }
1868"#;
1869        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1870
1871        // Analyze the symbol
1872        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1873
1874        // Extract CALLERS count from formatted output
1875        let formatted = &output.formatted;
1876        let callers_count_from_output = formatted
1877            .lines()
1878            .find(|line| line.contains("FOCUS:"))
1879            .and_then(|line| {
1880                line.split(',')
1881                    .find(|part| part.contains("callers"))
1882                    .and_then(|part| {
1883                        part.trim()
1884                            .split_whitespace()
1885                            .next()
1886                            .and_then(|s| s.parse::<usize>().ok())
1887                    })
1888            })
1889            .expect("should find CALLERS count in formatted output");
1890
1891        // Compute expected count from prod_chains (unique first-caller names)
1892        let expected_callers_count = output
1893            .prod_chains
1894            .iter()
1895            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1896            .collect::<std::collections::HashSet<_>>()
1897            .len();
1898
1899        assert_eq!(
1900            callers_count_from_output, expected_callers_count,
1901            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1902        );
1903    }
1904
1905    #[cfg(feature = "lang-rust")]
1906    #[test]
1907    fn test_def_use_focused_analysis() {
1908        let temp_dir = TempDir::new().unwrap();
1909        fs::write(
1910            temp_dir.path().join("lib.rs"),
1911            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1912        )
1913        .unwrap();
1914
1915        let entries = walk_directory(temp_dir.path(), None).unwrap();
1916        let counter = Arc::new(AtomicUsize::new(0));
1917        let ct = CancellationToken::new();
1918        let params = FocusedAnalysisConfig {
1919            focus: "x".to_string(),
1920            match_mode: SymbolMatchMode::Exact,
1921            follow_depth: 1,
1922            max_depth: None,
1923            ast_recursion_limit: None,
1924            use_summary: false,
1925            impl_only: None,
1926            def_use: true,
1927        };
1928
1929        let output = analyze_focused_with_progress_with_entries(
1930            temp_dir.path(),
1931            &params,
1932            &counter,
1933            &ct,
1934            &entries,
1935        )
1936        .expect("def_use analysis should succeed");
1937
1938        assert!(
1939            !output.def_use_sites.is_empty(),
1940            "should find def-use sites for x"
1941        );
1942        assert!(
1943            output
1944                .def_use_sites
1945                .iter()
1946                .any(|s| s.kind == crate::types::DefUseKind::Write),
1947            "should have at least one Write site",
1948        );
1949        // No location appears as both write and read
1950        let write_locs: std::collections::HashSet<_> = output
1951            .def_use_sites
1952            .iter()
1953            .filter(|s| {
1954                matches!(
1955                    s.kind,
1956                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
1957                )
1958            })
1959            .map(|s| (&s.file, s.line, s.column))
1960            .collect();
1961        assert!(
1962            output
1963                .def_use_sites
1964                .iter()
1965                .filter(|s| s.kind == crate::types::DefUseKind::Read)
1966                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
1967            "no location should appear as both write and read",
1968        );
1969        assert!(
1970            output.formatted.contains("DEF-USE SITES"),
1971            "formatted output should contain DEF-USE SITES"
1972        );
1973    }
1974
1975    fn make_temp_file(content: &str) -> tempfile::NamedTempFile {
1976        let mut f = tempfile::NamedTempFile::new().unwrap();
1977        use std::io::Write;
1978        f.write_all(content.as_bytes()).unwrap();
1979        f.flush().unwrap();
1980        f
1981    }
1982
1983    #[test]
1984    fn test_analyze_raw_full_file() {
1985        let f = make_temp_file("line1\nline2\nline3\n");
1986        let out = analyze_raw_range(f.path(), None, None).unwrap();
1987        assert_eq!(out.total_lines, 3);
1988        assert_eq!(out.start_line, 1);
1989        assert_eq!(out.end_line, 3);
1990        assert!(out.content.contains("line1"));
1991        assert!(out.content.contains("line3"));
1992    }
1993
1994    #[test]
1995    fn test_analyze_raw_partial_range() {
1996        let f = make_temp_file("a\nb\nc\nd\ne\n");
1997        let out = analyze_raw_range(f.path(), Some(2), Some(4)).unwrap();
1998        assert_eq!(out.start_line, 2);
1999        assert_eq!(out.end_line, 4);
2000        assert!(out.content.contains("b"));
2001        assert!(out.content.contains("d"));
2002        assert!(!out.content.contains("a"));
2003        assert!(!out.content.contains("e"));
2004    }
2005
2006    #[test]
2007    fn test_analyze_raw_invalid_range() {
2008        let f = make_temp_file("a\nb\nc\n");
2009        let err = analyze_raw_range(f.path(), Some(3), Some(1)).unwrap_err();
2010        assert!(matches!(err, AnalyzeError::InvalidRange { .. }));
2011    }
2012
2013    #[test]
2014    fn test_analyze_raw_clamped_range() {
2015        let f = make_temp_file("x\ny\nz\n");
2016        // end_line beyond total should clamp
2017        let out = analyze_raw_range(f.path(), Some(1), Some(999)).unwrap();
2018        assert_eq!(out.end_line, 3);
2019        assert_eq!(out.total_lines, 3);
2020    }
2021
2022    #[test]
2023    fn test_analyze_raw_empty_file() {
2024        let f = make_temp_file("");
2025        let out = analyze_raw_range(f.path(), None, None).unwrap();
2026        assert_eq!(out.total_lines, 0);
2027        assert_eq!(out.content, "");
2028    }
2029}
aptu_coder_core/analyze.rs

aptu_coder_core/
analyze.rs