// code_analyze_core/analyze.rs
1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::language_for_extension;
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::Serialize;
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Errors produced by the analysis engine, wrapping failures from each pipeline stage.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Directory walking failed.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Source parsing or semantic extraction failed (also used to surface file-read errors).
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Call-graph construction, traversal, or symbol resolution failed.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Output formatting failed.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The operation observed a triggered `CancellationToken` and stopped early.
    #[error("Analysis cancelled")]
    Cancelled,
}
45
/// Result of directory analysis containing both formatted output and file data.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct AnalysisOutput {
    /// Formatted text representation of the analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Per-file statistics for every file analyzed in the directory.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "List of files analyzed in the directory")
    )]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub entries: Vec<WalkEntry>,
    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    /// Opaque cursor token for the next page of results (`None` when no more results).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
78
/// Result of file-level semantic analysis.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FileAnalysisOutput {
    /// Formatted text representation of the analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Semantic analysis data (functions, classes, imports, references).
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Semantic analysis data including functions, classes, and imports")
    )]
    pub semantic: SemanticAnalysis,
    /// Total line count of the analyzed file.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Total line count of the analyzed file")
    )]
    #[cfg_attr(
        feature = "schemars",
        schemars(schema_with = "crate::schema_helpers::integer_schema")
    )]
    pub line_count: usize,
    /// Opaque cursor token for the next page of results (`None` when no more results).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
112
113impl FileAnalysisOutput {
114    /// Create a new `FileAnalysisOutput`.
115    #[must_use]
116    pub fn new(
117        formatted: String,
118        semantic: SemanticAnalysis,
119        line_count: usize,
120        next_cursor: Option<String>,
121    ) -> Self {
122        Self {
123            formatted,
124            semantic,
125            line_count,
126            next_cursor,
127        }
128    }
129}
/// Analyze pre-walked directory entries in parallel and build an [`AnalysisOutput`].
///
/// `progress` is incremented exactly once per file (including unreadable ones),
/// so callers can report completion against the total file count. Returns
/// [`AnalyzeError::Cancelled`] if `ct` is triggered before or during the run;
/// the post-pass check turns any mid-pass cancellation into an error.
#[instrument(skip_all, fields(path = %root.display()))]
// public API; callers expect owned semantics
#[allow(clippy::needless_pass_by_value)]
pub fn analyze_directory_with_progress(
    root: &Path,
    entries: Vec<WalkEntry>,
    progress: Arc<AtomicUsize>,
    ct: CancellationToken,
) -> Result<AnalysisOutput, AnalyzeError> {
    // Bail out early if the caller already cancelled.
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    // Only file entries are analyzable; directories are kept in `entries` for formatting.
    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();

    let start = Instant::now();
    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");

    // Per-file analysis, parallelized with rayon.
    let analysis_results: Vec<FileInfo> = file_entries
        .par_iter()
        .filter_map(|entry| {
            // Stop doing work quickly once cancellation fires; the post-collect
            // check below converts a mid-pass cancellation into an error.
            if ct.is_cancelled() {
                return None;
            }

            let path_str = entry.path.display().to_string();

            // Language detection is keyed off the file extension.
            let ext = entry.path.extension().and_then(|e| e.to_str());

            // Skip binary or unreadable files, but still count them as progress.
            let Ok(source) = std::fs::read_to_string(&entry.path) else {
                progress.fetch_add(1, Ordering::Relaxed);
                return None;
            };

            // Count lines
            let line_count = source.lines().count();

            // Map the extension to a language and count functions/classes.
            // Extraction failure degrades to zero counts rather than dropping the file.
            let (language, function_count, class_count) = if let Some(ext_str) = ext {
                if let Some(lang) = language_for_extension(ext_str) {
                    let lang_str = lang.to_string();
                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
                        Err(_) => (lang_str, 0, 0),
                    }
                } else {
                    ("unknown".to_string(), 0, 0)
                }
            } else {
                ("unknown".to_string(), 0, 0)
            };

            progress.fetch_add(1, Ordering::Relaxed);

            let is_test = is_test_file(&entry.path);

            Some(FileInfo {
                path: path_str,
                line_count,
                function_count,
                class_count,
                language,
                is_test,
            })
        })
        .collect();

    // A mid-pass cancellation produced partial results; discard them and error out.
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    tracing::debug!(
        file_count = file_entries.len(),
        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
        "analysis complete"
    );

    // Format output
    let formatted = format_structure(&entries, &analysis_results, None);

    Ok(AnalysisOutput {
        formatted,
        files: analysis_results,
        entries,
        next_cursor: None,
        subtree_counts: None,
    })
}
225
226/// Analyze a directory structure and return formatted output and file data.
227#[instrument(skip_all, fields(path = %root.display()))]
228pub fn analyze_directory(
229    root: &Path,
230    max_depth: Option<u32>,
231) -> Result<AnalysisOutput, AnalyzeError> {
232    let entries = walk_directory(root, max_depth)?;
233    let counter = Arc::new(AtomicUsize::new(0));
234    let ct = CancellationToken::new();
235    analyze_directory_with_progress(root, entries, counter, ct)
236}
237
238/// Determine analysis mode based on parameters and path.
239#[must_use]
240pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
241    if focus.is_some() {
242        return AnalysisMode::SymbolFocus;
243    }
244
245    let path_obj = Path::new(path);
246    if path_obj.is_dir() {
247        AnalysisMode::Overview
248    } else {
249        AnalysisMode::FileDetails
250    }
251}
252
253/// Analyze a single file and return semantic analysis with formatted output.
254#[instrument(skip_all, fields(path))]
255pub fn analyze_file(
256    path: &str,
257    ast_recursion_limit: Option<usize>,
258) -> Result<FileAnalysisOutput, AnalyzeError> {
259    let start = Instant::now();
260    let source = std::fs::read_to_string(path)
261        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
262
263    let line_count = source.lines().count();
264
265    // Detect language from extension
266    let ext = Path::new(path)
267        .extension()
268        .and_then(|e| e.to_str())
269        .and_then(language_for_extension)
270        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
271
272    // Extract semantic information
273    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
274
275    // Populate the file path on references now that the path is known
276    for r in &mut semantic.references {
277        r.location = path.to_string();
278    }
279
280    // Resolve Python wildcard imports
281    if ext == "python" {
282        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
283    }
284
285    // Detect if this is a test file
286    let is_test = is_test_file(Path::new(path));
287
288    // Extract parent directory for relative path display
289    let parent_dir = Path::new(path).parent();
290
291    // Format output
292    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
293
294    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
295
296    Ok(FileAnalysisOutput::new(
297        formatted, semantic, line_count, None,
298    ))
299}
300
/// Result of focused symbol analysis.
#[derive(Debug, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FocusedAnalysisOutput {
    /// Formatted text representation of the call graph analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the call graph analysis")
    )]
    pub formatted: String,
    /// Opaque cursor token for the next page of results (`None` when no more results).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub prod_chains: Vec<InternalCallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub test_chains: Vec<InternalCallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub outgoing_chains: Vec<InternalCallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub def_count: usize,
    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub unfiltered_caller_count: usize,
    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub impl_trait_caller_count: usize,
}
345
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Symbol name to resolve and analyze.
    pub focus: String,
    /// How `focus` is matched against known symbols (exact, case-insensitive, prefix, ...).
    pub match_mode: SymbolMatchMode,
    /// Maximum caller/callee chain depth to follow in the call graph.
    pub follow_depth: u32,
    /// Maximum directory walk depth; `None` means unlimited.
    pub max_depth: Option<u32>,
    /// AST recursion limit, passed through to `SemanticExtractor::extract`.
    pub ast_recursion_limit: Option<usize>,
    /// When true, emit the compact summary format instead of the full output.
    pub use_summary: bool,
    /// `Some(true)` restricts caller edges to impl-trait edges before traversal.
    pub impl_only: Option<bool>,
}
358
/// Internal parameters for focused analysis phases.
///
/// Mirrors [`FocusedAnalysisConfig`] minus `max_depth`, which is only needed
/// before the directory entries have been walked.
#[derive(Clone)]
struct FocusedAnalysisParams {
    focus: String,
    match_mode: SymbolMatchMode,
    follow_depth: u32,
    ast_recursion_limit: Option<usize>,
    use_summary: bool,
    impl_only: Option<bool>,
}
369
/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
/// The second element is the flattened `impl_traits` of every analyzed file.
type AnalysisResults = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
372
/// Phase 1: Collect semantic analysis for all files in parallel.
///
/// Unreadable files and files whose extraction fails are skipped; every file
/// still bumps `progress` exactly once. Returns per-file `(path, semantic)`
/// pairs plus the flattened impl-trait info across all files.
fn collect_file_analysis(
    entries: &[WalkEntry],
    progress: &Arc<AtomicUsize>,
    ct: &CancellationToken,
    ast_recursion_limit: Option<usize>,
) -> Result<AnalysisResults, AnalyzeError> {
    // Bail out early if the caller already cancelled.
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    // Use pre-walked entries (passed by caller); only files are analyzable.
    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();

    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
        .par_iter()
        .filter_map(|entry| {
            // Stop doing work once cancellation fires; the post-collect
            // check below converts a mid-pass cancellation into an error.
            if ct.is_cancelled() {
                return None;
            }

            let ext = entry.path.extension().and_then(|e| e.to_str());

            // Unreadable (e.g. binary) files are skipped but still counted as progress.
            let Ok(source) = std::fs::read_to_string(&entry.path) else {
                progress.fetch_add(1, Ordering::Relaxed);
                return None;
            };

            // Map the extension to a language name, defaulting to "unknown".
            let language = if let Some(ext_str) = ext {
                language_for_extension(ext_str)
                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
            } else {
                "unknown".to_string()
            };

            if let Ok(mut semantic) =
                SemanticExtractor::extract(&source, &language, ast_recursion_limit)
            {
                // References are extracted without a file path; fill it in now.
                for r in &mut semantic.references {
                    r.location = entry.path.display().to_string();
                }
                // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
                for trait_info in &mut semantic.impl_traits {
                    trait_info.path.clone_from(&entry.path);
                }
                progress.fetch_add(1, Ordering::Relaxed);
                Some((entry.path.clone(), semantic))
            } else {
                // Extraction failure is non-fatal for the directory pass.
                progress.fetch_add(1, Ordering::Relaxed);
                None
            }
        })
        .collect();

    // A mid-pass cancellation produced partial results; discard them and error out.
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    // Flatten impl-trait info across all files for call-graph construction.
    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
        .iter()
        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
        .collect();

    Ok((analysis_results, all_impl_traits))
}
446
447/// Phase 2: Build call graph from analysis results.
448fn build_call_graph(
449    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
450    all_impl_traits: &[ImplTraitInfo],
451) -> Result<CallGraph, AnalyzeError> {
452    // Build call graph. Always build without impl_only filter first so we can
453    // record the unfiltered caller count before discarding those edges.
454    CallGraph::build_from_results(
455        analysis_results,
456        all_impl_traits,
457        false, // filter applied below after counting
458    )
459    .map_err(std::convert::Into::into)
460}
461
/// Phase 3: Resolve symbol and apply `impl_only` filter.
/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
/// then compute `impl_trait_caller_count`.
///
/// # Errors
///
/// Returns a wrapped `GraphError::SymbolNotFound` (with a hint suggesting other
/// match modes) when exact matching finds no definition, caller, or callee entry.
fn resolve_symbol(
    graph: &mut CallGraph,
    params: &FocusedAnalysisParams,
) -> Result<(String, usize, usize), AnalyzeError> {
    // Resolve symbol name using the requested match mode. Exact mode bypasses
    // the index: a symbol "exists" if it appears as a definition key, a caller
    // key, or a callee key.
    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
        let exists = graph.definitions.contains_key(&params.focus)
            || graph.callers.contains_key(&params.focus)
            || graph.callees.contains_key(&params.focus);
        if exists {
            params.focus.clone()
        } else {
            return Err(crate::graph::GraphError::SymbolNotFound {
                symbol: params.focus.clone(),
                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
            }
            .into());
        }
    } else {
        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
    };

    // Count unique callers (deduped by neighbor name) BEFORE any filtering.
    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
        edges
            .iter()
            .map(|e| &e.neighbor_name)
            .collect::<std::collections::HashSet<_>>()
            .len()
    });

    // Apply impl_only filter now if requested, then count filtered callers.
    // Filter all caller adjacency lists so traversal and formatting are consistently
    // restricted to impl-trait edges regardless of follow_depth.
    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
        for edges in graph.callers.values_mut() {
            edges.retain(|e| e.is_impl_trait);
        }
        graph.callers.get(&resolved_focus).map_or(0, |edges| {
            edges
                .iter()
                .map(|e| &e.neighbor_name)
                .collect::<std::collections::HashSet<_>>()
                .len()
        })
    } else {
        // No filter: both counts are the same by definition.
        unfiltered_caller_count
    };

    Ok((
        resolved_focus,
        unfiltered_caller_count,
        impl_trait_caller_count,
    ))
}
521
/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
type ChainComputeResult = (
    String,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    usize,
);
530
/// Phase 4: Compute chains and format output.
///
/// Chains are always computed (even in summary mode) because callers use them
/// for pagination. Incoming chains are partitioned into production vs. test
/// callers, and a FILTER header is prepended when `impl_only` was applied.
fn compute_chains(
    graph: &CallGraph,
    resolved_focus: &str,
    root: &Path,
    params: &FocusedAnalysisParams,
    unfiltered_caller_count: usize,
    impl_trait_caller_count: usize,
) -> Result<ChainComputeResult, AnalyzeError> {
    // Compute chain data for pagination (always, regardless of summary mode)
    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;

    // A chain counts as "test" when its first element lives in a test file or
    // is named `test_*`; empty chains default to the production partition.
    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
        incoming_chains.iter().cloned().partition(|chain| {
            chain
                .chain
                .first()
                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
        });

    // Format output with the pre-computed chains.
    let mut formatted = if params.use_summary {
        format_focused_summary_internal(
            graph,
            resolved_focus,
            params.follow_depth,
            Some(root),
            Some(&incoming_chains),
            Some(&outgoing_chains),
        )?
    } else {
        format_focused_internal(
            graph,
            resolved_focus,
            params.follow_depth,
            Some(root),
            Some(&incoming_chains),
            Some(&outgoing_chains),
        )?
    };

    // Add FILTER header if impl_only filter was applied
    if params.impl_only.unwrap_or(false) {
        let filter_header = format!(
            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
        );
        formatted = format!("{filter_header}{formatted}");
    }

    Ok((
        formatted,
        prod_chains,
        test_chains,
        outgoing_chains,
        def_count,
    ))
}
590
591/// Analyze a symbol's call graph across a directory with progress tracking.
592// public API; callers expect owned semantics
593#[allow(clippy::needless_pass_by_value)]
594pub fn analyze_focused_with_progress(
595    root: &Path,
596    params: &FocusedAnalysisConfig,
597    progress: Arc<AtomicUsize>,
598    ct: CancellationToken,
599) -> Result<FocusedAnalysisOutput, AnalyzeError> {
600    let entries = walk_directory(root, params.max_depth)?;
601    let internal_params = FocusedAnalysisParams {
602        focus: params.focus.clone(),
603        match_mode: params.match_mode.clone(),
604        follow_depth: params.follow_depth,
605        ast_recursion_limit: params.ast_recursion_limit,
606        use_summary: params.use_summary,
607        impl_only: params.impl_only,
608    };
609    analyze_focused_with_progress_with_entries_internal(
610        root,
611        params.max_depth,
612        &progress,
613        &ct,
614        &internal_params,
615        &entries,
616    )
617}
618
/// Internal implementation of focused analysis using pre-walked entries and params struct.
///
/// Runs the four phases in order — file analysis, call-graph build, symbol
/// resolution, chain computation — checking the cancellation token between
/// each. A file `root` short-circuits with a hint message rather than an error.
#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
fn analyze_focused_with_progress_with_entries_internal(
    root: &Path,
    _max_depth: Option<u32>,
    progress: &Arc<AtomicUsize>,
    ct: &CancellationToken,
    params: &FocusedAnalysisParams,
    entries: &[WalkEntry],
) -> Result<FocusedAnalysisOutput, AnalyzeError> {
    // Bail out early if the caller already cancelled.
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    // Single-file focus is unsupported; return an empty result with a hint
    // instead of an error so MCP clients see actionable text.
    if root.is_file() {
        let formatted =
            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
                .to_string();
        return Ok(FocusedAnalysisOutput {
            formatted,
            next_cursor: None,
            prod_chains: vec![],
            test_chains: vec![],
            outgoing_chains: vec![],
            def_count: 0,
            unfiltered_caller_count: 0,
            impl_trait_caller_count: 0,
        });
    }

    // Phase 1: Collect file analysis
    let (analysis_results, all_impl_traits) =
        collect_file_analysis(entries, progress, ct, params.ast_recursion_limit)?;

    // Check for cancellation before building the call graph (phase 2)
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    // Phase 2: Build call graph
    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;

    // Check for cancellation before resolving the symbol (phase 3)
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    // Phase 3: Resolve symbol and apply impl_only filter
    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) =
        resolve_symbol(&mut graph, params)?;

    // Check for cancellation before computing chains (phase 4)
    if ct.is_cancelled() {
        return Err(AnalyzeError::Cancelled);
    }

    // Phase 4: Compute chains and format output
    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
        &graph,
        &resolved_focus,
        root,
        params,
        unfiltered_caller_count,
        impl_trait_caller_count,
    )?;

    Ok(FocusedAnalysisOutput {
        formatted,
        next_cursor: None,
        prod_chains,
        test_chains,
        outgoing_chains,
        def_count,
        unfiltered_caller_count,
        impl_trait_caller_count,
    })
}
698
699/// Analyze a symbol's call graph using pre-walked directory entries.
700pub fn analyze_focused_with_progress_with_entries(
701    root: &Path,
702    params: &FocusedAnalysisConfig,
703    progress: &Arc<AtomicUsize>,
704    ct: &CancellationToken,
705    entries: &[WalkEntry],
706) -> Result<FocusedAnalysisOutput, AnalyzeError> {
707    let internal_params = FocusedAnalysisParams {
708        focus: params.focus.clone(),
709        match_mode: params.match_mode.clone(),
710        follow_depth: params.follow_depth,
711        ast_recursion_limit: params.ast_recursion_limit,
712        use_summary: params.use_summary,
713        impl_only: params.impl_only,
714    };
715    analyze_focused_with_progress_with_entries_internal(
716        root,
717        params.max_depth,
718        progress,
719        ct,
720        &internal_params,
721        entries,
722    )
723}
724
725#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
726pub fn analyze_focused(
727    root: &Path,
728    focus: &str,
729    follow_depth: u32,
730    max_depth: Option<u32>,
731    ast_recursion_limit: Option<usize>,
732) -> Result<FocusedAnalysisOutput, AnalyzeError> {
733    let entries = walk_directory(root, max_depth)?;
734    let counter = Arc::new(AtomicUsize::new(0));
735    let ct = CancellationToken::new();
736    let params = FocusedAnalysisConfig {
737        focus: focus.to_string(),
738        match_mode: SymbolMatchMode::Exact,
739        follow_depth,
740        max_depth,
741        ast_recursion_limit,
742        use_summary: false,
743        impl_only: None,
744    };
745    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
746}
747
748/// Analyze a single file and return a minimal fixed schema (name, line count, language,
749/// functions, imports) for lightweight code understanding.
750#[instrument(skip_all, fields(path))]
751pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
752    let source = std::fs::read_to_string(path)
753        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
754
755    let file_path = Path::new(path);
756    let name = file_path
757        .file_name()
758        .and_then(|s| s.to_str())
759        .unwrap_or("unknown")
760        .to_string();
761
762    let line_count = source.lines().count();
763
764    let language = file_path
765        .extension()
766        .and_then(|e| e.to_str())
767        .and_then(language_for_extension)
768        .ok_or_else(|| {
769            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
770                "unsupported or missing file extension".to_string(),
771            ))
772        })?;
773
774    let semantic = SemanticExtractor::extract(&source, language, None)?;
775
776    let functions = semantic
777        .functions
778        .into_iter()
779        .map(|f| crate::types::ModuleFunctionInfo {
780            name: f.name,
781            line: f.line,
782        })
783        .collect();
784
785    let imports = semantic
786        .imports
787        .into_iter()
788        .map(|i| crate::types::ModuleImportInfo {
789            module: i.module,
790            items: i.items,
791        })
792        .collect();
793
794    Ok(crate::types::ModuleInfo {
795        name,
796        line_count,
797        language: language.to_string(),
798        functions,
799        imports,
800    })
801}
802
803/// Resolve Python wildcard imports to actual symbol names.
804///
805/// For each import with items=`["*"]`, this function:
806/// 1. Parses the relative dots (if any) and climbs the directory tree
807/// 2. Finds the target .py file or __init__.py
808/// 3. Extracts symbols (functions and classes) from the target
809/// 4. Honors __all__ if defined, otherwise uses function+class names
810///
811/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
812fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
813    use std::collections::HashMap;
814
815    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
816    let Ok(file_path_canonical) = file_path.canonicalize() else {
817        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
818        return;
819    };
820
821    for import in imports.iter_mut() {
822        if import.items != ["*"] {
823            continue;
824        }
825        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
826    }
827}
828
/// Resolve one wildcard import in place. On any failure the import is left unchanged.
///
/// Only relative imports (leading dots) are handled; absolute wildcard imports
/// are skipped. Resolved symbol lists are memoized in `resolved_cache` keyed by
/// the target's canonical path.
fn resolve_single_wildcard(
    import: &mut ImportInfo,
    file_path: &Path,
    file_path_canonical: &Path,
    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
) {
    let module = import.module.clone();
    // Leading dots encode Python relative-import depth; zero dots = absolute
    // import, which this resolver does not attempt.
    let dot_count = module.chars().take_while(|c| *c == '.').count();
    if dot_count == 0 {
        return;
    }
    let module_path = module.trim_start_matches('.');

    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
    else {
        return;
    };

    let Ok(canonical) = target_to_read.canonicalize() else {
        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
        return;
    };

    // A module importing `*` from itself would recurse pointlessly; skip it.
    if canonical == file_path_canonical {
        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
        return;
    }

    // Reuse previously parsed symbols for this target if available.
    if let Some(cached) = resolved_cache.get(&canonical) {
        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
        import.items.clone_from(cached);
        return;
    }

    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
        import.items.clone_from(&symbols);
        resolved_cache.insert(canonical, symbols);
    }
}
870
871/// Locate the .py file that a wildcard import refers to. Returns None if not found.
872fn locate_target_file(
873    file_path: &Path,
874    dot_count: usize,
875    module_path: &str,
876    module: &str,
877) -> Option<PathBuf> {
878    let mut target_dir = file_path.parent()?.to_path_buf();
879
880    for _ in 1..dot_count {
881        if !target_dir.pop() {
882            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
883            return None;
884        }
885    }
886
887    let target_file = if module_path.is_empty() {
888        target_dir.join("__init__.py")
889    } else {
890        let rel_path = module_path.replace('.', "/");
891        target_dir.join(format!("{rel_path}.py"))
892    };
893
894    if target_file.exists() {
895        Some(target_file)
896    } else if target_file.with_extension("").is_dir() {
897        let init = target_file.with_extension("").join("__init__.py");
898        if init.exists() { Some(init) } else { None }
899    } else {
900        tracing::debug!(target = ?target_file, import = %module, "target file not found");
901        None
902    }
903}
904
905/// Read and parse a target .py file, returning its exported symbols.
906fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
907    use tree_sitter::Parser;
908
909    let source = match std::fs::read_to_string(target_path) {
910        Ok(s) => s,
911        Err(e) => {
912            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
913            return None;
914        }
915    };
916
917    // Parse once with tree-sitter
918    let lang_info = crate::languages::get_language_info("python")?;
919    let mut parser = Parser::new();
920    if parser.set_language(&lang_info.language).is_err() {
921        return None;
922    }
923    let tree = parser.parse(&source, None)?;
924
925    // First, try to extract __all__ from the same tree
926    let mut symbols = Vec::new();
927    extract_all_from_tree(&tree, &source, &mut symbols);
928    if !symbols.is_empty() {
929        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
930        return Some(symbols);
931    }
932
933    // Fallback: extract functions/classes from the tree
934    let root = tree.root_node();
935    let mut cursor = root.walk();
936    for child in root.children(&mut cursor) {
937        if matches!(child.kind(), "function_definition" | "class_definition")
938            && let Some(name_node) = child.child_by_field_name("name")
939        {
940            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
941            if !name.starts_with('_') {
942                symbols.push(name);
943            }
944        }
945    }
946    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
947    Some(symbols)
948}
949
950/// Extract __all__ from a tree-sitter tree.
951fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
952    let root = tree.root_node();
953    let mut cursor = root.walk();
954    for child in root.children(&mut cursor) {
955        if child.kind() == "simple_statement" {
956            // simple_statement contains assignment and other statement types
957            let mut simple_cursor = child.walk();
958            for simple_child in child.children(&mut simple_cursor) {
959                if simple_child.kind() == "assignment"
960                    && let Some(left) = simple_child.child_by_field_name("left")
961                {
962                    let target_text = source[left.start_byte()..left.end_byte()].trim();
963                    if target_text == "__all__"
964                        && let Some(right) = simple_child.child_by_field_name("right")
965                    {
966                        extract_string_list_from_list_node(&right, source, result);
967                    }
968                }
969            }
970        } else if child.kind() == "expression_statement" {
971            // Fallback for older Python AST structures
972            let mut stmt_cursor = child.walk();
973            for stmt_child in child.children(&mut stmt_cursor) {
974                if stmt_child.kind() == "assignment"
975                    && let Some(left) = stmt_child.child_by_field_name("left")
976                {
977                    let target_text = source[left.start_byte()..left.end_byte()].trim();
978                    if target_text == "__all__"
979                        && let Some(right) = stmt_child.child_by_field_name("right")
980                    {
981                        extract_string_list_from_list_node(&right, source, result);
982                    }
983                }
984            }
985        }
986    }
987}
988
989/// Extract string literals from a Python list node.
990fn extract_string_list_from_list_node(
991    list_node: &tree_sitter::Node,
992    source: &str,
993    result: &mut Vec<String>,
994) {
995    let mut cursor = list_node.walk();
996    for child in list_node.named_children(&mut cursor) {
997        if child.kind() == "string" {
998            let raw = source[child.start_byte()..child.end_byte()].trim();
999            // Strip quotes: "name" -> name
1000            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1001            if !unquoted.is_empty() {
1002                result.push(unquoted);
1003            }
1004        }
1005    }
1006}
1007
// Tests require the Rust language feature: fixtures are written as .rs files
// and analyzed through the full analyze_focused pipeline.
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use crate::formatter::format_focused_paginated;
    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
    use std::fs;
    use tempfile::TempDir;

    /// First page of caller pagination: a full page of items is returned and a
    /// cursor pointing at the next page is present.
    #[test]
    fn test_symbol_focus_callers_pagination_first_page() {
        let temp_dir = TempDir::new().unwrap();

        // Create a file with many callers of `target`
        let mut code = String::from("fn target() {}\n");
        for i in 0..15 {
            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
        }
        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();

        // Act
        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();

        // Paginate prod callers with page_size=5
        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
            .expect("paginate failed");
        assert!(
            paginated.total >= 5,
            "should have enough callers to paginate"
        );
        assert!(
            paginated.next_cursor.is_some(),
            "should have next_cursor for page 1"
        );

        // Verify cursor encodes callers mode
        assert_eq!(paginated.items.len(), 5);
    }

    /// Second page of caller pagination: the cursor from page 1 resumes at the
    /// right offset and the formatted header reflects the page-2 range.
    #[test]
    fn test_symbol_focus_callers_pagination_second_page() {
        let temp_dir = TempDir::new().unwrap();

        let mut code = String::from("fn target() {}\n");
        for i in 0..12 {
            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
        }
        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();

        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
        let total_prod = output.prod_chains.len();

        // NOTE(review): assertions are guarded — if the analyzer yields <= 5
        // prod chains this test silently passes without checking anything.
        if total_prod > 5 {
            // Get page 1 cursor
            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
                .expect("paginate failed");
            assert!(p1.next_cursor.is_some());

            let cursor_str = p1.next_cursor.unwrap();
            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");

            // Get page 2
            let p2 = paginate_slice(
                &output.prod_chains,
                cursor_data.offset,
                5,
                PaginationMode::Callers,
            )
            .expect("paginate failed");

            // Format paginated output
            let formatted = format_focused_paginated(
                &p2.items,
                total_prod,
                PaginationMode::Callers,
                "target",
                &output.prod_chains,
                &output.test_chains,
                &output.outgoing_chains,
                output.def_count,
                cursor_data.offset,
                Some(temp_dir.path()),
                true,
            );

            // Assert: header shows correct range for page 2
            let expected_start = cursor_data.offset + 1;
            assert!(
                formatted.contains(&format!("CALLERS ({}", expected_start)),
                "header should show page 2 range, got: {}",
                formatted
            );
        }
    }

    /// Callee pagination: the formatted header shows the "1-N of M" range for
    /// the first page of outgoing (callee) chains.
    #[test]
    fn test_symbol_focus_callees_pagination() {
        let temp_dir = TempDir::new().unwrap();

        // target calls many functions
        let mut code = String::from("fn target() {\n");
        for i in 0..10 {
            code.push_str(&format!("    callee_{:02}();\n", i));
        }
        code.push_str("}\n");
        for i in 0..10 {
            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
        }
        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();

        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
        let total_callees = output.outgoing_chains.len();

        // NOTE(review): guarded like the second-page test above — silently
        // skips when fewer than 4 callees are resolved.
        if total_callees > 3 {
            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
                .expect("paginate failed");

            let formatted = format_focused_paginated(
                &paginated.items,
                total_callees,
                PaginationMode::Callees,
                "target",
                &output.prod_chains,
                &output.test_chains,
                &output.outgoing_chains,
                output.def_count,
                0,
                Some(temp_dir.path()),
                true,
            );

            assert!(
                formatted.contains(&format!(
                    "CALLEES (1-{} of {})",
                    paginated.items.len(),
                    total_callees
                )),
                "header should show callees range, got: {}",
                formatted
            );
        }
    }

    /// Edge case: when the symbol is only called from tests, prod_chains may be
    /// empty — pagination must return everything with no next cursor.
    #[test]
    fn test_symbol_focus_empty_prod_callers() {
        let temp_dir = TempDir::new().unwrap();

        // target is only called from test functions
        let code = r#"
fn target() {}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_something() { target(); }
}
"#;
        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();

        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();

        // prod_chains may be empty; pagination should handle it gracefully
        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
            .expect("paginate failed");
        assert_eq!(paginated.items.len(), output.prod_chains.len());
        assert!(
            paginated.next_cursor.is_none(),
            "no next_cursor for empty or single-page prod_chains"
        );
    }

    /// The impl_only filter must emit a "FILTER: impl_only=true (N of M callers
    /// shown)" header where N counts only trait-impl callers and N < M.
    #[test]
    fn test_impl_only_filter_header_correct_counts() {
        let temp_dir = TempDir::new().unwrap();

        // Create a Rust fixture with:
        // - A trait definition
        // - An impl Trait for SomeType block that calls the focus symbol
        // - A regular (non-trait-impl) function that also calls the focus symbol
        let code = r#"
trait MyTrait {
    fn focus_symbol();
}

struct SomeType;

impl MyTrait for SomeType {
    fn focus_symbol() {}
}

fn impl_caller() {
    SomeType::focus_symbol();
}

fn regular_caller() {
    SomeType::focus_symbol();
}
"#;
        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();

        // Call analyze_focused with impl_only=Some(true)
        let params = FocusedAnalysisConfig {
            focus: "focus_symbol".to_string(),
            match_mode: SymbolMatchMode::Insensitive,
            follow_depth: 1,
            max_depth: None,
            ast_recursion_limit: None,
            use_summary: false,
            impl_only: Some(true),
        };
        let output = analyze_focused_with_progress(
            temp_dir.path(),
            &params,
            Arc::new(AtomicUsize::new(0)),
            CancellationToken::new(),
        )
        .unwrap();

        // Assert the result contains "FILTER: impl_only=true"
        assert!(
            output.formatted.contains("FILTER: impl_only=true"),
            "formatted output should contain FILTER header for impl_only=true, got: {}",
            output.formatted
        );

        // Assert the retained count N < total count M
        assert!(
            output.impl_trait_caller_count < output.unfiltered_caller_count,
            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
            output.impl_trait_caller_count,
            output.unfiltered_caller_count
        );

        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
        let filter_line = output
            .formatted
            .lines()
            .find(|line| line.contains("FILTER: impl_only=true"))
            .expect("should find FILTER line");
        assert!(
            filter_line.contains(&format!(
                "({} of {} callers shown)",
                output.impl_trait_caller_count, output.unfiltered_caller_count
            )),
            "FILTER line should show correct N of M counts, got: {}",
            filter_line
        );
    }

    /// The caller count printed in the FOCUS header line must equal the number
    /// of unique first-hop callers present in prod_chains.
    #[test]
    fn test_callers_count_matches_formatted_output() {
        let temp_dir = TempDir::new().unwrap();

        // Create a file with multiple callers of `target`
        let code = r#"
fn target() {}
fn caller_a() { target(); }
fn caller_b() { target(); }
fn caller_c() { target(); }
"#;
        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();

        // Analyze the symbol
        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();

        // Extract CALLERS count from formatted output
        let formatted = &output.formatted;
        // Parse the leading integer of the ", N callers" fragment on the FOCUS line.
        let callers_count_from_output = formatted
            .lines()
            .find(|line| line.contains("FOCUS:"))
            .and_then(|line| {
                line.split(',')
                    .find(|part| part.contains("callers"))
                    .and_then(|part| {
                        part.trim()
                            .split_whitespace()
                            .next()
                            .and_then(|s| s.parse::<usize>().ok())
                    })
            })
            .expect("should find CALLERS count in formatted output");

        // Compute expected count from prod_chains (unique first-caller names)
        let expected_callers_count = output
            .prod_chains
            .iter()
            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
            .collect::<std::collections::HashSet<_>>()
            .len();

        assert_eq!(
            callers_count_from_output, expected_callers_count,
            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
        );
    }
}