Skip to main content

code_analyze_core/
analyze.rs

1//! Main analysis engine for extracting code structure from files and directories.
2//!
3//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
4//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
5
6use crate::formatter::{
7    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
8};
9use crate::graph::{CallGraph, InternalCallChain};
10use crate::lang::language_for_extension;
11use crate::parser::{ElementExtractor, SemanticExtractor};
12use crate::test_detection::is_test_file;
13use crate::traversal::{WalkEntry, walk_directory};
14use crate::types::{
15    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
16};
17use rayon::prelude::*;
18#[cfg(feature = "schemars")]
19use schemars::JsonSchema;
20use serde::Serialize;
21use std::path::{Path, PathBuf};
22use std::sync::Arc;
23use std::sync::atomic::{AtomicUsize, Ordering};
24use std::time::Instant;
25use thiserror::Error;
26use tokio_util::sync::CancellationToken;
27use tracing::instrument;
28
/// Errors that can occur in any of the analysis entry points.
///
/// Aggregates failures from the traversal, parsing, graph-building, and
/// formatting subsystems, plus an explicit variant for cooperative cancellation.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Directory walking failed.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Source parsing failed; also used to wrap file-read failures (see `analyze_file`).
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Call-graph construction or symbol resolution failed.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Rendering the formatted output failed.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The operation was cancelled via a `CancellationToken`.
    #[error("Analysis cancelled")]
    Cancelled,
}
43
/// Result of directory analysis containing both formatted output and file data.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct AnalysisOutput {
    /// Formatted text representation of the analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Per-file statistics (line/function/class counts, language, test flag).
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "List of files analyzed in the directory")
    )]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub entries: Vec<WalkEntry>,
    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    /// Opaque cursor token for the next page of results; `None` when there are no more pages.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
76
/// Result of file-level semantic analysis.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FileAnalysisOutput {
    /// Formatted text representation of the analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Semantic analysis data (functions, classes, imports, references).
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Semantic analysis data including functions, classes, and imports")
    )]
    pub semantic: SemanticAnalysis,
    /// Total line count of the analyzed file.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Total line count of the analyzed file")
    )]
    #[cfg_attr(
        feature = "schemars",
        schemars(schema_with = "crate::schema_helpers::integer_schema")
    )]
    pub line_count: usize,
    /// Opaque cursor token for the next page of results; `None` when there are no more pages.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
110
111impl FileAnalysisOutput {
112    /// Create a new `FileAnalysisOutput`.
113    #[must_use]
114    pub fn new(
115        formatted: String,
116        semantic: SemanticAnalysis,
117        line_count: usize,
118        next_cursor: Option<String>,
119    ) -> Self {
120        Self {
121            formatted,
122            semantic,
123            line_count,
124            next_cursor,
125        }
126    }
127}
128#[instrument(skip_all, fields(path = %root.display()))]
129// public API; callers expect owned semantics
130#[allow(clippy::needless_pass_by_value)]
131pub fn analyze_directory_with_progress(
132    root: &Path,
133    entries: Vec<WalkEntry>,
134    progress: Arc<AtomicUsize>,
135    ct: CancellationToken,
136) -> Result<AnalysisOutput, AnalyzeError> {
137    // Check if already cancelled
138    if ct.is_cancelled() {
139        return Err(AnalyzeError::Cancelled);
140    }
141
142    // Detect language from file extension
143    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
144
145    let start = Instant::now();
146    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
147
148    // Parallel analysis of files
149    let analysis_results: Vec<FileInfo> = file_entries
150        .par_iter()
151        .filter_map(|entry| {
152            // Check cancellation per file
153            if ct.is_cancelled() {
154                return None;
155            }
156
157            let path_str = entry.path.display().to_string();
158
159            // Detect language from extension
160            let ext = entry.path.extension().and_then(|e| e.to_str());
161
162            // Try to read file content; skip binary or unreadable files
163            let Ok(source) = std::fs::read_to_string(&entry.path) else {
164                progress.fetch_add(1, Ordering::Relaxed);
165                return None;
166            };
167
168            // Count lines
169            let line_count = source.lines().count();
170
171            // Detect language and extract counts
172            let (language, function_count, class_count) = if let Some(ext_str) = ext {
173                if let Some(lang) = language_for_extension(ext_str) {
174                    let lang_str = lang.to_string();
175                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
176                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
177                        Err(_) => (lang_str, 0, 0),
178                    }
179                } else {
180                    ("unknown".to_string(), 0, 0)
181                }
182            } else {
183                ("unknown".to_string(), 0, 0)
184            };
185
186            progress.fetch_add(1, Ordering::Relaxed);
187
188            let is_test = is_test_file(&entry.path);
189
190            Some(FileInfo {
191                path: path_str,
192                line_count,
193                function_count,
194                class_count,
195                language,
196                is_test,
197            })
198        })
199        .collect();
200
201    // Check if cancelled after parallel processing
202    if ct.is_cancelled() {
203        return Err(AnalyzeError::Cancelled);
204    }
205
206    tracing::debug!(
207        file_count = file_entries.len(),
208        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
209        "analysis complete"
210    );
211
212    // Format output
213    let formatted = format_structure(&entries, &analysis_results, None);
214
215    Ok(AnalysisOutput {
216        formatted,
217        files: analysis_results,
218        entries,
219        next_cursor: None,
220        subtree_counts: None,
221    })
222}
223
224/// Analyze a directory structure and return formatted output and file data.
225#[instrument(skip_all, fields(path = %root.display()))]
226pub fn analyze_directory(
227    root: &Path,
228    max_depth: Option<u32>,
229) -> Result<AnalysisOutput, AnalyzeError> {
230    let entries = walk_directory(root, max_depth)?;
231    let counter = Arc::new(AtomicUsize::new(0));
232    let ct = CancellationToken::new();
233    analyze_directory_with_progress(root, entries, counter, ct)
234}
235
236/// Determine analysis mode based on parameters and path.
237#[must_use]
238pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
239    if focus.is_some() {
240        return AnalysisMode::SymbolFocus;
241    }
242
243    let path_obj = Path::new(path);
244    if path_obj.is_dir() {
245        AnalysisMode::Overview
246    } else {
247        AnalysisMode::FileDetails
248    }
249}
250
251/// Analyze a single file and return semantic analysis with formatted output.
252#[instrument(skip_all, fields(path))]
253pub fn analyze_file(
254    path: &str,
255    ast_recursion_limit: Option<usize>,
256) -> Result<FileAnalysisOutput, AnalyzeError> {
257    let start = Instant::now();
258    let source = std::fs::read_to_string(path)
259        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
260
261    let line_count = source.lines().count();
262
263    // Detect language from extension
264    let ext = Path::new(path)
265        .extension()
266        .and_then(|e| e.to_str())
267        .and_then(language_for_extension)
268        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
269
270    // Extract semantic information
271    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
272
273    // Populate the file path on references now that the path is known
274    for r in &mut semantic.references {
275        r.location = path.to_string();
276    }
277
278    // Resolve Python wildcard imports
279    if ext == "python" {
280        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
281    }
282
283    // Detect if this is a test file
284    let is_test = is_test_file(Path::new(path));
285
286    // Extract parent directory for relative path display
287    let parent_dir = Path::new(path).parent();
288
289    // Format output
290    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
291
292    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
293
294    Ok(FileAnalysisOutput::new(
295        formatted, semantic, line_count, None,
296    ))
297}
298
/// Result of focused symbol analysis.
#[derive(Debug, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FocusedAnalysisOutput {
    /// Formatted text representation of the call graph analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the call graph analysis")
    )]
    pub formatted: String,
    /// Opaque cursor token for the next page of results; `None` when there are no more pages.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub prod_chains: Vec<InternalCallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub test_chains: Vec<InternalCallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub outgoing_chains: Vec<InternalCallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub def_count: usize,
    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub unfiltered_caller_count: usize,
    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub impl_trait_caller_count: usize,
}
343
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Symbol name to focus the call-graph analysis on.
    pub focus: String,
    /// How `focus` is matched against known symbols (e.g. exact vs. case-insensitive).
    pub match_mode: SymbolMatchMode,
    /// Depth limit when following incoming/outgoing call chains.
    pub follow_depth: u32,
    /// Directory traversal depth bound, passed to `walk_directory`.
    pub max_depth: Option<u32>,
    /// Recursion limit handed to the semantic extractor (`None` = extractor default).
    pub ast_recursion_limit: Option<usize>,
    /// When true, render the condensed summary format instead of the full report.
    pub use_summary: bool,
    /// When `Some(true)`, restrict caller edges to impl-trait edges.
    pub impl_only: Option<bool>,
}
356
/// Internal parameters for focused analysis phases.
///
/// Mirrors [`FocusedAnalysisConfig`] minus `max_depth`, which is consumed by the
/// directory walk before the phases run; see that struct for field semantics.
#[derive(Clone)]
struct FocusedAnalysisParams {
    focus: String,
    match_mode: SymbolMatchMode,
    follow_depth: u32,
    ast_recursion_limit: Option<usize>,
    use_summary: bool,
    impl_only: Option<bool>,
}
367
/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs plus
/// the impl-trait info flattened across all analyzed files.
type AnalysisResults = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
370
371/// Phase 1: Collect semantic analysis for all files in parallel.
372fn collect_file_analysis(
373    entries: &[WalkEntry],
374    progress: &Arc<AtomicUsize>,
375    ct: &CancellationToken,
376    ast_recursion_limit: Option<usize>,
377) -> Result<AnalysisResults, AnalyzeError> {
378    // Check if already cancelled
379    if ct.is_cancelled() {
380        return Err(AnalyzeError::Cancelled);
381    }
382
383    // Use pre-walked entries (passed by caller)
384    // Collect semantic analysis for all files in parallel
385    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
386
387    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
388        .par_iter()
389        .filter_map(|entry| {
390            // Check cancellation per file
391            if ct.is_cancelled() {
392                return None;
393            }
394
395            let ext = entry.path.extension().and_then(|e| e.to_str());
396
397            // Try to read file content
398            let Ok(source) = std::fs::read_to_string(&entry.path) else {
399                progress.fetch_add(1, Ordering::Relaxed);
400                return None;
401            };
402
403            // Detect language and extract semantic information
404            let language = if let Some(ext_str) = ext {
405                language_for_extension(ext_str)
406                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
407            } else {
408                "unknown".to_string()
409            };
410
411            if let Ok(mut semantic) =
412                SemanticExtractor::extract(&source, &language, ast_recursion_limit)
413            {
414                // Populate file path on references
415                for r in &mut semantic.references {
416                    r.location = entry.path.display().to_string();
417                }
418                // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
419                for trait_info in &mut semantic.impl_traits {
420                    trait_info.path.clone_from(&entry.path);
421                }
422                progress.fetch_add(1, Ordering::Relaxed);
423                Some((entry.path.clone(), semantic))
424            } else {
425                progress.fetch_add(1, Ordering::Relaxed);
426                None
427            }
428        })
429        .collect();
430
431    // Check if cancelled after parallel processing
432    if ct.is_cancelled() {
433        return Err(AnalyzeError::Cancelled);
434    }
435
436    // Collect all impl-trait info from analysis results
437    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
438        .iter()
439        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
440        .collect();
441
442    Ok((analysis_results, all_impl_traits))
443}
444
445/// Phase 2: Build call graph from analysis results.
446fn build_call_graph(
447    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
448    all_impl_traits: &[ImplTraitInfo],
449) -> Result<CallGraph, AnalyzeError> {
450    // Build call graph. Always build without impl_only filter first so we can
451    // record the unfiltered caller count before discarding those edges.
452    CallGraph::build_from_results(
453        analysis_results,
454        all_impl_traits,
455        false, // filter applied below after counting
456    )
457    .map_err(std::convert::Into::into)
458}
459
/// Phase 3: Resolve symbol and apply `impl_only` filter.
/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
/// then compute `impl_trait_caller_count`.
fn resolve_symbol(
    graph: &mut CallGraph,
    params: &FocusedAnalysisParams,
) -> Result<(String, usize, usize), AnalyzeError> {
    // Resolve symbol name using the requested match mode.
    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
        // Exact mode: the symbol must appear verbatim in at least one of the
        // graph's maps (definitions, callers, or callees).
        let exists = graph.definitions.contains_key(&params.focus)
            || graph.callers.contains_key(&params.focus)
            || graph.callees.contains_key(&params.focus);
        if exists {
            params.focus.clone()
        } else {
            return Err(crate::graph::GraphError::SymbolNotFound {
                symbol: params.focus.clone(),
                hint: "Try match_mode=insensitive for a case-insensitive search.".to_string(),
            }
            .into());
        }
    } else {
        // Non-exact modes delegate to the graph's indexed resolver.
        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
    };

    // Count unique callers for the focus symbol before applying impl_only filter.
    // Uniqueness is by neighbor name, so multiple edges from one caller count once.
    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
        edges
            .iter()
            .map(|e| &e.neighbor_name)
            .collect::<std::collections::HashSet<_>>()
            .len()
    });

    // Apply impl_only filter now if requested, then count filtered callers.
    // Filter all caller adjacency lists so traversal and formatting are consistently
    // restricted to impl-trait edges regardless of follow_depth.
    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
        for edges in graph.callers.values_mut() {
            edges.retain(|e| e.is_impl_trait);
        }
        graph.callers.get(&resolved_focus).map_or(0, |edges| {
            edges
                .iter()
                .map(|e| &e.neighbor_name)
                .collect::<std::collections::HashSet<_>>()
                .len()
        })
    } else {
        // No filter requested: the filtered count equals the unfiltered count.
        unfiltered_caller_count
    };

    Ok((
        resolved_focus,
        unfiltered_caller_count,
        impl_trait_caller_count,
    ))
}
519
/// Type alias for `compute_chains` return type:
/// (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
type ChainComputeResult = (
    String,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    usize,
);
528
529/// Phase 4: Compute chains and format output.
530fn compute_chains(
531    graph: &CallGraph,
532    resolved_focus: &str,
533    root: &Path,
534    params: &FocusedAnalysisParams,
535    unfiltered_caller_count: usize,
536    impl_trait_caller_count: usize,
537) -> Result<ChainComputeResult, AnalyzeError> {
538    // Compute chain data for pagination (always, regardless of summary mode)
539    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
540    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
541    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
542
543    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
544        incoming_chains.iter().cloned().partition(|chain| {
545            chain
546                .chain
547                .first()
548                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
549        });
550
551    // Format output with pre-computed chains
552    let mut formatted = if params.use_summary {
553        format_focused_summary_internal(
554            graph,
555            resolved_focus,
556            params.follow_depth,
557            Some(root),
558            Some(&incoming_chains),
559            Some(&outgoing_chains),
560        )?
561    } else {
562        format_focused_internal(
563            graph,
564            resolved_focus,
565            params.follow_depth,
566            Some(root),
567            Some(&incoming_chains),
568            Some(&outgoing_chains),
569        )?
570    };
571
572    // Add FILTER header if impl_only filter was applied
573    if params.impl_only.unwrap_or(false) {
574        let filter_header = format!(
575            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
576        );
577        formatted = format!("{filter_header}{formatted}");
578    }
579
580    Ok((
581        formatted,
582        prod_chains,
583        test_chains,
584        outgoing_chains,
585        def_count,
586    ))
587}
588
589/// Analyze a symbol's call graph across a directory with progress tracking.
590// public API; callers expect owned semantics
591#[allow(clippy::needless_pass_by_value)]
592pub fn analyze_focused_with_progress(
593    root: &Path,
594    params: &FocusedAnalysisConfig,
595    progress: Arc<AtomicUsize>,
596    ct: CancellationToken,
597) -> Result<FocusedAnalysisOutput, AnalyzeError> {
598    let entries = walk_directory(root, params.max_depth)?;
599    let internal_params = FocusedAnalysisParams {
600        focus: params.focus.clone(),
601        match_mode: params.match_mode.clone(),
602        follow_depth: params.follow_depth,
603        ast_recursion_limit: params.ast_recursion_limit,
604        use_summary: params.use_summary,
605        impl_only: params.impl_only,
606    };
607    analyze_focused_with_progress_with_entries_internal(
608        root,
609        params.max_depth,
610        &progress,
611        &ct,
612        &internal_params,
613        &entries,
614    )
615}
616
617/// Internal implementation of focused analysis using pre-walked entries and params struct.
618#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
619fn analyze_focused_with_progress_with_entries_internal(
620    root: &Path,
621    _max_depth: Option<u32>,
622    progress: &Arc<AtomicUsize>,
623    ct: &CancellationToken,
624    params: &FocusedAnalysisParams,
625    entries: &[WalkEntry],
626) -> Result<FocusedAnalysisOutput, AnalyzeError> {
627    // Check if already cancelled
628    if ct.is_cancelled() {
629        return Err(AnalyzeError::Cancelled);
630    }
631
632    // Check if path is a file (hint to use directory)
633    if root.is_file() {
634        let formatted =
635            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
636                .to_string();
637        return Ok(FocusedAnalysisOutput {
638            formatted,
639            next_cursor: None,
640            prod_chains: vec![],
641            test_chains: vec![],
642            outgoing_chains: vec![],
643            def_count: 0,
644            unfiltered_caller_count: 0,
645            impl_trait_caller_count: 0,
646        });
647    }
648
649    // Phase 1: Collect file analysis
650    let (analysis_results, all_impl_traits) =
651        collect_file_analysis(entries, progress, ct, params.ast_recursion_limit)?;
652
653    // Check for cancellation before building the call graph (phase 2)
654    if ct.is_cancelled() {
655        return Err(AnalyzeError::Cancelled);
656    }
657
658    // Phase 2: Build call graph
659    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
660
661    // Check for cancellation before resolving the symbol (phase 3)
662    if ct.is_cancelled() {
663        return Err(AnalyzeError::Cancelled);
664    }
665
666    // Phase 3: Resolve symbol and apply impl_only filter
667    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) =
668        resolve_symbol(&mut graph, params)?;
669
670    // Check for cancellation before computing chains (phase 4)
671    if ct.is_cancelled() {
672        return Err(AnalyzeError::Cancelled);
673    }
674
675    // Phase 4: Compute chains and format output
676    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
677        &graph,
678        &resolved_focus,
679        root,
680        params,
681        unfiltered_caller_count,
682        impl_trait_caller_count,
683    )?;
684
685    Ok(FocusedAnalysisOutput {
686        formatted,
687        next_cursor: None,
688        prod_chains,
689        test_chains,
690        outgoing_chains,
691        def_count,
692        unfiltered_caller_count,
693        impl_trait_caller_count,
694    })
695}
696
697/// Analyze a symbol's call graph using pre-walked directory entries.
698pub fn analyze_focused_with_progress_with_entries(
699    root: &Path,
700    params: &FocusedAnalysisConfig,
701    progress: &Arc<AtomicUsize>,
702    ct: &CancellationToken,
703    entries: &[WalkEntry],
704) -> Result<FocusedAnalysisOutput, AnalyzeError> {
705    let internal_params = FocusedAnalysisParams {
706        focus: params.focus.clone(),
707        match_mode: params.match_mode.clone(),
708        follow_depth: params.follow_depth,
709        ast_recursion_limit: params.ast_recursion_limit,
710        use_summary: params.use_summary,
711        impl_only: params.impl_only,
712    };
713    analyze_focused_with_progress_with_entries_internal(
714        root,
715        params.max_depth,
716        progress,
717        ct,
718        &internal_params,
719        entries,
720    )
721}
722
723#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
724pub fn analyze_focused(
725    root: &Path,
726    focus: &str,
727    follow_depth: u32,
728    max_depth: Option<u32>,
729    ast_recursion_limit: Option<usize>,
730) -> Result<FocusedAnalysisOutput, AnalyzeError> {
731    let entries = walk_directory(root, max_depth)?;
732    let counter = Arc::new(AtomicUsize::new(0));
733    let ct = CancellationToken::new();
734    let params = FocusedAnalysisConfig {
735        focus: focus.to_string(),
736        match_mode: SymbolMatchMode::Exact,
737        follow_depth,
738        max_depth,
739        ast_recursion_limit,
740        use_summary: false,
741        impl_only: None,
742    };
743    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
744}
745
746/// Analyze a single file and return a minimal fixed schema (name, line count, language,
747/// functions, imports) for lightweight code understanding.
748#[instrument(skip_all, fields(path))]
749pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
750    let source = std::fs::read_to_string(path)
751        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
752
753    let file_path = Path::new(path);
754    let name = file_path
755        .file_name()
756        .and_then(|s| s.to_str())
757        .unwrap_or("unknown")
758        .to_string();
759
760    let line_count = source.lines().count();
761
762    let language = file_path
763        .extension()
764        .and_then(|e| e.to_str())
765        .and_then(language_for_extension)
766        .ok_or_else(|| {
767            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
768                "unsupported or missing file extension".to_string(),
769            ))
770        })?;
771
772    let semantic = SemanticExtractor::extract(&source, language, None)?;
773
774    let functions = semantic
775        .functions
776        .into_iter()
777        .map(|f| crate::types::ModuleFunctionInfo {
778            name: f.name,
779            line: f.line,
780        })
781        .collect();
782
783    let imports = semantic
784        .imports
785        .into_iter()
786        .map(|i| crate::types::ModuleImportInfo {
787            module: i.module,
788            items: i.items,
789        })
790        .collect();
791
792    Ok(crate::types::ModuleInfo {
793        name,
794        line_count,
795        language: language.to_string(),
796        functions,
797        imports,
798    })
799}
800
801/// Resolve Python wildcard imports to actual symbol names.
802///
803/// For each import with items=`["*"]`, this function:
804/// 1. Parses the relative dots (if any) and climbs the directory tree
805/// 2. Finds the target .py file or __init__.py
806/// 3. Extracts symbols (functions and classes) from the target
807/// 4. Honors __all__ if defined, otherwise uses function+class names
808///
809/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
810fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
811    use std::collections::HashMap;
812
813    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
814    let Ok(file_path_canonical) = file_path.canonicalize() else {
815        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
816        return;
817    };
818
819    for import in imports.iter_mut() {
820        if import.items != ["*"] {
821            continue;
822        }
823        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
824    }
825}
826
/// Resolve one wildcard import in place. On any failure the import is left unchanged.
///
/// Only relative imports (leading dots) are handled; absolute wildcard imports
/// return immediately. Resolved symbol lists are memoized in `resolved_cache`,
/// keyed by the target file's canonical path.
fn resolve_single_wildcard(
    import: &mut ImportInfo,
    file_path: &Path,
    file_path_canonical: &Path,
    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
) {
    let module = import.module.clone();
    // Leading dots encode relative-import depth (".": current package, "..": parent, ...).
    let dot_count = module.chars().take_while(|c| *c == '.').count();
    if dot_count == 0 {
        // Absolute import: not resolvable from the file path alone; skip.
        return;
    }
    let module_path = module.trim_start_matches('.');

    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
    else {
        return;
    };

    // Canonicalize so cache hits and the self-import check ignore path shape.
    let Ok(canonical) = target_to_read.canonicalize() else {
        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
        return;
    };

    // A module cannot wildcard-import from itself.
    if canonical == file_path_canonical {
        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
        return;
    }

    // Reuse a previously parsed symbol list for this target, if any.
    if let Some(cached) = resolved_cache.get(&canonical) {
        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
        import.items.clone_from(cached);
        return;
    }

    // Parse the target file and replace the "*" with concrete symbol names.
    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
        import.items.clone_from(&symbols);
        resolved_cache.insert(canonical, symbols);
    }
}
868
869/// Locate the .py file that a wildcard import refers to. Returns None if not found.
870fn locate_target_file(
871    file_path: &Path,
872    dot_count: usize,
873    module_path: &str,
874    module: &str,
875) -> Option<PathBuf> {
876    let mut target_dir = file_path.parent()?.to_path_buf();
877
878    for _ in 1..dot_count {
879        if !target_dir.pop() {
880            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
881            return None;
882        }
883    }
884
885    let target_file = if module_path.is_empty() {
886        target_dir.join("__init__.py")
887    } else {
888        let rel_path = module_path.replace('.', "/");
889        target_dir.join(format!("{rel_path}.py"))
890    };
891
892    if target_file.exists() {
893        Some(target_file)
894    } else if target_file.with_extension("").is_dir() {
895        let init = target_file.with_extension("").join("__init__.py");
896        if init.exists() { Some(init) } else { None }
897    } else {
898        tracing::debug!(target = ?target_file, import = %module, "target file not found");
899        None
900    }
901}
902
903/// Read and parse a target .py file, returning its exported symbols.
904fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
905    use tree_sitter::Parser;
906
907    let source = match std::fs::read_to_string(target_path) {
908        Ok(s) => s,
909        Err(e) => {
910            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
911            return None;
912        }
913    };
914
915    // Parse once with tree-sitter
916    let lang_info = crate::languages::get_language_info("python")?;
917    let mut parser = Parser::new();
918    if parser.set_language(&lang_info.language).is_err() {
919        return None;
920    }
921    let tree = parser.parse(&source, None)?;
922
923    // First, try to extract __all__ from the same tree
924    let mut symbols = Vec::new();
925    extract_all_from_tree(&tree, &source, &mut symbols);
926    if !symbols.is_empty() {
927        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
928        return Some(symbols);
929    }
930
931    // Fallback: extract functions/classes from the tree
932    let root = tree.root_node();
933    let mut cursor = root.walk();
934    for child in root.children(&mut cursor) {
935        if matches!(child.kind(), "function_definition" | "class_definition")
936            && let Some(name_node) = child.child_by_field_name("name")
937        {
938            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
939            if !name.starts_with('_') {
940                symbols.push(name);
941            }
942        }
943    }
944    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
945    Some(symbols)
946}
947
948/// Extract __all__ from a tree-sitter tree.
949fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
950    let root = tree.root_node();
951    let mut cursor = root.walk();
952    for child in root.children(&mut cursor) {
953        if child.kind() == "simple_statement" {
954            // simple_statement contains assignment and other statement types
955            let mut simple_cursor = child.walk();
956            for simple_child in child.children(&mut simple_cursor) {
957                if simple_child.kind() == "assignment"
958                    && let Some(left) = simple_child.child_by_field_name("left")
959                {
960                    let target_text = source[left.start_byte()..left.end_byte()].trim();
961                    if target_text == "__all__"
962                        && let Some(right) = simple_child.child_by_field_name("right")
963                    {
964                        extract_string_list_from_list_node(&right, source, result);
965                    }
966                }
967            }
968        } else if child.kind() == "expression_statement" {
969            // Fallback for older Python AST structures
970            let mut stmt_cursor = child.walk();
971            for stmt_child in child.children(&mut stmt_cursor) {
972                if stmt_child.kind() == "assignment"
973                    && let Some(left) = stmt_child.child_by_field_name("left")
974                {
975                    let target_text = source[left.start_byte()..left.end_byte()].trim();
976                    if target_text == "__all__"
977                        && let Some(right) = stmt_child.child_by_field_name("right")
978                    {
979                        extract_string_list_from_list_node(&right, source, result);
980                    }
981                }
982            }
983        }
984    }
985}
986
987/// Extract string literals from a Python list node.
988fn extract_string_list_from_list_node(
989    list_node: &tree_sitter::Node,
990    source: &str,
991    result: &mut Vec<String>,
992) {
993    let mut cursor = list_node.walk();
994    for child in list_node.named_children(&mut cursor) {
995        if child.kind() == "string" {
996            let raw = source[child.start_byte()..child.end_byte()].trim();
997            // Strip quotes: "name" -> name
998            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
999            if !unquoted.is_empty() {
1000                result.push(unquoted);
1001            }
1002        }
1003    }
1004}
1005
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    //! Tests for symbol-focused analysis: caller/callee pagination behavior,
    //! the `impl_only` filter header, and consistency between the reported
    //! caller counts and the formatted output. All fixtures are small Rust
    //! files written into a fresh `TempDir` per test.

    use super::*;
    use crate::formatter::format_focused_paginated;
    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
    use std::fs;
    use tempfile::TempDir;

    /// Page 1 of caller pagination returns a full page and a next-page cursor.
    #[test]
    fn test_symbol_focus_callers_pagination_first_page() {
        let temp_dir = TempDir::new().unwrap();

        // Create a file with many callers of `target`
        let mut code = String::from("fn target() {}\n");
        for i in 0..15 {
            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
        }
        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();

        // Act
        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();

        // Paginate prod callers with page_size=5
        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
            .expect("paginate failed");
        assert!(
            paginated.total >= 5,
            "should have enough callers to paginate"
        );
        assert!(
            paginated.next_cursor.is_some(),
            "should have next_cursor for page 1"
        );

        // Verify cursor encodes callers mode
        assert_eq!(paginated.items.len(), 5);
    }

    /// The cursor from page 1 decodes to an offset, and the formatted header
    /// for page 2 reflects that offset (1-based range start).
    #[test]
    fn test_symbol_focus_callers_pagination_second_page() {
        let temp_dir = TempDir::new().unwrap();

        let mut code = String::from("fn target() {}\n");
        for i in 0..12 {
            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
        }
        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();

        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
        let total_prod = output.prod_chains.len();

        if total_prod > 5 {
            // Get page 1 cursor
            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
                .expect("paginate failed");
            assert!(p1.next_cursor.is_some());

            let cursor_str = p1.next_cursor.unwrap();
            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");

            // Get page 2
            let p2 = paginate_slice(
                &output.prod_chains,
                cursor_data.offset,
                5,
                PaginationMode::Callers,
            )
            .expect("paginate failed");

            // Format paginated output
            let formatted = format_focused_paginated(
                &p2.items,
                total_prod,
                PaginationMode::Callers,
                "target",
                &output.prod_chains,
                &output.test_chains,
                &output.outgoing_chains,
                output.def_count,
                cursor_data.offset,
                Some(temp_dir.path()),
                true,
            );

            // Assert: header shows correct range for page 2
            let expected_start = cursor_data.offset + 1;
            assert!(
                formatted.contains(&format!("CALLERS ({}", expected_start)),
                "header should show page 2 range, got: {}",
                formatted
            );
        }
    }

    /// Callee (outgoing-call) pagination renders a correct "CALLEES (a-b of n)"
    /// header for the first page.
    #[test]
    fn test_symbol_focus_callees_pagination() {
        let temp_dir = TempDir::new().unwrap();

        // target calls many functions
        let mut code = String::from("fn target() {\n");
        for i in 0..10 {
            code.push_str(&format!("    callee_{:02}();\n", i));
        }
        code.push_str("}\n");
        for i in 0..10 {
            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
        }
        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();

        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
        let total_callees = output.outgoing_chains.len();

        if total_callees > 3 {
            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
                .expect("paginate failed");

            let formatted = format_focused_paginated(
                &paginated.items,
                total_callees,
                PaginationMode::Callees,
                "target",
                &output.prod_chains,
                &output.test_chains,
                &output.outgoing_chains,
                output.def_count,
                0,
                Some(temp_dir.path()),
                true,
            );

            assert!(
                formatted.contains(&format!(
                    "CALLEES (1-{} of {})",
                    paginated.items.len(),
                    total_callees
                )),
                "header should show callees range, got: {}",
                formatted
            );
        }
    }

    /// Pagination degrades gracefully when production callers are empty or fit
    /// on a single page: all items returned, no next cursor.
    #[test]
    fn test_symbol_focus_empty_prod_callers() {
        let temp_dir = TempDir::new().unwrap();

        // target is only called from test functions
        let code = r#"
fn target() {}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_something() { target(); }
}
"#;
        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();

        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();

        // prod_chains may be empty; pagination should handle it gracefully
        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
            .expect("paginate failed");
        assert_eq!(paginated.items.len(), output.prod_chains.len());
        assert!(
            paginated.next_cursor.is_none(),
            "no next_cursor for empty or single-page prod_chains"
        );
    }

    /// `impl_only=true` filters callers down to trait-impl contexts and the
    /// FILTER header reports "N of M callers shown" with the correct counts.
    #[test]
    fn test_impl_only_filter_header_correct_counts() {
        let temp_dir = TempDir::new().unwrap();

        // Create a Rust fixture with:
        // - A trait definition
        // - An impl Trait for SomeType block that calls the focus symbol
        // - A regular (non-trait-impl) function that also calls the focus symbol
        let code = r#"
trait MyTrait {
    fn focus_symbol();
}

struct SomeType;

impl MyTrait for SomeType {
    fn focus_symbol() {}
}

fn impl_caller() {
    SomeType::focus_symbol();
}

fn regular_caller() {
    SomeType::focus_symbol();
}
"#;
        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();

        // Call analyze_focused with impl_only=Some(true)
        let params = FocusedAnalysisConfig {
            focus: "focus_symbol".to_string(),
            match_mode: SymbolMatchMode::Insensitive,
            follow_depth: 1,
            max_depth: None,
            ast_recursion_limit: None,
            use_summary: false,
            impl_only: Some(true),
        };
        let output = analyze_focused_with_progress(
            temp_dir.path(),
            &params,
            Arc::new(AtomicUsize::new(0)),
            CancellationToken::new(),
        )
        .unwrap();

        // Assert the result contains "FILTER: impl_only=true"
        assert!(
            output.formatted.contains("FILTER: impl_only=true"),
            "formatted output should contain FILTER header for impl_only=true, got: {}",
            output.formatted
        );

        // Assert the retained count N < total count M
        assert!(
            output.impl_trait_caller_count < output.unfiltered_caller_count,
            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
            output.impl_trait_caller_count,
            output.unfiltered_caller_count
        );

        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
        let filter_line = output
            .formatted
            .lines()
            .find(|line| line.contains("FILTER: impl_only=true"))
            .expect("should find FILTER line");
        assert!(
            filter_line.contains(&format!(
                "({} of {} callers shown)",
                output.impl_trait_caller_count, output.unfiltered_caller_count
            )),
            "FILTER line should show correct N of M counts, got: {}",
            filter_line
        );
    }

    /// The caller count printed on the FOCUS line must equal the number of
    /// unique first-hop callers present in `prod_chains`.
    #[test]
    fn test_callers_count_matches_formatted_output() {
        let temp_dir = TempDir::new().unwrap();

        // Create a file with multiple callers of `target`
        let code = r#"
fn target() {}
fn caller_a() { target(); }
fn caller_b() { target(); }
fn caller_c() { target(); }
"#;
        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();

        // Analyze the symbol
        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();

        // Extract CALLERS count from formatted output
        let formatted = &output.formatted;
        let callers_count_from_output = formatted
            .lines()
            .find(|line| line.contains("FOCUS:"))
            .and_then(|line| {
                line.split(',')
                    .find(|part| part.contains("callers"))
                    .and_then(|part| {
                        part.trim()
                            .split_whitespace()
                            .next()
                            .and_then(|s| s.parse::<usize>().ok())
                    })
            })
            .expect("should find CALLERS count in formatted output");

        // Compute expected count from prod_chains (unique first-caller names)
        let expected_callers_count = output
            .prod_chains
            .iter()
            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
            .collect::<std::collections::HashSet<_>>()
            .len();

        assert_eq!(
            callers_count_from_output, expected_callers_count,
            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
        );
    }
}