// code_analyze_core/analyze.rs
1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the four MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), and `analyze_module` (lightweight index). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Top-level error type for the analysis engine.
///
/// Wraps errors from each pipeline phase (traversal, parsing, graph building,
/// formatting) via `#[from]` so `?` can propagate them directly.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum AnalyzeError {
    /// Directory walking failed.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Source parsing or semantic extraction failed.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Call-graph construction or symbol resolution failed.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Output formatting failed.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The operation was cancelled via its `CancellationToken`.
    #[error("Analysis cancelled")]
    Cancelled,
    /// The requested language identifier is not recognized.
    #[error("unsupported language: {0}")]
    UnsupportedLanguage(String),
}
47
/// Result of directory analysis containing both formatted output and file data.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct AnalysisOutput {
    /// Formatted text representation of the analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Per-file analysis results for every readable file in the directory.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "List of files analyzed in the directory")
    )]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub entries: Vec<WalkEntry>,
    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    /// Opaque cursor token for the next page of results; absent when no more results.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
80
/// Result of file-level semantic analysis.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FileAnalysisOutput {
    /// Formatted text representation of the analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the analysis")
    )]
    pub formatted: String,
    /// Semantic analysis data including functions, classes, and imports.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Semantic analysis data including functions, classes, and imports")
    )]
    pub semantic: SemanticAnalysis,
    /// Total line count of the analyzed file.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Total line count of the analyzed file")
    )]
    #[cfg_attr(
        feature = "schemars",
        schemars(schema_with = "crate::schema_helpers::integer_schema")
    )]
    pub line_count: usize,
    /// Opaque cursor token for the next page of results; absent when no more results.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
}
114
impl FileAnalysisOutput {
    /// Create a new `FileAnalysisOutput`.
    ///
    /// * `formatted` - formatted text representation of the analysis
    /// * `semantic` - extracted semantic data (functions, classes, imports)
    /// * `line_count` - total line count of the analyzed source
    /// * `next_cursor` - pagination cursor, `None` when no more results
    #[must_use]
    pub fn new(
        formatted: String,
        semantic: SemanticAnalysis,
        line_count: usize,
        next_cursor: Option<String>,
    ) -> Self {
        Self {
            formatted,
            semantic,
            line_count,
            next_cursor,
        }
    }
}
132#[instrument(skip_all, fields(path = %root.display()))]
133// public API; callers expect owned semantics
134#[allow(clippy::needless_pass_by_value)]
135pub fn analyze_directory_with_progress(
136    root: &Path,
137    entries: Vec<WalkEntry>,
138    progress: Arc<AtomicUsize>,
139    ct: CancellationToken,
140) -> Result<AnalysisOutput, AnalyzeError> {
141    // Check if already cancelled
142    if ct.is_cancelled() {
143        return Err(AnalyzeError::Cancelled);
144    }
145
146    // Detect language from file extension
147    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
148
149    let start = Instant::now();
150    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
151
152    // Parallel analysis of files
153    let analysis_results: Vec<FileInfo> = file_entries
154        .par_iter()
155        .filter_map(|entry| {
156            // Check cancellation per file
157            if ct.is_cancelled() {
158                return None;
159            }
160
161            let path_str = entry.path.display().to_string();
162
163            // Detect language from extension
164            let ext = entry.path.extension().and_then(|e| e.to_str());
165
166            // Try to read file content; skip binary or unreadable files
167            let Ok(source) = std::fs::read_to_string(&entry.path) else {
168                progress.fetch_add(1, Ordering::Relaxed);
169                return None;
170            };
171
172            // Count lines
173            let line_count = source.lines().count();
174
175            // Detect language and extract counts
176            let (language, function_count, class_count) = if let Some(ext_str) = ext {
177                if let Some(lang) = language_for_extension(ext_str) {
178                    let lang_str = lang.to_string();
179                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
180                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
181                        Err(_) => (lang_str, 0, 0),
182                    }
183                } else {
184                    ("unknown".to_string(), 0, 0)
185                }
186            } else {
187                ("unknown".to_string(), 0, 0)
188            };
189
190            progress.fetch_add(1, Ordering::Relaxed);
191
192            let is_test = is_test_file(&entry.path);
193
194            Some(FileInfo {
195                path: path_str,
196                line_count,
197                function_count,
198                class_count,
199                language,
200                is_test,
201            })
202        })
203        .collect();
204
205    // Check if cancelled after parallel processing
206    if ct.is_cancelled() {
207        return Err(AnalyzeError::Cancelled);
208    }
209
210    tracing::debug!(
211        file_count = file_entries.len(),
212        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
213        "analysis complete"
214    );
215
216    // Format output
217    let formatted = format_structure(&entries, &analysis_results, None);
218
219    Ok(AnalysisOutput {
220        formatted,
221        files: analysis_results,
222        entries,
223        next_cursor: None,
224        subtree_counts: None,
225    })
226}
227
228/// Analyze a directory structure and return formatted output and file data.
229#[instrument(skip_all, fields(path = %root.display()))]
230pub fn analyze_directory(
231    root: &Path,
232    max_depth: Option<u32>,
233) -> Result<AnalysisOutput, AnalyzeError> {
234    let entries = walk_directory(root, max_depth)?;
235    let counter = Arc::new(AtomicUsize::new(0));
236    let ct = CancellationToken::new();
237    analyze_directory_with_progress(root, entries, counter, ct)
238}
239
240/// Determine analysis mode based on parameters and path.
241#[must_use]
242pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
243    if focus.is_some() {
244        return AnalysisMode::SymbolFocus;
245    }
246
247    let path_obj = Path::new(path);
248    if path_obj.is_dir() {
249        AnalysisMode::Overview
250    } else {
251        AnalysisMode::FileDetails
252    }
253}
254
255/// Analyze a single file and return semantic analysis with formatted output.
256#[instrument(skip_all, fields(path))]
257pub fn analyze_file(
258    path: &str,
259    ast_recursion_limit: Option<usize>,
260) -> Result<FileAnalysisOutput, AnalyzeError> {
261    let start = Instant::now();
262    let source = std::fs::read_to_string(path)
263        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
264
265    let line_count = source.lines().count();
266
267    // Detect language from extension
268    let ext = Path::new(path)
269        .extension()
270        .and_then(|e| e.to_str())
271        .and_then(language_for_extension)
272        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
273
274    // Extract semantic information
275    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
276
277    // Populate the file path on references now that the path is known
278    for r in &mut semantic.references {
279        r.location = path.to_string();
280    }
281
282    // Resolve Python wildcard imports
283    if ext == "python" {
284        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
285    }
286
287    // Detect if this is a test file
288    let is_test = is_test_file(Path::new(path));
289
290    // Extract parent directory for relative path display
291    let parent_dir = Path::new(path).parent();
292
293    // Format output
294    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
295
296    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
297
298    Ok(FileAnalysisOutput::new(
299        formatted, semantic, line_count, None,
300    ))
301}
302
303/// Analyze source code from a string buffer without filesystem access.
304///
305/// This function analyzes in-memory source code by language identifier. The `language`
306/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
307/// extension (e.g., `"rs"`, `"py"`).
308///
309/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
310/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
311/// a file extension to its supported language identifier.
312///
313/// # Arguments
314///
315/// * `source` - The source code to analyze
316/// * `language` - The language identifier (language name or extension)
317/// * `ast_recursion_limit` - Optional limit for AST traversal depth
318///
319/// # Returns
320///
321/// - `Ok(FileAnalysisOutput)` on success
322/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
323/// - `Err(AnalyzeError::Parser)` if parsing fails
324///
325/// # Notes
326///
327/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
328/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
329#[inline]
330pub fn analyze_str(
331    source: &str,
332    language: &str,
333    ast_recursion_limit: Option<usize>,
334) -> Result<FileAnalysisOutput, AnalyzeError> {
335    // Resolve language: first try as a file extension, then as a language name
336    // (case-insensitive match against supported_languages()).
337    let lang = language_for_extension(language).or_else(|| {
338        let lower = language.to_ascii_lowercase();
339        supported_languages()
340            .iter()
341            .find(|&&name| name == lower)
342            .copied()
343    });
344    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
345
346    // Extract semantic information
347    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit)?;
348
349    // Populate a stable in-memory sentinel on all reference locations
350    for r in &mut semantic.references {
351        r.location = "<memory>".to_string();
352    }
353
354    // Count lines in the source
355    let line_count = source.lines().count();
356
357    // Format output with empty path (no filesystem access)
358    let formatted = format_file_details("", &semantic, line_count, false, None);
359
360    Ok(FileAnalysisOutput::new(
361        formatted, semantic, line_count, None,
362    ))
363}
364
/// Single entry in a call chain (depth-1 direct caller or callee).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
pub struct CallChainEntry {
    /// Symbol name of the caller or callee.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Symbol name of the caller or callee")
    )]
    pub symbol: String,
    /// File path relative to the repository root.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "File path relative to the repository root")
    )]
    pub file: String,
    /// Line number of the definition or call site (1-indexed).
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Line number of the definition or call site (1-indexed)",
            schema_with = "crate::schema_helpers::integer_schema"
        )
    )]
    pub line: usize,
}
388
/// Result of focused symbol analysis.
#[derive(Debug, Serialize)]
#[cfg_attr(feature = "schemars", derive(JsonSchema))]
#[non_exhaustive]
pub struct FocusedAnalysisOutput {
    /// Formatted text representation of the call graph analysis.
    #[cfg_attr(
        feature = "schemars",
        schemars(description = "Formatted text representation of the call graph analysis")
    )]
    pub formatted: String,
    /// Opaque cursor token for the next page of results; absent when no more results.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(
        feature = "schemars",
        schemars(
            description = "Opaque cursor token for the next page of results (absent when no more results)"
        )
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub prod_chains: Vec<InternalCallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub test_chains: Vec<InternalCallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub outgoing_chains: Vec<InternalCallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub def_count: usize,
    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub unfiltered_caller_count: usize,
    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
    #[serde(skip)]
    #[cfg_attr(feature = "schemars", schemars(skip))]
    pub impl_trait_caller_count: usize,
    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub test_callers: Option<Vec<CallChainEntry>>,
    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub callees: Option<Vec<CallChainEntry>>,
    /// Definition and use sites for the symbol.
    #[serde(default)]
    pub def_use_sites: Vec<crate::types::DefUseSite>,
}
445
/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
/// function signatures under clippy's default 7-argument threshold.
#[derive(Clone)]
pub struct FocusedAnalysisConfig {
    /// Symbol name to focus on (interpreted according to `match_mode`).
    pub focus: String,
    /// How `focus` is matched against graph symbols (e.g. exact vs. indexed resolution).
    pub match_mode: SymbolMatchMode,
    /// Maximum depth for incoming/outgoing chain traversal.
    pub follow_depth: u32,
    /// Maximum directory-walk depth; `None` walks the full tree.
    pub max_depth: Option<u32>,
    /// Optional limit for AST traversal depth during semantic extraction.
    pub ast_recursion_limit: Option<usize>,
    /// When true, use the summary formatter instead of the full call-graph view.
    pub use_summary: bool,
    /// When `Some(true)`, restrict caller edges to impl-trait edges.
    pub impl_only: Option<bool>,
    /// When true, fall back to def-use site extraction if the symbol is not in the call graph.
    pub def_use: bool,
}
459
/// Internal parameters for focused analysis phases.
///
/// Mirrors [`FocusedAnalysisConfig`] minus `max_depth`, which is consumed
/// before the phase helpers run (during the directory walk).
#[derive(Clone)]
struct InternalFocusedParams {
    focus: String,
    match_mode: SymbolMatchMode,
    follow_depth: u32,
    ast_recursion_limit: Option<usize>,
    use_summary: bool,
    impl_only: Option<bool>,
    def_use: bool,
}
471
/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
/// Produced by phase 1 (`collect_file_analysis`) and consumed by phase 2 (`build_call_graph`).
type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
474
475/// Phase 1: Collect semantic analysis for all files in parallel.
476fn collect_file_analysis(
477    entries: &[WalkEntry],
478    progress: &Arc<AtomicUsize>,
479    ct: &CancellationToken,
480    ast_recursion_limit: Option<usize>,
481) -> Result<FileAnalysisBatch, AnalyzeError> {
482    // Check if already cancelled
483    if ct.is_cancelled() {
484        return Err(AnalyzeError::Cancelled);
485    }
486
487    // Use pre-walked entries (passed by caller)
488    // Collect semantic analysis for all files in parallel
489    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
490
491    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
492        .par_iter()
493        .filter_map(|entry| {
494            // Check cancellation per file
495            if ct.is_cancelled() {
496                return None;
497            }
498
499            let ext = entry.path.extension().and_then(|e| e.to_str());
500
501            // Try to read file content
502            let Ok(source) = std::fs::read_to_string(&entry.path) else {
503                progress.fetch_add(1, Ordering::Relaxed);
504                return None;
505            };
506
507            // Detect language and extract semantic information
508            let language = if let Some(ext_str) = ext {
509                language_for_extension(ext_str)
510                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
511            } else {
512                "unknown".to_string()
513            };
514
515            if let Ok(mut semantic) =
516                SemanticExtractor::extract(&source, &language, ast_recursion_limit)
517            {
518                // Populate file path on references
519                for r in &mut semantic.references {
520                    r.location = entry.path.display().to_string();
521                }
522                // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
523                for trait_info in &mut semantic.impl_traits {
524                    trait_info.path.clone_from(&entry.path);
525                }
526                progress.fetch_add(1, Ordering::Relaxed);
527                Some((entry.path.clone(), semantic))
528            } else {
529                progress.fetch_add(1, Ordering::Relaxed);
530                None
531            }
532        })
533        .collect();
534
535    // Check if cancelled after parallel processing
536    if ct.is_cancelled() {
537        return Err(AnalyzeError::Cancelled);
538    }
539
540    // Collect all impl-trait info from analysis results
541    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
542        .iter()
543        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
544        .collect();
545
546    Ok((analysis_results, all_impl_traits))
547}
548
549/// Phase 2: Build call graph from analysis results.
550fn build_call_graph(
551    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
552    all_impl_traits: &[ImplTraitInfo],
553) -> Result<CallGraph, AnalyzeError> {
554    // Build call graph. Always build without impl_only filter first so we can
555    // record the unfiltered caller count before discarding those edges.
556    CallGraph::build_from_results(
557        analysis_results,
558        all_impl_traits,
559        false, // filter applied below after counting
560    )
561    .map_err(std::convert::Into::into)
562}
563
564/// Phase 3: Resolve symbol and apply `impl_only` filter.
565/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
566/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
567/// then compute `impl_trait_caller_count`.
568fn resolve_symbol(
569    graph: &mut CallGraph,
570    params: &InternalFocusedParams,
571) -> Result<(String, usize, usize), AnalyzeError> {
572    // Resolve symbol name using the requested match mode.
573    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
574        let exists = graph.definitions.contains_key(&params.focus)
575            || graph.callers.contains_key(&params.focus)
576            || graph.callees.contains_key(&params.focus);
577        if exists {
578            params.focus.clone()
579        } else {
580            return Err(crate::graph::GraphError::SymbolNotFound {
581                symbol: params.focus.clone(),
582                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
583            }
584            .into());
585        }
586    } else {
587        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
588    };
589
590    // Count unique callers for the focus symbol before applying impl_only filter.
591    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
592        edges
593            .iter()
594            .map(|e| &e.neighbor_name)
595            .collect::<std::collections::HashSet<_>>()
596            .len()
597    });
598
599    // Apply impl_only filter now if requested, then count filtered callers.
600    // Filter all caller adjacency lists so traversal and formatting are consistently
601    // restricted to impl-trait edges regardless of follow_depth.
602    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
603        for edges in graph.callers.values_mut() {
604            edges.retain(|e| e.is_impl_trait);
605        }
606        graph.callers.get(&resolved_focus).map_or(0, |edges| {
607            edges
608                .iter()
609                .map(|e| &e.neighbor_name)
610                .collect::<std::collections::HashSet<_>>()
611                .len()
612        })
613    } else {
614        unfiltered_caller_count
615    };
616
617    Ok((
618        resolved_focus,
619        unfiltered_caller_count,
620        impl_trait_caller_count,
621    ))
622}
623
/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
type ChainComputeResult = (
    String,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    Vec<InternalCallChain>,
    usize,
);
632
633/// Helper function to convert InternalCallChain data to CallChainEntry vec.
634/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
635/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
636fn chains_to_entries(
637    chains: &[InternalCallChain],
638    root: Option<&std::path::Path>,
639) -> Option<Vec<CallChainEntry>> {
640    if chains.is_empty() {
641        return None;
642    }
643    let entries: Vec<CallChainEntry> = chains
644        .iter()
645        .take(10)
646        .filter_map(|chain| {
647            let (symbol, path, line) = chain.chain.first()?;
648            let file = match root {
649                Some(root) => path
650                    .strip_prefix(root)
651                    .unwrap_or(path.as_path())
652                    .to_string_lossy()
653                    .into_owned(),
654                None => path.to_string_lossy().into_owned(),
655            };
656            Some(CallChainEntry {
657                symbol: symbol.clone(),
658                file,
659                line: *line,
660            })
661        })
662        .collect();
663    if entries.is_empty() {
664        None
665    } else {
666        Some(entries)
667    }
668}
669
670/// Phase 4: Compute chains and format output.
671fn compute_chains(
672    graph: &CallGraph,
673    resolved_focus: &str,
674    root: &Path,
675    params: &InternalFocusedParams,
676    unfiltered_caller_count: usize,
677    impl_trait_caller_count: usize,
678    def_use_sites: &[crate::types::DefUseSite],
679) -> Result<ChainComputeResult, AnalyzeError> {
680    // Compute chain data for pagination (always, regardless of summary mode)
681    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
682    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
683    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
684
685    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
686        incoming_chains.iter().cloned().partition(|chain| {
687            chain
688                .chain
689                .first()
690                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
691        });
692
693    // Format output with pre-computed chains
694    let mut formatted = if params.use_summary {
695        format_focused_summary_internal(
696            graph,
697            resolved_focus,
698            params.follow_depth,
699            Some(root),
700            Some(&incoming_chains),
701            Some(&outgoing_chains),
702            def_use_sites,
703        )?
704    } else {
705        format_focused_internal(
706            graph,
707            resolved_focus,
708            params.follow_depth,
709            Some(root),
710            Some(&incoming_chains),
711            Some(&outgoing_chains),
712            def_use_sites,
713        )?
714    };
715
716    // Add FILTER header if impl_only filter was applied
717    if params.impl_only.unwrap_or(false) {
718        let filter_header = format!(
719            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
720        );
721        formatted = format!("{filter_header}{formatted}");
722    }
723
724    Ok((
725        formatted,
726        prod_chains,
727        test_chains,
728        outgoing_chains,
729        def_count,
730    ))
731}
732
733/// Analyze a symbol's call graph across a directory with progress tracking.
734// public API; callers expect owned semantics
735#[allow(clippy::needless_pass_by_value)]
736pub fn analyze_focused_with_progress(
737    root: &Path,
738    params: &FocusedAnalysisConfig,
739    progress: Arc<AtomicUsize>,
740    ct: CancellationToken,
741) -> Result<FocusedAnalysisOutput, AnalyzeError> {
742    let entries = walk_directory(root, params.max_depth)?;
743    let internal_params = InternalFocusedParams {
744        focus: params.focus.clone(),
745        match_mode: params.match_mode.clone(),
746        follow_depth: params.follow_depth,
747        ast_recursion_limit: params.ast_recursion_limit,
748        use_summary: params.use_summary,
749        impl_only: params.impl_only,
750        def_use: params.def_use,
751    };
752    analyze_focused_with_progress_with_entries_internal(
753        root,
754        params.max_depth,
755        &progress,
756        &ct,
757        &internal_params,
758        &entries,
759    )
760}
761
762/// Internal implementation of focused analysis using pre-walked entries and params struct.
763#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
764fn analyze_focused_with_progress_with_entries_internal(
765    root: &Path,
766    _max_depth: Option<u32>,
767    progress: &Arc<AtomicUsize>,
768    ct: &CancellationToken,
769    params: &InternalFocusedParams,
770    entries: &[WalkEntry],
771) -> Result<FocusedAnalysisOutput, AnalyzeError> {
772    // Check if already cancelled
773    if ct.is_cancelled() {
774        return Err(AnalyzeError::Cancelled);
775    }
776
777    // Check if path is a file (hint to use directory)
778    if root.is_file() {
779        let formatted =
780            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
781                .to_string();
782        return Ok(FocusedAnalysisOutput {
783            formatted,
784            next_cursor: None,
785            prod_chains: vec![],
786            test_chains: vec![],
787            outgoing_chains: vec![],
788            def_count: 0,
789            unfiltered_caller_count: 0,
790            impl_trait_caller_count: 0,
791            callers: None,
792            test_callers: None,
793            callees: None,
794            def_use_sites: vec![],
795        });
796    }
797
798    // Phase 1: Collect file analysis
799    let (analysis_results, all_impl_traits) =
800        collect_file_analysis(entries, progress, ct, params.ast_recursion_limit)?;
801
802    // Check for cancellation before building the call graph (phase 2)
803    if ct.is_cancelled() {
804        return Err(AnalyzeError::Cancelled);
805    }
806
807    // Phase 2: Build call graph
808    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
809
810    // Check for cancellation before resolving the symbol (phase 3)
811    if ct.is_cancelled() {
812        return Err(AnalyzeError::Cancelled);
813    }
814
815    // Phase 3: Resolve symbol and apply impl_only filter.
816    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
817    // fall through to def-use extraction instead of returning SymbolNotFound.
818    let resolve_result = resolve_symbol(&mut graph, params);
819    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
820        &resolve_result
821    {
822        // Deliberately not collapsed: resolve_result must stay alive past this block
823        // so that the `?` below can propagate non-SymbolNotFound errors.
824        if params.def_use {
825            let def_use_sites =
826                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
827            if def_use_sites.is_empty() {
828                // Symbol not found anywhere (neither in call graph nor as def/use site).
829                // Propagate the original SymbolNotFound error instead of returning an
830                // empty success response.
831                return Err(resolve_result.unwrap_err());
832            }
833            use std::fmt::Write as _;
834            let mut formatted = String::new();
835            let _ = writeln!(
836                formatted,
837                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
838                params.focus
839            );
840            {
841                let writes = def_use_sites
842                    .iter()
843                    .filter(|s| {
844                        matches!(
845                            s.kind,
846                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
847                        )
848                    })
849                    .count();
850                let reads = def_use_sites
851                    .iter()
852                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
853                    .count();
854                let _ = writeln!(
855                    formatted,
856                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
857                    params.focus,
858                    def_use_sites.len(),
859                    writes,
860                    reads
861                );
862            }
863            return Ok(FocusedAnalysisOutput {
864                formatted,
865                next_cursor: None,
866                callers: None,
867                test_callers: None,
868                callees: None,
869                prod_chains: vec![],
870                test_chains: vec![],
871                outgoing_chains: vec![],
872                def_count: 0,
873                unfiltered_caller_count: 0,
874                impl_trait_caller_count: 0,
875                def_use_sites,
876            });
877        }
878    }
879    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
880
881    // Check for cancellation before computing chains (phase 4)
882    if ct.is_cancelled() {
883        return Err(AnalyzeError::Cancelled);
884    }
885
886    // Phase 5 (optional, before formatting): Def-use site extraction.
887    // Use params.focus (the raw user-supplied string) rather than resolved_focus
888    // so that variable/field names that are not in the call graph still work.
889    let def_use_sites = if params.def_use {
890        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
891    } else {
892        Vec::new()
893    };
894
895    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
896    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
897        &graph,
898        &resolved_focus,
899        root,
900        params,
901        unfiltered_caller_count,
902        impl_trait_caller_count,
903        &def_use_sites,
904    )?;
905
906    // Compute depth-1 chains for structured output fields (always direct relationships only,
907    // regardless of `follow_depth` used for the text-formatted output).
908    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
909        // Chains already at depth 1; reuse the partitioned vecs.
910        let callers = chains_to_entries(&prod_chains, Some(root));
911        let test_callers = chains_to_entries(&test_chains, Some(root));
912        let callees = chains_to_entries(&outgoing_chains, Some(root));
913        (callers, test_callers, callees)
914    } else {
915        // follow_depth > 1: re-query at depth 1 to get only direct edges.
916        let incoming1 = graph
917            .find_incoming_chains(&resolved_focus, 1)
918            .unwrap_or_default();
919        let outgoing1 = graph
920            .find_outgoing_chains(&resolved_focus, 1)
921            .unwrap_or_default();
922        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
923            chain
924                .chain
925                .first()
926                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
927        });
928        let callers = chains_to_entries(&prod1, Some(root));
929        let test_callers = chains_to_entries(&test1, Some(root));
930        let callees = chains_to_entries(&outgoing1, Some(root));
931        (callers, test_callers, callees)
932    };
933
934    Ok(FocusedAnalysisOutput {
935        formatted,
936        next_cursor: None,
937        callers: depth1_callers,
938        test_callers: depth1_test_callers,
939        callees: depth1_callees,
940        prod_chains,
941        test_chains,
942        outgoing_chains,
943        def_count,
944        unfiltered_caller_count,
945        impl_trait_caller_count,
946        def_use_sites,
947    })
948}
949
950/// Phase 5: Extract def-use sites for `symbol` across all entries.
951/// Writes go before reads; within each kind ordered by file, line, then column.
952fn collect_def_use_sites(
953    entries: &[WalkEntry],
954    symbol: &str,
955    ast_recursion_limit: Option<usize>,
956    root: &std::path::Path,
957    ct: &CancellationToken,
958) -> Vec<crate::types::DefUseSite> {
959    use crate::parser::SemanticExtractor;
960
961    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
962
963    let mut sites: Vec<crate::types::DefUseSite> = file_entries
964        .par_iter()
965        .filter_map(|entry| {
966            if ct.is_cancelled() {
967                return None;
968            }
969            let Ok(source) = std::fs::read_to_string(&entry.path) else {
970                return None;
971            };
972            let ext = entry
973                .path
974                .extension()
975                .and_then(|e| e.to_str())
976                .unwrap_or("");
977            let lang = crate::lang::language_for_extension(ext)?;
978            let file_path = entry
979                .path
980                .strip_prefix(root)
981                .unwrap_or(&entry.path)
982                .display()
983                .to_string();
984            let sites = SemanticExtractor::extract_def_use_for_file(
985                &source,
986                lang,
987                symbol,
988                &file_path,
989                ast_recursion_limit,
990            );
991            if sites.is_empty() { None } else { Some(sites) }
992        })
993        .flatten()
994        .collect();
995
996    // Writes before reads; within each kind: file, line, then column for deterministic order
997    sites.sort_by(|a, b| {
998        use crate::types::DefUseKind;
999        let kind_ord = |k: &DefUseKind| match k {
1000            DefUseKind::Write | DefUseKind::WriteRead => 0,
1001            DefUseKind::Read => 1,
1002        };
1003        kind_ord(&a.kind)
1004            .cmp(&kind_ord(&b.kind))
1005            .then_with(|| a.file.cmp(&b.file))
1006            .then_with(|| a.line.cmp(&b.line))
1007            .then_with(|| a.column.cmp(&b.column))
1008    });
1009
1010    sites
1011}
1012
1013/// Analyze a symbol's call graph using pre-walked directory entries.
1014pub fn analyze_focused_with_progress_with_entries(
1015    root: &Path,
1016    params: &FocusedAnalysisConfig,
1017    progress: &Arc<AtomicUsize>,
1018    ct: &CancellationToken,
1019    entries: &[WalkEntry],
1020) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1021    let internal_params = InternalFocusedParams {
1022        focus: params.focus.clone(),
1023        match_mode: params.match_mode.clone(),
1024        follow_depth: params.follow_depth,
1025        ast_recursion_limit: params.ast_recursion_limit,
1026        use_summary: params.use_summary,
1027        impl_only: params.impl_only,
1028        def_use: params.def_use,
1029    };
1030    analyze_focused_with_progress_with_entries_internal(
1031        root,
1032        params.max_depth,
1033        progress,
1034        ct,
1035        &internal_params,
1036        entries,
1037    )
1038}
1039
1040#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1041pub fn analyze_focused(
1042    root: &Path,
1043    focus: &str,
1044    follow_depth: u32,
1045    max_depth: Option<u32>,
1046    ast_recursion_limit: Option<usize>,
1047) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1048    let entries = walk_directory(root, max_depth)?;
1049    let counter = Arc::new(AtomicUsize::new(0));
1050    let ct = CancellationToken::new();
1051    let params = FocusedAnalysisConfig {
1052        focus: focus.to_string(),
1053        match_mode: SymbolMatchMode::Exact,
1054        follow_depth,
1055        max_depth,
1056        ast_recursion_limit,
1057        use_summary: false,
1058        impl_only: None,
1059        def_use: false,
1060    };
1061    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1062}
1063
1064/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1065/// functions, imports) for lightweight code understanding.
1066#[instrument(skip_all, fields(path))]
1067pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1068    let source = std::fs::read_to_string(path)
1069        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1070
1071    let file_path = Path::new(path);
1072    let name = file_path
1073        .file_name()
1074        .and_then(|s| s.to_str())
1075        .unwrap_or("unknown")
1076        .to_string();
1077
1078    let line_count = source.lines().count();
1079
1080    let language = file_path
1081        .extension()
1082        .and_then(|e| e.to_str())
1083        .and_then(language_for_extension)
1084        .ok_or_else(|| {
1085            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1086                "unsupported or missing file extension".to_string(),
1087            ))
1088        })?;
1089
1090    let semantic = SemanticExtractor::extract(&source, language, None)?;
1091
1092    let functions = semantic
1093        .functions
1094        .into_iter()
1095        .map(|f| crate::types::ModuleFunctionInfo {
1096            name: f.name,
1097            line: f.line,
1098        })
1099        .collect();
1100
1101    let imports = semantic
1102        .imports
1103        .into_iter()
1104        .map(|i| crate::types::ModuleImportInfo {
1105            module: i.module,
1106            items: i.items,
1107        })
1108        .collect();
1109
1110    Ok(crate::types::ModuleInfo {
1111        name,
1112        line_count,
1113        language: language.to_string(),
1114        functions,
1115        imports,
1116    })
1117}
1118
1119/// Scan a directory for files that import a given module path.
1120///
1121/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1122/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1123/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1124pub fn analyze_import_lookup(
1125    root: &Path,
1126    module: &str,
1127    entries: &[WalkEntry],
1128    ast_recursion_limit: Option<usize>,
1129) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1130    let mut matches: Vec<(PathBuf, usize)> = Vec::new();
1131
1132    for entry in entries {
1133        if entry.is_dir {
1134            continue;
1135        }
1136        let ext = entry
1137            .path
1138            .extension()
1139            .and_then(|e| e.to_str())
1140            .and_then(crate::lang::language_for_extension);
1141        let Some(lang) = ext else {
1142            continue;
1143        };
1144        let Ok(source) = std::fs::read_to_string(&entry.path) else {
1145            continue;
1146        };
1147        let Ok(semantic) = SemanticExtractor::extract(&source, lang, ast_recursion_limit) else {
1148            continue;
1149        };
1150        for import in &semantic.imports {
1151            if import.module == module || import.items.iter().any(|item| item == module) {
1152                matches.push((entry.path.clone(), import.line));
1153                break;
1154            }
1155        }
1156    }
1157
1158    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1159    text.push_str(&format!("ROOT: {}\n", root.display()));
1160    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1161    for (path, line) in &matches {
1162        let rel = path.strip_prefix(root).unwrap_or(path);
1163        text.push_str(&format!("  {}:{line}\n", rel.display()));
1164    }
1165
1166    Ok(FocusedAnalysisOutput {
1167        formatted: text,
1168        next_cursor: None,
1169        prod_chains: vec![],
1170        test_chains: vec![],
1171        outgoing_chains: vec![],
1172        def_count: 0,
1173        unfiltered_caller_count: 0,
1174        impl_trait_caller_count: 0,
1175        callers: None,
1176        test_callers: None,
1177        callees: None,
1178        def_use_sites: vec![],
1179    })
1180}
1181
1182/// Resolve Python wildcard imports to actual symbol names.
1183///
1184/// For each import with items=`["*"]`, this function:
1185/// 1. Parses the relative dots (if any) and climbs the directory tree
1186/// 2. Finds the target .py file or __init__.py
1187/// 3. Extracts symbols (functions and classes) from the target
1188/// 4. Honors __all__ if defined, otherwise uses function+class names
1189///
1190/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1191fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1192    use std::collections::HashMap;
1193
1194    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1195    let Ok(file_path_canonical) = file_path.canonicalize() else {
1196        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1197        return;
1198    };
1199
1200    for import in imports.iter_mut() {
1201        if import.items != ["*"] {
1202            continue;
1203        }
1204        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1205    }
1206}
1207
1208/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1209fn resolve_single_wildcard(
1210    import: &mut ImportInfo,
1211    file_path: &Path,
1212    file_path_canonical: &Path,
1213    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1214) {
1215    let module = import.module.clone();
1216    let dot_count = module.chars().take_while(|c| *c == '.').count();
1217    if dot_count == 0 {
1218        return;
1219    }
1220    let module_path = module.trim_start_matches('.');
1221
1222    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1223    else {
1224        return;
1225    };
1226
1227    let Ok(canonical) = target_to_read.canonicalize() else {
1228        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1229        return;
1230    };
1231
1232    if canonical == file_path_canonical {
1233        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1234        return;
1235    }
1236
1237    if let Some(cached) = resolved_cache.get(&canonical) {
1238        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1239        import.items.clone_from(cached);
1240        return;
1241    }
1242
1243    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1244        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1245        import.items.clone_from(&symbols);
1246        resolved_cache.insert(canonical, symbols);
1247    }
1248}
1249
1250/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1251fn locate_target_file(
1252    file_path: &Path,
1253    dot_count: usize,
1254    module_path: &str,
1255    module: &str,
1256) -> Option<PathBuf> {
1257    let mut target_dir = file_path.parent()?.to_path_buf();
1258
1259    for _ in 1..dot_count {
1260        if !target_dir.pop() {
1261            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1262            return None;
1263        }
1264    }
1265
1266    let target_file = if module_path.is_empty() {
1267        target_dir.join("__init__.py")
1268    } else {
1269        let rel_path = module_path.replace('.', "/");
1270        target_dir.join(format!("{rel_path}.py"))
1271    };
1272
1273    if target_file.exists() {
1274        Some(target_file)
1275    } else if target_file.with_extension("").is_dir() {
1276        let init = target_file.with_extension("").join("__init__.py");
1277        if init.exists() { Some(init) } else { None }
1278    } else {
1279        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1280        None
1281    }
1282}
1283
1284/// Read and parse a target .py file, returning its exported symbols.
1285fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1286    use tree_sitter::Parser;
1287
1288    let source = match std::fs::read_to_string(target_path) {
1289        Ok(s) => s,
1290        Err(e) => {
1291            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1292            return None;
1293        }
1294    };
1295
1296    // Parse once with tree-sitter
1297    let lang_info = crate::languages::get_language_info("python")?;
1298    let mut parser = Parser::new();
1299    if parser.set_language(&lang_info.language).is_err() {
1300        return None;
1301    }
1302    let tree = parser.parse(&source, None)?;
1303
1304    // First, try to extract __all__ from the same tree
1305    let mut symbols = Vec::new();
1306    extract_all_from_tree(&tree, &source, &mut symbols);
1307    if !symbols.is_empty() {
1308        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1309        return Some(symbols);
1310    }
1311
1312    // Fallback: extract functions/classes from the tree
1313    let root = tree.root_node();
1314    let mut cursor = root.walk();
1315    for child in root.children(&mut cursor) {
1316        if matches!(child.kind(), "function_definition" | "class_definition")
1317            && let Some(name_node) = child.child_by_field_name("name")
1318        {
1319            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1320            if !name.starts_with('_') {
1321                symbols.push(name);
1322            }
1323        }
1324    }
1325    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1326    Some(symbols)
1327}
1328
1329/// Extract __all__ from a tree-sitter tree.
1330fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1331    let root = tree.root_node();
1332    let mut cursor = root.walk();
1333    for child in root.children(&mut cursor) {
1334        if child.kind() == "simple_statement" {
1335            // simple_statement contains assignment and other statement types
1336            let mut simple_cursor = child.walk();
1337            for simple_child in child.children(&mut simple_cursor) {
1338                if simple_child.kind() == "assignment"
1339                    && let Some(left) = simple_child.child_by_field_name("left")
1340                {
1341                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1342                    if target_text == "__all__"
1343                        && let Some(right) = simple_child.child_by_field_name("right")
1344                    {
1345                        extract_string_list_from_list_node(&right, source, result);
1346                    }
1347                }
1348            }
1349        } else if child.kind() == "expression_statement" {
1350            // Fallback for older Python AST structures
1351            let mut stmt_cursor = child.walk();
1352            for stmt_child in child.children(&mut stmt_cursor) {
1353                if stmt_child.kind() == "assignment"
1354                    && let Some(left) = stmt_child.child_by_field_name("left")
1355                {
1356                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1357                    if target_text == "__all__"
1358                        && let Some(right) = stmt_child.child_by_field_name("right")
1359                    {
1360                        extract_string_list_from_list_node(&right, source, result);
1361                    }
1362                }
1363            }
1364        }
1365    }
1366}
1367
1368/// Extract string literals from a Python list node.
1369fn extract_string_list_from_list_node(
1370    list_node: &tree_sitter::Node,
1371    source: &str,
1372    result: &mut Vec<String>,
1373) {
1374    let mut cursor = list_node.walk();
1375    for child in list_node.named_children(&mut cursor) {
1376        if child.kind() == "string" {
1377            let raw = source[child.start_byte()..child.end_byte()].trim();
1378            // Strip quotes: "name" -> name
1379            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1380            if !unquoted.is_empty() {
1381                result.push(unquoted);
1382            }
1383        }
1384    }
1385}
1386
1387#[cfg(test)]
1388mod tests {
1389    use super::*;
1390    use crate::formatter::format_focused_paginated;
1391    use crate::graph::InternalCallChain;
1392    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1393    use std::fs;
1394    use std::path::PathBuf;
1395    use tempfile::TempDir;
1396
1397    #[cfg(feature = "lang-rust")]
1398    #[test]
1399    fn analyze_str_rust_happy_path() {
1400        let source = "fn hello() -> i32 { 42 }";
1401        let result = analyze_str(source, "rs", None);
1402        assert!(result.is_ok());
1403    }
1404
1405    #[cfg(feature = "lang-python")]
1406    #[test]
1407    fn analyze_str_python_happy_path() {
1408        let source = "def greet(name):\n    return f'Hello {name}'";
1409        let result = analyze_str(source, "py", None);
1410        assert!(result.is_ok());
1411    }
1412
1413    #[cfg(feature = "lang-rust")]
1414    #[test]
1415    fn analyze_str_rust_by_language_name() {
1416        let source = "fn hello() -> i32 { 42 }";
1417        let result = analyze_str(source, "rust", None);
1418        assert!(result.is_ok());
1419    }
1420
1421    #[cfg(feature = "lang-python")]
1422    #[test]
1423    fn analyze_str_python_by_language_name() {
1424        let source = "def greet(name):\n    return f'Hello {name}'";
1425        let result = analyze_str(source, "python", None);
1426        assert!(result.is_ok());
1427    }
1428
1429    #[cfg(feature = "lang-rust")]
1430    #[test]
1431    fn analyze_str_rust_mixed_case() {
1432        let source = "fn hello() -> i32 { 42 }";
1433        let result = analyze_str(source, "RuSt", None);
1434        assert!(result.is_ok());
1435    }
1436
1437    #[cfg(feature = "lang-python")]
1438    #[test]
1439    fn analyze_str_python_mixed_case() {
1440        let source = "def greet(name):\n    return f'Hello {name}'";
1441        let result = analyze_str(source, "PyThOn", None);
1442        assert!(result.is_ok());
1443    }
1444
1445    #[test]
1446    fn analyze_str_unsupported_language() {
1447        let result = analyze_str("code", "brainfuck", None);
1448        assert!(
1449            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1450        );
1451    }
1452
1453    #[cfg(feature = "lang-rust")]
1454    #[test]
1455    fn test_symbol_focus_callers_pagination_first_page() {
1456        let temp_dir = TempDir::new().unwrap();
1457
1458        // Create a file with many callers of `target`
1459        let mut code = String::from("fn target() {}\n");
1460        for i in 0..15 {
1461            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1462        }
1463        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1464
1465        // Act
1466        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1467
1468        // Paginate prod callers with page_size=5
1469        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1470            .expect("paginate failed");
1471        assert!(
1472            paginated.total >= 5,
1473            "should have enough callers to paginate"
1474        );
1475        assert!(
1476            paginated.next_cursor.is_some(),
1477            "should have next_cursor for page 1"
1478        );
1479
1480        // Verify cursor encodes callers mode
1481        assert_eq!(paginated.items.len(), 5);
1482    }
1483
1484    #[test]
1485    fn test_symbol_focus_callers_pagination_second_page() {
1486        let temp_dir = TempDir::new().unwrap();
1487
1488        let mut code = String::from("fn target() {}\n");
1489        for i in 0..12 {
1490            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1491        }
1492        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1493
1494        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1495        let total_prod = output.prod_chains.len();
1496
1497        if total_prod > 5 {
1498            // Get page 1 cursor
1499            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1500                .expect("paginate failed");
1501            assert!(p1.next_cursor.is_some());
1502
1503            let cursor_str = p1.next_cursor.unwrap();
1504            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1505
1506            // Get page 2
1507            let p2 = paginate_slice(
1508                &output.prod_chains,
1509                cursor_data.offset,
1510                5,
1511                PaginationMode::Callers,
1512            )
1513            .expect("paginate failed");
1514
1515            // Format paginated output
1516            let formatted = format_focused_paginated(
1517                &p2.items,
1518                total_prod,
1519                PaginationMode::Callers,
1520                "target",
1521                &output.prod_chains,
1522                &output.test_chains,
1523                &output.outgoing_chains,
1524                output.def_count,
1525                cursor_data.offset,
1526                Some(temp_dir.path()),
1527                true,
1528            );
1529
1530            // Assert: header shows correct range for page 2
1531            let expected_start = cursor_data.offset + 1;
1532            assert!(
1533                formatted.contains(&format!("CALLERS ({}", expected_start)),
1534                "header should show page 2 range, got: {}",
1535                formatted
1536            );
1537        }
1538    }
1539
1540    #[test]
1541    fn test_chains_to_entries_empty_returns_none() {
1542        // Arrange
1543        let chains: Vec<InternalCallChain> = vec![];
1544
1545        // Act
1546        let result = chains_to_entries(&chains, None);
1547
1548        // Assert
1549        assert!(result.is_none());
1550    }
1551
1552    #[test]
1553    fn test_chains_to_entries_with_data_returns_entries() {
1554        // Arrange
1555        let chains = vec![
1556            InternalCallChain {
1557                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1558            },
1559            InternalCallChain {
1560                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1561            },
1562        ];
1563        let root = PathBuf::from("/root");
1564
1565        // Act
1566        let result = chains_to_entries(&chains, Some(root.as_path()));
1567
1568        // Assert
1569        assert!(result.is_some());
1570        let entries = result.unwrap();
1571        assert_eq!(entries.len(), 2);
1572        assert_eq!(entries[0].symbol, "caller1");
1573        assert_eq!(entries[0].file, "lib.rs");
1574        assert_eq!(entries[0].line, 10);
1575        assert_eq!(entries[1].symbol, "caller2");
1576        assert_eq!(entries[1].file, "other.rs");
1577        assert_eq!(entries[1].line, 20);
1578    }
1579
1580    #[test]
1581    fn test_symbol_focus_callees_pagination() {
1582        let temp_dir = TempDir::new().unwrap();
1583
1584        // target calls many functions
1585        let mut code = String::from("fn target() {\n");
1586        for i in 0..10 {
1587            code.push_str(&format!("    callee_{:02}();\n", i));
1588        }
1589        code.push_str("}\n");
1590        for i in 0..10 {
1591            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1592        }
1593        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1594
1595        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1596        let total_callees = output.outgoing_chains.len();
1597
1598        if total_callees > 3 {
1599            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1600                .expect("paginate failed");
1601
1602            let formatted = format_focused_paginated(
1603                &paginated.items,
1604                total_callees,
1605                PaginationMode::Callees,
1606                "target",
1607                &output.prod_chains,
1608                &output.test_chains,
1609                &output.outgoing_chains,
1610                output.def_count,
1611                0,
1612                Some(temp_dir.path()),
1613                true,
1614            );
1615
1616            assert!(
1617                formatted.contains(&format!(
1618                    "CALLEES (1-{} of {})",
1619                    paginated.items.len(),
1620                    total_callees
1621                )),
1622                "header should show callees range, got: {}",
1623                formatted
1624            );
1625        }
1626    }
1627
1628    #[test]
1629    fn test_symbol_focus_empty_prod_callers() {
1630        let temp_dir = TempDir::new().unwrap();
1631
1632        // target is only called from test functions
1633        let code = r#"
1634fn target() {}
1635
1636#[cfg(test)]
1637mod tests {
1638    use super::*;
1639    #[test]
1640    fn test_something() { target(); }
1641}
1642"#;
1643        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1644
1645        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1646
1647        // prod_chains may be empty; pagination should handle it gracefully
1648        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1649            .expect("paginate failed");
1650        assert_eq!(paginated.items.len(), output.prod_chains.len());
1651        assert!(
1652            paginated.next_cursor.is_none(),
1653            "no next_cursor for empty or single-page prod_chains"
1654        );
1655    }
1656
1657    #[test]
1658    fn test_impl_only_filter_header_correct_counts() {
1659        let temp_dir = TempDir::new().unwrap();
1660
1661        // Create a Rust fixture with:
1662        // - A trait definition
1663        // - An impl Trait for SomeType block that calls the focus symbol
1664        // - A regular (non-trait-impl) function that also calls the focus symbol
1665        let code = r#"
1666trait MyTrait {
1667    fn focus_symbol();
1668}
1669
1670struct SomeType;
1671
1672impl MyTrait for SomeType {
1673    fn focus_symbol() {}
1674}
1675
1676fn impl_caller() {
1677    SomeType::focus_symbol();
1678}
1679
1680fn regular_caller() {
1681    SomeType::focus_symbol();
1682}
1683"#;
1684        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1685
1686        // Call analyze_focused with impl_only=Some(true)
1687        let params = FocusedAnalysisConfig {
1688            focus: "focus_symbol".to_string(),
1689            match_mode: SymbolMatchMode::Insensitive,
1690            follow_depth: 1,
1691            max_depth: None,
1692            ast_recursion_limit: None,
1693            use_summary: false,
1694            impl_only: Some(true),
1695            def_use: false,
1696        };
1697        let output = analyze_focused_with_progress(
1698            temp_dir.path(),
1699            &params,
1700            Arc::new(AtomicUsize::new(0)),
1701            CancellationToken::new(),
1702        )
1703        .unwrap();
1704
1705        // Assert the result contains "FILTER: impl_only=true"
1706        assert!(
1707            output.formatted.contains("FILTER: impl_only=true"),
1708            "formatted output should contain FILTER header for impl_only=true, got: {}",
1709            output.formatted
1710        );
1711
1712        // Assert the retained count N < total count M
1713        assert!(
1714            output.impl_trait_caller_count < output.unfiltered_caller_count,
1715            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1716            output.impl_trait_caller_count,
1717            output.unfiltered_caller_count
1718        );
1719
1720        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1721        let filter_line = output
1722            .formatted
1723            .lines()
1724            .find(|line| line.contains("FILTER: impl_only=true"))
1725            .expect("should find FILTER line");
1726        assert!(
1727            filter_line.contains(&format!(
1728                "({} of {} callers shown)",
1729                output.impl_trait_caller_count, output.unfiltered_caller_count
1730            )),
1731            "FILTER line should show correct N of M counts, got: {}",
1732            filter_line
1733        );
1734    }
1735
1736    #[test]
1737    fn test_callers_count_matches_formatted_output() {
1738        let temp_dir = TempDir::new().unwrap();
1739
1740        // Create a file with multiple callers of `target`
1741        let code = r#"
1742fn target() {}
1743fn caller_a() { target(); }
1744fn caller_b() { target(); }
1745fn caller_c() { target(); }
1746"#;
1747        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1748
1749        // Analyze the symbol
1750        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1751
1752        // Extract CALLERS count from formatted output
1753        let formatted = &output.formatted;
1754        let callers_count_from_output = formatted
1755            .lines()
1756            .find(|line| line.contains("FOCUS:"))
1757            .and_then(|line| {
1758                line.split(',')
1759                    .find(|part| part.contains("callers"))
1760                    .and_then(|part| {
1761                        part.trim()
1762                            .split_whitespace()
1763                            .next()
1764                            .and_then(|s| s.parse::<usize>().ok())
1765                    })
1766            })
1767            .expect("should find CALLERS count in formatted output");
1768
1769        // Compute expected count from prod_chains (unique first-caller names)
1770        let expected_callers_count = output
1771            .prod_chains
1772            .iter()
1773            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1774            .collect::<std::collections::HashSet<_>>()
1775            .len();
1776
1777        assert_eq!(
1778            callers_count_from_output, expected_callers_count,
1779            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1780        );
1781    }
1782
1783    #[cfg(feature = "lang-rust")]
1784    #[test]
1785    fn test_def_use_focused_analysis() {
1786        let temp_dir = TempDir::new().unwrap();
1787        fs::write(
1788            temp_dir.path().join("lib.rs"),
1789            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1790        )
1791        .unwrap();
1792
1793        let entries = walk_directory(temp_dir.path(), None).unwrap();
1794        let counter = Arc::new(AtomicUsize::new(0));
1795        let ct = CancellationToken::new();
1796        let params = FocusedAnalysisConfig {
1797            focus: "x".to_string(),
1798            match_mode: SymbolMatchMode::Exact,
1799            follow_depth: 1,
1800            max_depth: None,
1801            ast_recursion_limit: None,
1802            use_summary: false,
1803            impl_only: None,
1804            def_use: true,
1805        };
1806
1807        let output = analyze_focused_with_progress_with_entries(
1808            temp_dir.path(),
1809            &params,
1810            &counter,
1811            &ct,
1812            &entries,
1813        )
1814        .expect("def_use analysis should succeed");
1815
1816        assert!(
1817            !output.def_use_sites.is_empty(),
1818            "should find def-use sites for x"
1819        );
1820        assert!(
1821            output
1822                .def_use_sites
1823                .iter()
1824                .any(|s| s.kind == crate::types::DefUseKind::Write),
1825            "should have at least one Write site",
1826        );
1827        // No location appears as both write and read
1828        let write_locs: std::collections::HashSet<_> = output
1829            .def_use_sites
1830            .iter()
1831            .filter(|s| {
1832                matches!(
1833                    s.kind,
1834                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
1835                )
1836            })
1837            .map(|s| (&s.file, s.line, s.column))
1838            .collect();
1839        assert!(
1840            output
1841                .def_use_sites
1842                .iter()
1843                .filter(|s| s.kind == crate::types::DefUseKind::Read)
1844                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
1845            "no location should appear as both write and read",
1846        );
1847        assert!(
1848            output.formatted.contains("DEF-USE SITES"),
1849            "formatted output should contain DEF-USE SITES"
1850        );
1851    }
1852}