// code_analyze_mcp/analyze.rs
1//! Main analysis engine for extracting code structure from files and directories.
2//!
3//! Implements the three analysis modes: Overview (directory structure), FileDetails (semantic extraction),
4//! and SymbolFocus (call graph analysis). Handles parallel processing and cancellation.
5
6use crate::formatter::{
7    format_file_details, format_focused, format_focused_summary, format_structure,
8};
9use crate::graph::{CallChain, CallGraph, resolve_symbol};
10use crate::lang::language_from_extension;
11use crate::parser::{ElementExtractor, SemanticExtractor};
12use crate::test_detection::is_test_file;
13use crate::traversal::{WalkEntry, walk_directory};
14use crate::types::{AnalysisMode, FileInfo, SemanticAnalysis, SymbolMatchMode};
15use rayon::prelude::*;
16use schemars::JsonSchema;
17use serde::Serialize;
18use std::path::{Path, PathBuf};
19use std::sync::Arc;
20use std::sync::atomic::{AtomicUsize, Ordering};
21use std::time::Instant;
22use thiserror::Error;
23use tokio_util::sync::CancellationToken;
24use tracing::instrument;
25
/// Unified error type for the analysis engine, wrapping failures from every
/// pipeline stage so callers only need to handle a single enum.
#[derive(Debug, Error)]
pub enum AnalyzeError {
    /// Directory walking failed.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Source parsing / semantic extraction failed.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Call-graph construction or symbol resolution failed.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Output formatting failed.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The operation was aborted via its `CancellationToken`.
    #[error("Analysis cancelled")]
    Cancelled,
}
39
/// Result of directory analysis containing both formatted output and file data.
#[derive(Debug, Serialize, JsonSchema)]
pub struct AnalysisOutput {
    /// Rendered text form of the directory analysis, ready for display.
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    /// Per-file statistics (line/function/class counts, language, test flag).
    #[schemars(description = "List of files analyzed in the directory")]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[schemars(skip)]
    pub entries: Vec<WalkEntry>,
    /// Pagination cursor; `None` when all results fit in one page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
57
/// Result of file-level semantic analysis.
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct FileAnalysisOutput {
    /// Rendered text form of the file analysis, ready for display.
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    /// Structured extraction results (functions, classes, imports, references).
    #[schemars(description = "Semantic analysis data including functions, classes, and imports")]
    pub semantic: SemanticAnalysis,
    /// Total number of lines in the analyzed file.
    #[schemars(description = "Total line count of the analyzed file")]
    pub line_count: usize,
    /// Pagination cursor; `None` when all results fit in one page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
73
74/// Analyze a directory structure with progress tracking.
75#[instrument(skip_all, fields(path = %root.display()))]
76pub fn analyze_directory_with_progress(
77    root: &Path,
78    entries: Vec<WalkEntry>,
79    progress: Arc<AtomicUsize>,
80    ct: CancellationToken,
81) -> Result<AnalysisOutput, AnalyzeError> {
82    // Check if already cancelled
83    if ct.is_cancelled() {
84        return Err(AnalyzeError::Cancelled);
85    }
86
87    // Detect language from file extension
88    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
89
90    let start = Instant::now();
91    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
92
93    // Parallel analysis of files
94    let analysis_results: Vec<FileInfo> = file_entries
95        .par_iter()
96        .filter_map(|entry| {
97            // Check cancellation per file
98            if ct.is_cancelled() {
99                return None;
100            }
101
102            let path_str = entry.path.display().to_string();
103
104            // Detect language from extension
105            let ext = entry.path.extension().and_then(|e| e.to_str());
106
107            // Try to read file content
108            let source = match std::fs::read_to_string(&entry.path) {
109                Ok(content) => content,
110                Err(_) => {
111                    // Binary file or unreadable - exclude from output
112                    progress.fetch_add(1, Ordering::Relaxed);
113                    return None;
114                }
115            };
116
117            // Count lines
118            let line_count = source.lines().count();
119
120            // Detect language and extract counts
121            let (language, function_count, class_count) = if let Some(ext_str) = ext {
122                if let Some(lang) = language_from_extension(ext_str) {
123                    let lang_str = lang.to_string();
124                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
125                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
126                        Err(_) => (lang_str, 0, 0),
127                    }
128                } else {
129                    ("unknown".to_string(), 0, 0)
130                }
131            } else {
132                ("unknown".to_string(), 0, 0)
133            };
134
135            progress.fetch_add(1, Ordering::Relaxed);
136
137            let is_test = is_test_file(&entry.path);
138
139            Some(FileInfo {
140                path: path_str,
141                line_count,
142                function_count,
143                class_count,
144                language,
145                is_test,
146            })
147        })
148        .collect();
149
150    // Check if cancelled after parallel processing
151    if ct.is_cancelled() {
152        return Err(AnalyzeError::Cancelled);
153    }
154
155    tracing::debug!(
156        file_count = file_entries.len(),
157        duration_ms = start.elapsed().as_millis() as u64,
158        "analysis complete"
159    );
160
161    // Format output
162    let formatted = format_structure(&entries, &analysis_results, None, Some(root));
163
164    Ok(AnalysisOutput {
165        formatted,
166        files: analysis_results,
167        entries,
168        next_cursor: None,
169    })
170}
171
172/// Analyze a directory structure and return formatted output and file data.
173#[instrument(skip_all, fields(path = %root.display()))]
174pub fn analyze_directory(
175    root: &Path,
176    max_depth: Option<u32>,
177) -> Result<AnalysisOutput, AnalyzeError> {
178    let entries = walk_directory(root, max_depth)?;
179    let counter = Arc::new(AtomicUsize::new(0));
180    let ct = CancellationToken::new();
181    analyze_directory_with_progress(root, entries, counter, ct)
182}
183
184/// Determine analysis mode based on parameters and path.
185pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
186    if focus.is_some() {
187        return AnalysisMode::SymbolFocus;
188    }
189
190    let path_obj = Path::new(path);
191    if path_obj.is_dir() {
192        AnalysisMode::Overview
193    } else {
194        AnalysisMode::FileDetails
195    }
196}
197
198/// Analyze a single file and return semantic analysis with formatted output.
199#[instrument(skip_all, fields(path))]
200pub fn analyze_file(
201    path: &str,
202    ast_recursion_limit: Option<usize>,
203) -> Result<FileAnalysisOutput, AnalyzeError> {
204    let start = Instant::now();
205    let source = std::fs::read_to_string(path)
206        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
207
208    let line_count = source.lines().count();
209
210    // Detect language from extension
211    let ext = Path::new(path)
212        .extension()
213        .and_then(|e| e.to_str())
214        .and_then(language_from_extension)
215        .map(|l| l.to_string())
216        .unwrap_or_else(|| "unknown".to_string());
217
218    // Extract semantic information
219    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
220
221    // Populate the file path on references now that the path is known
222    for r in &mut semantic.references {
223        r.location = path.to_string();
224    }
225
226    // Detect if this is a test file
227    let is_test = is_test_file(Path::new(path));
228
229    // Extract parent directory for relative path display
230    let parent_dir = Path::new(path).parent();
231
232    // Format output
233    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
234
235    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = start.elapsed().as_millis() as u64, "file analysis complete");
236
237    Ok(FileAnalysisOutput {
238        formatted,
239        semantic,
240        line_count,
241        next_cursor: None,
242    })
243}
244
/// Result of focused symbol analysis.
#[derive(Debug, Serialize, JsonSchema)]
pub struct FocusedAnalysisOutput {
    /// Rendered text form of the call-graph analysis, ready for display.
    #[schemars(description = "Formatted text representation of the call graph analysis")]
    pub formatted: String,
    /// Pagination cursor; `None` when all results fit in one page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub prod_chains: Vec<CallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub test_chains: Vec<CallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub outgoing_chains: Vec<CallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[schemars(skip)]
    pub def_count: usize,
}
273
274/// Analyze a symbol's call graph across a directory with progress tracking.
275#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
276#[allow(clippy::too_many_arguments)]
277pub fn analyze_focused_with_progress(
278    root: &Path,
279    focus: &str,
280    match_mode: SymbolMatchMode,
281    follow_depth: u32,
282    max_depth: Option<u32>,
283    ast_recursion_limit: Option<usize>,
284    progress: Arc<AtomicUsize>,
285    ct: CancellationToken,
286    use_summary: bool,
287) -> Result<FocusedAnalysisOutput, AnalyzeError> {
288    #[allow(clippy::too_many_arguments)]
289    // Check if already cancelled
290    if ct.is_cancelled() {
291        return Err(AnalyzeError::Cancelled);
292    }
293
294    // Check if path is a file (hint to use directory)
295    if root.is_file() {
296        let formatted =
297            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
298                .to_string();
299        return Ok(FocusedAnalysisOutput {
300            formatted,
301            next_cursor: None,
302            prod_chains: vec![],
303            test_chains: vec![],
304            outgoing_chains: vec![],
305            def_count: 0,
306        });
307    }
308
309    // Walk the directory
310    let entries = walk_directory(root, max_depth)?;
311
312    // Collect semantic analysis for all files in parallel
313    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
314
315    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
316        .par_iter()
317        .filter_map(|entry| {
318            // Check cancellation per file
319            if ct.is_cancelled() {
320                return None;
321            }
322
323            let ext = entry.path.extension().and_then(|e| e.to_str());
324
325            // Try to read file content
326            let source = match std::fs::read_to_string(&entry.path) {
327                Ok(content) => content,
328                Err(_) => {
329                    progress.fetch_add(1, Ordering::Relaxed);
330                    return None;
331                }
332            };
333
334            // Detect language and extract semantic information
335            let language = if let Some(ext_str) = ext {
336                language_from_extension(ext_str)
337                    .map(|l| l.to_string())
338                    .unwrap_or_else(|| "unknown".to_string())
339            } else {
340                "unknown".to_string()
341            };
342
343            match SemanticExtractor::extract(&source, &language, ast_recursion_limit) {
344                Ok(mut semantic) => {
345                    // Populate file path on references
346                    for r in &mut semantic.references {
347                        r.location = entry.path.display().to_string();
348                    }
349                    progress.fetch_add(1, Ordering::Relaxed);
350                    Some((entry.path.clone(), semantic))
351                }
352                Err(_) => {
353                    progress.fetch_add(1, Ordering::Relaxed);
354                    None
355                }
356            }
357        })
358        .collect();
359
360    // Check if cancelled after parallel processing
361    if ct.is_cancelled() {
362        return Err(AnalyzeError::Cancelled);
363    }
364
365    // Build call graph
366    let graph = CallGraph::build_from_results(analysis_results)?;
367
368    // Resolve symbol name using the requested match mode.
369    // Exact mode: check the graph directly without building a sorted set (O(1) lookups).
370    // Fuzzy modes: collect a sorted, deduplicated set of all known symbols for deterministic results.
371    let resolved_focus = if match_mode == SymbolMatchMode::Exact {
372        let exists = graph.definitions.contains_key(focus)
373            || graph.callers.contains_key(focus)
374            || graph.callees.contains_key(focus);
375        if exists {
376            focus.to_string()
377        } else {
378            return Err(crate::graph::GraphError::SymbolNotFound {
379                symbol: focus.to_string(),
380                hint: "Try match_mode=insensitive for a case-insensitive search.".to_string(),
381            }
382            .into());
383        }
384    } else {
385        let all_known: Vec<String> = graph
386            .definitions
387            .keys()
388            .chain(graph.callers.keys())
389            .chain(graph.callees.keys())
390            .cloned()
391            .collect::<std::collections::BTreeSet<_>>()
392            .into_iter()
393            .collect();
394        resolve_symbol(all_known.iter(), focus, &match_mode)?
395    };
396
397    // Compute chain data for pagination (always, regardless of summary mode)
398    let def_count = graph
399        .definitions
400        .get(&resolved_focus)
401        .map_or(0, |d| d.len());
402    let incoming_chains = graph.find_incoming_chains(&resolved_focus, follow_depth)?;
403    let outgoing_chains = graph.find_outgoing_chains(&resolved_focus, follow_depth)?;
404
405    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
406        incoming_chains.into_iter().partition(|chain| {
407            chain
408                .chain
409                .first()
410                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
411        });
412
413    // Format output
414    let formatted = if use_summary {
415        format_focused_summary(&graph, &resolved_focus, follow_depth, Some(root))?
416    } else {
417        format_focused(&graph, &resolved_focus, follow_depth, Some(root))?
418    };
419
420    Ok(FocusedAnalysisOutput {
421        formatted,
422        next_cursor: None,
423        prod_chains,
424        test_chains,
425        outgoing_chains,
426        def_count,
427    })
428}
429
430/// Analyze a symbol's call graph with use_summary parameter (internal).
431#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
432#[allow(clippy::too_many_arguments)]
433/// Analyze a symbol's call graph across a directory.
434#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
435pub fn analyze_focused(
436    root: &Path,
437    focus: &str,
438    follow_depth: u32,
439    max_depth: Option<u32>,
440    ast_recursion_limit: Option<usize>,
441) -> Result<FocusedAnalysisOutput, AnalyzeError> {
442    let counter = Arc::new(AtomicUsize::new(0));
443    let ct = CancellationToken::new();
444    analyze_focused_with_progress(
445        root,
446        focus,
447        SymbolMatchMode::Exact,
448        follow_depth,
449        max_depth,
450        ast_recursion_limit,
451        counter,
452        ct,
453        false,
454    )
455}
456
457/// Analyze a single file and return a minimal fixed schema (name, line count, language,
458/// functions, imports) for lightweight code understanding.
459#[instrument(skip_all, fields(path))]
460pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
461    let source = std::fs::read_to_string(path)
462        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
463
464    let file_path = Path::new(path);
465    let name = file_path
466        .file_name()
467        .and_then(|s| s.to_str())
468        .unwrap_or("unknown")
469        .to_string();
470
471    let line_count = source.lines().count();
472
473    let language = file_path
474        .extension()
475        .and_then(|e| e.to_str())
476        .and_then(language_from_extension)
477        .ok_or_else(|| {
478            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
479                "unsupported or missing file extension".to_string(),
480            ))
481        })?;
482
483    let semantic = SemanticExtractor::extract(&source, language, None)?;
484
485    let functions = semantic
486        .functions
487        .into_iter()
488        .map(|f| crate::types::ModuleFunctionInfo {
489            name: f.name,
490            line: f.line,
491        })
492        .collect();
493
494    let imports = semantic
495        .imports
496        .into_iter()
497        .map(|i| crate::types::ModuleImportInfo {
498            module: i.module,
499            items: i.items,
500        })
501        .collect();
502
503    Ok(crate::types::ModuleInfo {
504        name,
505        line_count,
506        language: language.to_string(),
507        functions,
508        imports,
509    })
510}