// code_analyze_mcp/analyze.rs
1//! Main analysis engine for extracting code structure from files and directories.
2//!
3//! Implements the three analysis modes: Overview (directory structure), FileDetails (semantic extraction),
4//! and SymbolFocus (call graph analysis). Handles parallel processing and cancellation.
5
6use crate::formatter::{
7    format_file_details, format_focused, format_focused_summary, format_structure,
8};
9use crate::graph::{CallChain, CallGraph};
10use crate::lang::language_from_extension;
11use crate::parser::{ElementExtractor, SemanticExtractor};
12use crate::test_detection::is_test_file;
13use crate::traversal::{WalkEntry, walk_directory};
14use crate::types::{AnalysisMode, FileInfo, SemanticAnalysis};
15use rayon::prelude::*;
16use schemars::JsonSchema;
17use serde::Serialize;
18use std::path::{Path, PathBuf};
19use std::sync::Arc;
20use std::sync::atomic::{AtomicUsize, Ordering};
21use std::time::Instant;
22use thiserror::Error;
23use tokio_util::sync::CancellationToken;
24use tracing::instrument;
25
/// Errors that can occur during analysis.
///
/// Wraps errors from the traversal, parser, graph, and formatter subsystems
/// (converted automatically via `#[from]`), plus explicit cancellation.
#[derive(Debug, Error)]
pub enum AnalyzeError {
    /// Directory walking failed (propagated from `crate::traversal`).
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Source parsing / semantic extraction failed.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Call-graph construction or chain lookup failed.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Output formatting failed.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The operation was cancelled via a `CancellationToken`.
    #[error("Analysis cancelled")]
    Cancelled,
}
39
/// Result of directory analysis containing both formatted output and file data.
#[derive(Debug, Serialize, JsonSchema)]
pub struct AnalysisOutput {
    /// Human-readable structure/summary text produced by the formatter.
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    /// Per-file statistics (line count, function/class counts, language, test flag).
    #[schemars(description = "List of files analyzed in the directory")]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[schemars(skip)]
    pub entries: Vec<WalkEntry>,
    /// Pagination cursor; `None` when all results fit in a single page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
57
/// Result of file-level semantic analysis.
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct FileAnalysisOutput {
    /// Human-readable file-detail text produced by the formatter.
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    /// Extracted semantic structure of the file.
    #[schemars(description = "Semantic analysis data including functions, classes, and imports")]
    pub semantic: SemanticAnalysis,
    /// Total number of lines in the analyzed file.
    #[schemars(description = "Total line count of the analyzed file")]
    pub line_count: usize,
    /// Pagination cursor; `None` when all results fit in a single page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
73
74/// Analyze a directory structure with progress tracking.
75#[instrument(skip_all, fields(path = %root.display()))]
76pub fn analyze_directory_with_progress(
77    root: &Path,
78    entries: Vec<WalkEntry>,
79    progress: Arc<AtomicUsize>,
80    ct: CancellationToken,
81) -> Result<AnalysisOutput, AnalyzeError> {
82    // Check if already cancelled
83    if ct.is_cancelled() {
84        return Err(AnalyzeError::Cancelled);
85    }
86
87    // Detect language from file extension
88    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
89
90    let start = Instant::now();
91    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
92
93    // Parallel analysis of files
94    let analysis_results: Vec<FileInfo> = file_entries
95        .par_iter()
96        .filter_map(|entry| {
97            // Check cancellation per file
98            if ct.is_cancelled() {
99                return None;
100            }
101
102            let path_str = entry.path.display().to_string();
103
104            // Detect language from extension
105            let ext = entry.path.extension().and_then(|e| e.to_str());
106
107            // Try to read file content
108            let source = match std::fs::read_to_string(&entry.path) {
109                Ok(content) => content,
110                Err(_) => {
111                    // Binary file or unreadable - exclude from output
112                    progress.fetch_add(1, Ordering::Relaxed);
113                    return None;
114                }
115            };
116
117            // Count lines
118            let line_count = source.lines().count();
119
120            // Detect language and extract counts
121            let (language, function_count, class_count) = if let Some(ext_str) = ext {
122                if let Some(lang) = language_from_extension(ext_str) {
123                    let lang_str = lang.to_string();
124                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
125                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
126                        Err(_) => (lang_str, 0, 0),
127                    }
128                } else {
129                    ("unknown".to_string(), 0, 0)
130                }
131            } else {
132                ("unknown".to_string(), 0, 0)
133            };
134
135            progress.fetch_add(1, Ordering::Relaxed);
136
137            let is_test = is_test_file(&entry.path);
138
139            Some(FileInfo {
140                path: path_str,
141                line_count,
142                function_count,
143                class_count,
144                language,
145                is_test,
146            })
147        })
148        .collect();
149
150    // Check if cancelled after parallel processing
151    if ct.is_cancelled() {
152        return Err(AnalyzeError::Cancelled);
153    }
154
155    tracing::debug!(
156        file_count = file_entries.len(),
157        duration_ms = start.elapsed().as_millis() as u64,
158        "analysis complete"
159    );
160
161    // Format output
162    let formatted = format_structure(&entries, &analysis_results, None, Some(root));
163
164    Ok(AnalysisOutput {
165        formatted,
166        files: analysis_results,
167        entries,
168        next_cursor: None,
169    })
170}
171
172/// Analyze a directory structure and return formatted output and file data.
173#[instrument(skip_all, fields(path = %root.display()))]
174pub fn analyze_directory(
175    root: &Path,
176    max_depth: Option<u32>,
177) -> Result<AnalysisOutput, AnalyzeError> {
178    let entries = walk_directory(root, max_depth)?;
179    let counter = Arc::new(AtomicUsize::new(0));
180    let ct = CancellationToken::new();
181    analyze_directory_with_progress(root, entries, counter, ct)
182}
183
184/// Determine analysis mode based on parameters and path.
185pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
186    if focus.is_some() {
187        return AnalysisMode::SymbolFocus;
188    }
189
190    let path_obj = Path::new(path);
191    if path_obj.is_dir() {
192        AnalysisMode::Overview
193    } else {
194        AnalysisMode::FileDetails
195    }
196}
197
198/// Analyze a single file and return semantic analysis with formatted output.
199#[instrument(skip_all, fields(path))]
200pub fn analyze_file(
201    path: &str,
202    ast_recursion_limit: Option<usize>,
203) -> Result<FileAnalysisOutput, AnalyzeError> {
204    let start = Instant::now();
205    let source = std::fs::read_to_string(path)
206        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
207
208    let line_count = source.lines().count();
209
210    // Detect language from extension
211    let ext = Path::new(path)
212        .extension()
213        .and_then(|e| e.to_str())
214        .and_then(language_from_extension)
215        .map(|l| l.to_string())
216        .unwrap_or_else(|| "unknown".to_string());
217
218    // Extract semantic information
219    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
220
221    // Populate the file path on references now that the path is known
222    for r in &mut semantic.references {
223        r.location = path.to_string();
224    }
225
226    // Detect if this is a test file
227    let is_test = is_test_file(Path::new(path));
228
229    // Extract parent directory for relative path display
230    let parent_dir = Path::new(path).parent();
231
232    // Format output
233    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
234
235    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = start.elapsed().as_millis() as u64, "file analysis complete");
236
237    Ok(FileAnalysisOutput {
238        formatted,
239        semantic,
240        line_count,
241        next_cursor: None,
242    })
243}
244
/// Result of focused symbol analysis.
#[derive(Debug, Serialize, JsonSchema)]
pub struct FocusedAnalysisOutput {
    /// Human-readable call-graph text produced by the formatter.
    #[schemars(description = "Formatted text representation of the call graph analysis")]
    pub formatted: String,
    /// Pagination cursor; `None` when all results fit in a single page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub prod_chains: Vec<CallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub test_chains: Vec<CallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub outgoing_chains: Vec<CallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[schemars(skip)]
    pub def_count: usize,
}
273
274/// Analyze a symbol's call graph across a directory with progress tracking.
275#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
276#[allow(clippy::too_many_arguments)]
277pub fn analyze_focused_with_progress(
278    root: &Path,
279    focus: &str,
280    follow_depth: u32,
281    max_depth: Option<u32>,
282    ast_recursion_limit: Option<usize>,
283    progress: Arc<AtomicUsize>,
284    ct: CancellationToken,
285    use_summary: bool,
286) -> Result<FocusedAnalysisOutput, AnalyzeError> {
287    #[allow(clippy::too_many_arguments)]
288    // Check if already cancelled
289    if ct.is_cancelled() {
290        return Err(AnalyzeError::Cancelled);
291    }
292
293    // Check if path is a file (hint to use directory)
294    if root.is_file() {
295        let formatted =
296            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
297                .to_string();
298        return Ok(FocusedAnalysisOutput {
299            formatted,
300            next_cursor: None,
301            prod_chains: vec![],
302            test_chains: vec![],
303            outgoing_chains: vec![],
304            def_count: 0,
305        });
306    }
307
308    // Walk the directory
309    let entries = walk_directory(root, max_depth)?;
310
311    // Collect semantic analysis for all files in parallel
312    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
313
314    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
315        .par_iter()
316        .filter_map(|entry| {
317            // Check cancellation per file
318            if ct.is_cancelled() {
319                return None;
320            }
321
322            let ext = entry.path.extension().and_then(|e| e.to_str());
323
324            // Try to read file content
325            let source = match std::fs::read_to_string(&entry.path) {
326                Ok(content) => content,
327                Err(_) => {
328                    progress.fetch_add(1, Ordering::Relaxed);
329                    return None;
330                }
331            };
332
333            // Detect language and extract semantic information
334            let language = if let Some(ext_str) = ext {
335                language_from_extension(ext_str)
336                    .map(|l| l.to_string())
337                    .unwrap_or_else(|| "unknown".to_string())
338            } else {
339                "unknown".to_string()
340            };
341
342            match SemanticExtractor::extract(&source, &language, ast_recursion_limit) {
343                Ok(mut semantic) => {
344                    // Populate file path on references
345                    for r in &mut semantic.references {
346                        r.location = entry.path.display().to_string();
347                    }
348                    progress.fetch_add(1, Ordering::Relaxed);
349                    Some((entry.path.clone(), semantic))
350                }
351                Err(_) => {
352                    progress.fetch_add(1, Ordering::Relaxed);
353                    None
354                }
355            }
356        })
357        .collect();
358
359    // Check if cancelled after parallel processing
360    if ct.is_cancelled() {
361        return Err(AnalyzeError::Cancelled);
362    }
363
364    // Build call graph
365    let graph = CallGraph::build_from_results(analysis_results)?;
366
367    // Compute chain data for pagination (always, regardless of summary mode)
368    let def_count = graph.definitions.get(focus).map_or(0, |d| d.len());
369    let incoming_chains = graph.find_incoming_chains(focus, follow_depth)?;
370    let outgoing_chains = graph.find_outgoing_chains(focus, follow_depth)?;
371
372    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
373        incoming_chains.into_iter().partition(|chain| {
374            chain
375                .chain
376                .first()
377                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
378        });
379
380    // Format output
381    let formatted = if use_summary {
382        format_focused_summary(&graph, focus, follow_depth, Some(root))?
383    } else {
384        format_focused(&graph, focus, follow_depth, Some(root))?
385    };
386
387    Ok(FocusedAnalysisOutput {
388        formatted,
389        next_cursor: None,
390        prod_chains,
391        test_chains,
392        outgoing_chains,
393        def_count,
394    })
395}
396
397/// Analyze a symbol's call graph with use_summary parameter (internal).
398#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
399#[allow(clippy::too_many_arguments)]
400/// Analyze a symbol's call graph across a directory.
401#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
402pub fn analyze_focused(
403    root: &Path,
404    focus: &str,
405    follow_depth: u32,
406    max_depth: Option<u32>,
407    ast_recursion_limit: Option<usize>,
408) -> Result<FocusedAnalysisOutput, AnalyzeError> {
409    let counter = Arc::new(AtomicUsize::new(0));
410    let ct = CancellationToken::new();
411    analyze_focused_with_progress(
412        root,
413        focus,
414        follow_depth,
415        max_depth,
416        ast_recursion_limit,
417        counter,
418        ct,
419        false,
420    )
421}