// code_analyze_mcp/analyze.rs
1use crate::dataflow::DataflowGraph;
2use crate::formatter::{
3    format_file_details, format_focused, format_focused_summary, format_structure,
4};
5use crate::graph::{CallChain, CallGraph};
6use crate::lang::language_from_extension;
7use crate::parser::{ElementExtractor, SemanticExtractor};
8use crate::test_detection::is_test_file;
9use crate::traversal::{WalkEntry, walk_directory};
10use crate::types::{AnalysisMode, FileInfo, SemanticAnalysis};
11use rayon::prelude::*;
12use schemars::JsonSchema;
13use serde::Serialize;
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16use std::sync::atomic::{AtomicUsize, Ordering};
17use std::time::Instant;
18use thiserror::Error;
19use tokio_util::sync::CancellationToken;
20use tracing::instrument;
21
/// Errors that can occur during any analysis entry point in this module.
///
/// Each variant wraps the error type of the subsystem it originated from and
/// is converted automatically via `#[from]`, so `?` works across layers.
#[derive(Debug, Error)]
pub enum AnalyzeError {
    /// Directory walking failed (I/O, permissions, etc.).
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Source parsing or element/semantic extraction failed.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Call-graph construction or chain query failed.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Rendering the formatted output failed.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The operation was cancelled via its `CancellationToken`.
    #[error("Analysis cancelled")]
    Cancelled,
}
35
/// Result of directory analysis containing both formatted output and file data.
#[derive(Debug, Serialize, JsonSchema)]
pub struct AnalysisOutput {
    /// Human-readable tree rendering of the analyzed directory.
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    /// Per-file metadata: line/function/class counts, language, test flag.
    #[schemars(description = "List of files analyzed in the directory")]
    pub files: Vec<FileInfo>,
    /// Walk entries used internally for summary generation; not serialized.
    #[serde(skip)]
    #[schemars(skip)]
    pub entries: Vec<WalkEntry>,
    /// Pagination cursor; omitted from JSON when there are no further results.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
53
/// Result of file-level semantic analysis.
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct FileAnalysisOutput {
    /// Human-readable detail view of the analyzed file.
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    /// Extracted functions, classes, imports, and references.
    #[schemars(description = "Semantic analysis data including functions, classes, and imports")]
    pub semantic: SemanticAnalysis,
    /// Total number of lines in the source file.
    #[schemars(description = "Total line count of the analyzed file")]
    pub line_count: usize,
    /// Pagination cursor; omitted from JSON when there are no further results.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
69
70/// Analyze a directory structure with progress tracking.
71#[instrument(skip_all, fields(path = %root.display()))]
72pub fn analyze_directory_with_progress(
73    root: &Path,
74    max_depth: Option<u32>,
75    progress: Arc<AtomicUsize>,
76    ct: CancellationToken,
77) -> Result<AnalysisOutput, AnalyzeError> {
78    // Check if already cancelled
79    if ct.is_cancelled() {
80        return Err(AnalyzeError::Cancelled);
81    }
82
83    // Walk the directory
84    let entries = walk_directory(root, max_depth)?;
85
86    // Detect language from file extension
87    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
88
89    let start = Instant::now();
90    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
91
92    // Parallel analysis of files
93    let analysis_results: Vec<FileInfo> = file_entries
94        .par_iter()
95        .filter_map(|entry| {
96            // Check cancellation per file
97            if ct.is_cancelled() {
98                return None;
99            }
100
101            let path_str = entry.path.display().to_string();
102
103            // Detect language from extension
104            let ext = entry.path.extension().and_then(|e| e.to_str());
105
106            // Try to read file content
107            let source = match std::fs::read_to_string(&entry.path) {
108                Ok(content) => content,
109                Err(_) => {
110                    // Binary file or unreadable - exclude from output
111                    progress.fetch_add(1, Ordering::Relaxed);
112                    return None;
113                }
114            };
115
116            // Count lines
117            let line_count = source.lines().count();
118
119            // Detect language and extract counts
120            let (language, function_count, class_count) = if let Some(ext_str) = ext {
121                if let Some(lang) = language_from_extension(ext_str) {
122                    let lang_str = lang.to_string();
123                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
124                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
125                        Err(_) => (lang_str, 0, 0),
126                    }
127                } else {
128                    ("unknown".to_string(), 0, 0)
129                }
130            } else {
131                ("unknown".to_string(), 0, 0)
132            };
133
134            progress.fetch_add(1, Ordering::Relaxed);
135
136            let is_test = is_test_file(&entry.path);
137
138            Some(FileInfo {
139                path: path_str,
140                line_count,
141                function_count,
142                class_count,
143                language,
144                is_test,
145            })
146        })
147        .collect();
148
149    // Check if cancelled after parallel processing
150    if ct.is_cancelled() {
151        return Err(AnalyzeError::Cancelled);
152    }
153
154    tracing::debug!(
155        file_count = file_entries.len(),
156        duration_ms = start.elapsed().as_millis() as u64,
157        "analysis complete"
158    );
159
160    // Format output
161    let formatted = format_structure(&entries, &analysis_results, max_depth, Some(root));
162
163    Ok(AnalysisOutput {
164        formatted,
165        files: analysis_results,
166        entries,
167        next_cursor: None,
168    })
169}
170
171/// Analyze a directory structure and return formatted output and file data.
172#[instrument(skip_all, fields(path = %root.display()))]
173pub fn analyze_directory(
174    root: &Path,
175    max_depth: Option<u32>,
176) -> Result<AnalysisOutput, AnalyzeError> {
177    let counter = Arc::new(AtomicUsize::new(0));
178    let ct = CancellationToken::new();
179    analyze_directory_with_progress(root, max_depth, counter, ct)
180}
181
182/// Determine analysis mode based on parameters and path.
183pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
184    if focus.is_some() {
185        return AnalysisMode::SymbolFocus;
186    }
187
188    let path_obj = Path::new(path);
189    if path_obj.is_dir() {
190        AnalysisMode::Overview
191    } else {
192        AnalysisMode::FileDetails
193    }
194}
195
196/// Analyze a single file and return semantic analysis with formatted output.
197#[instrument(skip_all, fields(path))]
198pub fn analyze_file(
199    path: &str,
200    ast_recursion_limit: Option<usize>,
201) -> Result<FileAnalysisOutput, AnalyzeError> {
202    let start = Instant::now();
203    let source = std::fs::read_to_string(path)
204        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
205
206    let line_count = source.lines().count();
207
208    // Detect language from extension
209    let ext = Path::new(path)
210        .extension()
211        .and_then(|e| e.to_str())
212        .and_then(language_from_extension)
213        .map(|l| l.to_string())
214        .unwrap_or_else(|| "unknown".to_string());
215
216    // Extract semantic information
217    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
218
219    // Populate the file path on references now that the path is known
220    for r in &mut semantic.references {
221        r.location = path.to_string();
222    }
223
224    // Detect if this is a test file
225    let is_test = is_test_file(Path::new(path));
226
227    // Extract parent directory for relative path display
228    let parent_dir = Path::new(path).parent();
229
230    // Format output
231    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
232
233    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = start.elapsed().as_millis() as u64, "file analysis complete");
234
235    Ok(FileAnalysisOutput {
236        formatted,
237        semantic,
238        line_count,
239        next_cursor: None,
240    })
241}
242
/// Result of focused symbol analysis.
#[derive(Debug, Serialize, JsonSchema)]
pub struct FocusedAnalysisOutput {
    /// Rendered call-graph view (full or summary, depending on the request).
    #[schemars(description = "Formatted text representation of the call graph analysis")]
    pub formatted: String,
    /// Pagination cursor; omitted from JSON when there are no further results.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
    /// Production caller chains (partitioned from incoming chains, excluding test callers).
    /// Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub prod_chains: Vec<CallChain>,
    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub test_chains: Vec<CallChain>,
    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
    #[serde(skip)]
    #[schemars(skip)]
    pub outgoing_chains: Vec<CallChain>,
    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
    #[serde(skip)]
    #[schemars(skip)]
    pub def_count: usize,
}
271
272/// Analyze a symbol's call graph across a directory with progress tracking.
273#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
274#[allow(clippy::too_many_arguments)]
275pub fn analyze_focused_with_progress(
276    root: &Path,
277    focus: &str,
278    follow_depth: u32,
279    max_depth: Option<u32>,
280    ast_recursion_limit: Option<usize>,
281    progress: Arc<AtomicUsize>,
282    ct: CancellationToken,
283    use_summary: bool,
284) -> Result<FocusedAnalysisOutput, AnalyzeError> {
285    #[allow(clippy::too_many_arguments)]
286    // Check if already cancelled
287    if ct.is_cancelled() {
288        return Err(AnalyzeError::Cancelled);
289    }
290
291    // Check if path is a file (hint to use directory)
292    if root.is_file() {
293        let formatted =
294            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
295                .to_string();
296        return Ok(FocusedAnalysisOutput {
297            formatted,
298            next_cursor: None,
299            prod_chains: vec![],
300            test_chains: vec![],
301            outgoing_chains: vec![],
302            def_count: 0,
303        });
304    }
305
306    // Walk the directory
307    let entries = walk_directory(root, max_depth)?;
308
309    // Collect semantic analysis for all files in parallel
310    let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
311
312    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
313        .par_iter()
314        .filter_map(|entry| {
315            // Check cancellation per file
316            if ct.is_cancelled() {
317                return None;
318            }
319
320            let ext = entry.path.extension().and_then(|e| e.to_str());
321
322            // Try to read file content
323            let source = match std::fs::read_to_string(&entry.path) {
324                Ok(content) => content,
325                Err(_) => {
326                    progress.fetch_add(1, Ordering::Relaxed);
327                    return None;
328                }
329            };
330
331            // Detect language and extract semantic information
332            let language = if let Some(ext_str) = ext {
333                language_from_extension(ext_str)
334                    .map(|l| l.to_string())
335                    .unwrap_or_else(|| "unknown".to_string())
336            } else {
337                "unknown".to_string()
338            };
339
340            match SemanticExtractor::extract(&source, &language, ast_recursion_limit) {
341                Ok(mut semantic) => {
342                    // Populate file path on references
343                    for r in &mut semantic.references {
344                        r.location = entry.path.display().to_string();
345                    }
346                    progress.fetch_add(1, Ordering::Relaxed);
347                    Some((entry.path.clone(), semantic))
348                }
349                Err(_) => {
350                    progress.fetch_add(1, Ordering::Relaxed);
351                    None
352                }
353            }
354        })
355        .collect();
356
357    // Check if cancelled after parallel processing
358    if ct.is_cancelled() {
359        return Err(AnalyzeError::Cancelled);
360    }
361
362    // Build call graph
363    let dataflow = DataflowGraph::build_from_results(&analysis_results);
364    let graph = CallGraph::build_from_results(analysis_results)?;
365
366    // Compute chain data for pagination (always, regardless of summary mode)
367    let def_count = graph.definitions.get(focus).map_or(0, |d| d.len());
368    let incoming_chains = graph.find_incoming_chains(focus, follow_depth)?;
369    let outgoing_chains = graph.find_outgoing_chains(focus, follow_depth)?;
370
371    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
372        incoming_chains.into_iter().partition(|chain| {
373            chain
374                .chain
375                .first()
376                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
377        });
378
379    // Format output
380    let formatted = if use_summary {
381        format_focused_summary(&graph, &dataflow, focus, follow_depth, Some(root))?
382    } else {
383        format_focused(&graph, &dataflow, focus, follow_depth, Some(root))?
384    };
385
386    Ok(FocusedAnalysisOutput {
387        formatted,
388        next_cursor: None,
389        prod_chains,
390        test_chains,
391        outgoing_chains,
392        def_count,
393    })
394}
395
396/// Analyze a symbol's call graph with use_summary parameter (internal).
397#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
398#[allow(clippy::too_many_arguments)]
399/// Analyze a symbol's call graph across a directory.
400#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
401pub fn analyze_focused(
402    root: &Path,
403    focus: &str,
404    follow_depth: u32,
405    max_depth: Option<u32>,
406    ast_recursion_limit: Option<usize>,
407) -> Result<FocusedAnalysisOutput, AnalyzeError> {
408    let counter = Arc::new(AtomicUsize::new(0));
409    let ct = CancellationToken::new();
410    analyze_focused_with_progress(
411        root,
412        focus,
413        follow_depth,
414        max_depth,
415        ast_recursion_limit,
416        counter,
417        ct,
418        false,
419    )
420}