1use crate::formatter::{
7 format_file_details, format_focused, format_focused_summary, format_structure,
8};
9use crate::graph::{CallChain, CallGraph, resolve_symbol};
10use crate::lang::language_from_extension;
11use crate::parser::{ElementExtractor, SemanticExtractor};
12use crate::test_detection::is_test_file;
13use crate::traversal::{WalkEntry, walk_directory};
14use crate::types::{AnalysisMode, FileInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode};
15use rayon::prelude::*;
16use schemars::JsonSchema;
17use serde::Serialize;
18use std::path::{Path, PathBuf};
19use std::sync::Arc;
20use std::sync::atomic::{AtomicUsize, Ordering};
21use std::time::Instant;
22use thiserror::Error;
23use tokio_util::sync::CancellationToken;
24use tracing::instrument;
25
/// Errors surfaced by the analysis entry points in this module.
///
/// Each variant wraps the error type of one pipeline stage (traversal,
/// parsing, call-graph construction, formatting); `Cancelled` is returned
/// when the caller's `CancellationToken` fires mid-run.
#[derive(Debug, Error)]
pub enum AnalyzeError {
    /// Directory walking failed.
    #[error("Traversal error: {0}")]
    Traversal(#[from] crate::traversal::TraversalError),
    /// Source parsing / extraction failed.
    #[error("Parser error: {0}")]
    Parser(#[from] crate::parser::ParserError),
    /// Call-graph construction or symbol resolution failed.
    #[error("Graph error: {0}")]
    Graph(#[from] crate::graph::GraphError),
    /// Output formatting failed.
    #[error("Formatter error: {0}")]
    Formatter(#[from] crate::formatter::FormatterError),
    /// The run was cancelled via the provided `CancellationToken`.
    #[error("Analysis cancelled")]
    Cancelled,
}
39
/// Result of analyzing a directory: a formatted text report, per-file
/// statistics, the raw walk entries (internal only), and an optional
/// pagination cursor.
#[derive(Debug, Serialize, JsonSchema)]
pub struct AnalysisOutput {
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    #[schemars(description = "List of files analyzed in the directory")]
    pub files: Vec<FileInfo>,
    // Raw traversal entries kept for internal reuse; hidden from both the
    // serialized JSON and the generated schema.
    #[serde(skip)]
    #[schemars(skip)]
    pub entries: Vec<WalkEntry>,
    // Omitted from output entirely when there is no further page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
57
/// Result of analyzing a single file: a formatted text report plus the
/// structured semantic data and the file's line count.
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct FileAnalysisOutput {
    #[schemars(description = "Formatted text representation of the analysis")]
    pub formatted: String,
    #[schemars(description = "Semantic analysis data including functions, classes, and imports")]
    pub semantic: SemanticAnalysis,
    #[schemars(description = "Total line count of the analyzed file")]
    #[schemars(schema_with = "crate::schema_helpers::integer_schema")]
    pub line_count: usize,
    // Omitted from output entirely when there is no further page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
}
74
75#[instrument(skip_all, fields(path = %root.display()))]
77pub fn analyze_directory_with_progress(
78 root: &Path,
79 entries: Vec<WalkEntry>,
80 progress: Arc<AtomicUsize>,
81 ct: CancellationToken,
82) -> Result<AnalysisOutput, AnalyzeError> {
83 if ct.is_cancelled() {
85 return Err(AnalyzeError::Cancelled);
86 }
87
88 let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
90
91 let start = Instant::now();
92 tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
93
94 let analysis_results: Vec<FileInfo> = file_entries
96 .par_iter()
97 .filter_map(|entry| {
98 if ct.is_cancelled() {
100 return None;
101 }
102
103 let path_str = entry.path.display().to_string();
104
105 let ext = entry.path.extension().and_then(|e| e.to_str());
107
108 let source = match std::fs::read_to_string(&entry.path) {
110 Ok(content) => content,
111 Err(_) => {
112 progress.fetch_add(1, Ordering::Relaxed);
114 return None;
115 }
116 };
117
118 let line_count = source.lines().count();
120
121 let (language, function_count, class_count) = if let Some(ext_str) = ext {
123 if let Some(lang) = language_from_extension(ext_str) {
124 let lang_str = lang.to_string();
125 match ElementExtractor::extract_with_depth(&source, &lang_str) {
126 Ok((func_count, class_count)) => (lang_str, func_count, class_count),
127 Err(_) => (lang_str, 0, 0),
128 }
129 } else {
130 ("unknown".to_string(), 0, 0)
131 }
132 } else {
133 ("unknown".to_string(), 0, 0)
134 };
135
136 progress.fetch_add(1, Ordering::Relaxed);
137
138 let is_test = is_test_file(&entry.path);
139
140 Some(FileInfo {
141 path: path_str,
142 line_count,
143 function_count,
144 class_count,
145 language,
146 is_test,
147 })
148 })
149 .collect();
150
151 if ct.is_cancelled() {
153 return Err(AnalyzeError::Cancelled);
154 }
155
156 tracing::debug!(
157 file_count = file_entries.len(),
158 duration_ms = start.elapsed().as_millis() as u64,
159 "analysis complete"
160 );
161
162 let formatted = format_structure(&entries, &analysis_results, None, Some(root));
164
165 Ok(AnalysisOutput {
166 formatted,
167 files: analysis_results,
168 entries,
169 next_cursor: None,
170 })
171}
172
173#[instrument(skip_all, fields(path = %root.display()))]
175pub fn analyze_directory(
176 root: &Path,
177 max_depth: Option<u32>,
178) -> Result<AnalysisOutput, AnalyzeError> {
179 let entries = walk_directory(root, max_depth)?;
180 let counter = Arc::new(AtomicUsize::new(0));
181 let ct = CancellationToken::new();
182 analyze_directory_with_progress(root, entries, counter, ct)
183}
184
185pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
187 if focus.is_some() {
188 return AnalysisMode::SymbolFocus;
189 }
190
191 let path_obj = Path::new(path);
192 if path_obj.is_dir() {
193 AnalysisMode::Overview
194 } else {
195 AnalysisMode::FileDetails
196 }
197}
198
199#[instrument(skip_all, fields(path))]
201pub fn analyze_file(
202 path: &str,
203 ast_recursion_limit: Option<usize>,
204) -> Result<FileAnalysisOutput, AnalyzeError> {
205 let start = Instant::now();
206 let source = std::fs::read_to_string(path)
207 .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
208
209 let line_count = source.lines().count();
210
211 let ext = Path::new(path)
213 .extension()
214 .and_then(|e| e.to_str())
215 .and_then(language_from_extension)
216 .map(|l| l.to_string())
217 .unwrap_or_else(|| "unknown".to_string());
218
219 let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
221
222 for r in &mut semantic.references {
224 r.location = path.to_string();
225 }
226
227 if ext == "python" {
229 resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
230 }
231
232 let is_test = is_test_file(Path::new(path));
234
235 let parent_dir = Path::new(path).parent();
237
238 let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
240
241 tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = start.elapsed().as_millis() as u64, "file analysis complete");
242
243 Ok(FileAnalysisOutput {
244 formatted,
245 semantic,
246 line_count,
247 next_cursor: None,
248 })
249}
250
/// Result of a symbol-focused call-graph analysis.
///
/// Only `formatted` and `next_cursor` are serialized; the chain vectors and
/// definition count are internal data carried for callers within this crate.
#[derive(Debug, Serialize, JsonSchema)]
pub struct FocusedAnalysisOutput {
    #[schemars(description = "Formatted text representation of the call graph analysis")]
    pub formatted: String,
    // Omitted from output entirely when there is no further page.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(
        description = "Opaque cursor token for the next page of results (absent when no more results)"
    )]
    pub next_cursor: Option<String>,
    // Incoming chains whose outermost caller is production code.
    #[serde(skip)]
    #[schemars(skip)]
    pub prod_chains: Vec<CallChain>,
    // Incoming chains whose outermost caller is test code.
    #[serde(skip)]
    #[schemars(skip)]
    pub test_chains: Vec<CallChain>,
    // Chains of calls made *by* the focused symbol.
    #[serde(skip)]
    #[schemars(skip)]
    pub outgoing_chains: Vec<CallChain>,
    // Number of definition sites found for the focused symbol.
    #[serde(skip)]
    #[schemars(skip)]
    pub def_count: usize,
}
279
280#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
282#[allow(clippy::too_many_arguments)]
283pub fn analyze_focused_with_progress(
284 root: &Path,
285 focus: &str,
286 match_mode: SymbolMatchMode,
287 follow_depth: u32,
288 max_depth: Option<u32>,
289 ast_recursion_limit: Option<usize>,
290 progress: Arc<AtomicUsize>,
291 ct: CancellationToken,
292 use_summary: bool,
293) -> Result<FocusedAnalysisOutput, AnalyzeError> {
294 #[allow(clippy::too_many_arguments)]
295 if ct.is_cancelled() {
297 return Err(AnalyzeError::Cancelled);
298 }
299
300 if root.is_file() {
302 let formatted =
303 "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
304 .to_string();
305 return Ok(FocusedAnalysisOutput {
306 formatted,
307 next_cursor: None,
308 prod_chains: vec![],
309 test_chains: vec![],
310 outgoing_chains: vec![],
311 def_count: 0,
312 });
313 }
314
315 let entries = walk_directory(root, max_depth)?;
317
318 let file_entries: Vec<&WalkEntry> = entries.iter().filter(|e| !e.is_dir).collect();
320
321 let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
322 .par_iter()
323 .filter_map(|entry| {
324 if ct.is_cancelled() {
326 return None;
327 }
328
329 let ext = entry.path.extension().and_then(|e| e.to_str());
330
331 let source = match std::fs::read_to_string(&entry.path) {
333 Ok(content) => content,
334 Err(_) => {
335 progress.fetch_add(1, Ordering::Relaxed);
336 return None;
337 }
338 };
339
340 let language = if let Some(ext_str) = ext {
342 language_from_extension(ext_str)
343 .map(|l| l.to_string())
344 .unwrap_or_else(|| "unknown".to_string())
345 } else {
346 "unknown".to_string()
347 };
348
349 match SemanticExtractor::extract(&source, &language, ast_recursion_limit) {
350 Ok(mut semantic) => {
351 for r in &mut semantic.references {
353 r.location = entry.path.display().to_string();
354 }
355 progress.fetch_add(1, Ordering::Relaxed);
356 Some((entry.path.clone(), semantic))
357 }
358 Err(_) => {
359 progress.fetch_add(1, Ordering::Relaxed);
360 None
361 }
362 }
363 })
364 .collect();
365
366 if ct.is_cancelled() {
368 return Err(AnalyzeError::Cancelled);
369 }
370
371 let graph = CallGraph::build_from_results(analysis_results)?;
373
374 let resolved_focus = if match_mode == SymbolMatchMode::Exact {
378 let exists = graph.definitions.contains_key(focus)
379 || graph.callers.contains_key(focus)
380 || graph.callees.contains_key(focus);
381 if exists {
382 focus.to_string()
383 } else {
384 return Err(crate::graph::GraphError::SymbolNotFound {
385 symbol: focus.to_string(),
386 hint: "Try match_mode=insensitive for a case-insensitive search.".to_string(),
387 }
388 .into());
389 }
390 } else {
391 let all_known: Vec<String> = graph
392 .definitions
393 .keys()
394 .chain(graph.callers.keys())
395 .chain(graph.callees.keys())
396 .cloned()
397 .collect::<std::collections::BTreeSet<_>>()
398 .into_iter()
399 .collect();
400 resolve_symbol(all_known.iter(), focus, &match_mode)?
401 };
402
403 let def_count = graph
405 .definitions
406 .get(&resolved_focus)
407 .map_or(0, |d| d.len());
408 let incoming_chains = graph.find_incoming_chains(&resolved_focus, follow_depth)?;
409 let outgoing_chains = graph.find_outgoing_chains(&resolved_focus, follow_depth)?;
410
411 let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
412 incoming_chains.into_iter().partition(|chain| {
413 chain
414 .chain
415 .first()
416 .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
417 });
418
419 let formatted = if use_summary {
421 format_focused_summary(&graph, &resolved_focus, follow_depth, Some(root))?
422 } else {
423 format_focused(&graph, &resolved_focus, follow_depth, Some(root))?
424 };
425
426 Ok(FocusedAnalysisOutput {
427 formatted,
428 next_cursor: None,
429 prod_chains,
430 test_chains,
431 outgoing_chains,
432 def_count,
433 })
434}
435
436#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
438#[allow(clippy::too_many_arguments)]
439#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
441pub fn analyze_focused(
442 root: &Path,
443 focus: &str,
444 follow_depth: u32,
445 max_depth: Option<u32>,
446 ast_recursion_limit: Option<usize>,
447) -> Result<FocusedAnalysisOutput, AnalyzeError> {
448 let counter = Arc::new(AtomicUsize::new(0));
449 let ct = CancellationToken::new();
450 analyze_focused_with_progress(
451 root,
452 focus,
453 SymbolMatchMode::Exact,
454 follow_depth,
455 max_depth,
456 ast_recursion_limit,
457 counter,
458 ct,
459 false,
460 )
461}
462
463#[instrument(skip_all, fields(path))]
466pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
467 let source = std::fs::read_to_string(path)
468 .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
469
470 let file_path = Path::new(path);
471 let name = file_path
472 .file_name()
473 .and_then(|s| s.to_str())
474 .unwrap_or("unknown")
475 .to_string();
476
477 let line_count = source.lines().count();
478
479 let language = file_path
480 .extension()
481 .and_then(|e| e.to_str())
482 .and_then(language_from_extension)
483 .ok_or_else(|| {
484 AnalyzeError::Parser(crate::parser::ParserError::ParseError(
485 "unsupported or missing file extension".to_string(),
486 ))
487 })?;
488
489 let semantic = SemanticExtractor::extract(&source, language, None)?;
490
491 let functions = semantic
492 .functions
493 .into_iter()
494 .map(|f| crate::types::ModuleFunctionInfo {
495 name: f.name,
496 line: f.line,
497 })
498 .collect();
499
500 let imports = semantic
501 .imports
502 .into_iter()
503 .map(|i| crate::types::ModuleImportInfo {
504 module: i.module,
505 items: i.items,
506 })
507 .collect();
508
509 Ok(crate::types::ModuleInfo {
510 name,
511 line_count,
512 language: language.to_string(),
513 functions,
514 imports,
515 })
516}
517
518fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
528 use std::collections::HashMap;
529
530 let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
531 let file_path_canonical = match file_path.canonicalize() {
532 Ok(p) => p,
533 Err(_) => {
534 tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
535 return;
536 }
537 };
538
539 for import in imports.iter_mut() {
540 if import.items != ["*"] {
541 continue;
542 }
543 resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
544 }
545}
546
547fn resolve_single_wildcard(
549 import: &mut ImportInfo,
550 file_path: &Path,
551 file_path_canonical: &Path,
552 resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
553) {
554 let module = import.module.clone();
555 let dot_count = module.chars().take_while(|c| *c == '.').count();
556 if dot_count == 0 {
557 return;
558 }
559 let module_path = module.trim_start_matches('.');
560
561 let target_to_read = match locate_target_file(file_path, dot_count, module_path, &module) {
562 Some(p) => p,
563 None => return,
564 };
565
566 let canonical = match target_to_read.canonicalize() {
567 Ok(p) => p,
568 Err(_) => {
569 tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
570 return;
571 }
572 };
573
574 if canonical == file_path_canonical {
575 tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
576 return;
577 }
578
579 if let Some(cached) = resolved_cache.get(&canonical) {
580 tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
581 import.items = cached.clone();
582 return;
583 }
584
585 if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
586 tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
587 import.items = symbols.clone();
588 resolved_cache.insert(canonical, symbols);
589 }
590}
591
592fn locate_target_file(
594 file_path: &Path,
595 dot_count: usize,
596 module_path: &str,
597 module: &str,
598) -> Option<PathBuf> {
599 let mut target_dir = file_path.parent()?.to_path_buf();
600
601 for _ in 1..dot_count {
602 if !target_dir.pop() {
603 tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
604 return None;
605 }
606 }
607
608 let target_file = if module_path.is_empty() {
609 target_dir.join("__init__.py")
610 } else {
611 let rel_path = module_path.replace('.', "/");
612 target_dir.join(format!("{rel_path}.py"))
613 };
614
615 if target_file.exists() {
616 Some(target_file)
617 } else if target_file.with_extension("").is_dir() {
618 let init = target_file.with_extension("").join("__init__.py");
619 if init.exists() { Some(init) } else { None }
620 } else {
621 tracing::debug!(target = ?target_file, import = %module, "target file not found");
622 None
623 }
624}
625
626fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
628 let source = match std::fs::read_to_string(target_path) {
629 Ok(s) => s,
630 Err(e) => {
631 tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
632 return None;
633 }
634 };
635
636 use tree_sitter::Parser;
638 let lang_info = crate::languages::get_language_info("python")?;
639 let mut parser = Parser::new();
640 if parser.set_language(&lang_info.language).is_err() {
641 return None;
642 }
643 let tree = parser.parse(&source, None)?;
644
645 let mut symbols = Vec::new();
647 extract_all_from_tree(&tree, &source, &mut symbols);
648 if !symbols.is_empty() {
649 tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
650 return Some(symbols);
651 }
652
653 let root = tree.root_node();
655 let mut cursor = root.walk();
656 for child in root.children(&mut cursor) {
657 match child.kind() {
658 "function_definition" => {
659 if let Some(name_node) = child.child_by_field_name("name") {
660 let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
661 if !name.starts_with('_') {
662 symbols.push(name);
663 }
664 }
665 }
666 "class_definition" => {
667 if let Some(name_node) = child.child_by_field_name("name") {
668 let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
669 if !name.starts_with('_') {
670 symbols.push(name);
671 }
672 }
673 }
674 _ => {}
675 }
676 }
677 tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
678 Some(symbols)
679}
680
681fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
683 let root = tree.root_node();
684 let mut cursor = root.walk();
685 for child in root.children(&mut cursor) {
686 if child.kind() == "simple_statement" {
687 let mut simple_cursor = child.walk();
689 for simple_child in child.children(&mut simple_cursor) {
690 if simple_child.kind() == "assignment"
691 && let Some(left) = simple_child.child_by_field_name("left")
692 {
693 let target_text = source[left.start_byte()..left.end_byte()].trim();
694 if target_text == "__all__"
695 && let Some(right) = simple_child.child_by_field_name("right")
696 {
697 extract_string_list_from_list_node(&right, source, result);
698 }
699 }
700 }
701 } else if child.kind() == "expression_statement" {
702 let mut stmt_cursor = child.walk();
704 for stmt_child in child.children(&mut stmt_cursor) {
705 if stmt_child.kind() == "assignment"
706 && let Some(left) = stmt_child.child_by_field_name("left")
707 {
708 let target_text = source[left.start_byte()..left.end_byte()].trim();
709 if target_text == "__all__"
710 && let Some(right) = stmt_child.child_by_field_name("right")
711 {
712 extract_string_list_from_list_node(&right, source, result);
713 }
714 }
715 }
716 }
717 }
718}
719
720fn extract_string_list_from_list_node(
722 list_node: &tree_sitter::Node,
723 source: &str,
724 result: &mut Vec<String>,
725) {
726 let mut cursor = list_node.walk();
727 for child in list_node.named_children(&mut cursor) {
728 if child.kind() == "string" {
729 let raw = source[child.start_byte()..child.end_byte()].trim();
730 let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
732 if !unquoted.is_empty() {
733 result.push(unquoted);
734 }
735 }
736 }
737}