use crate::{
analysis::call_graph::RustCallGraphBuilder,
analyzers::rust_call_graph::extract_call_graph_multi_file,
config,
core::Language,
io,
priority::{
call_graph::{CallGraph, FunctionId},
parallel_call_graph::{ParallelCallGraph, ParallelConfig},
},
};
use anyhow::{Context, Result};
use rayon::prelude::*;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// Stage of the call-graph build that a [`CallGraphProgress`] update refers to.
///
/// Variants are listed in the order the builder reports them.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CallGraphPhase {
    /// Reported before and after the project is searched for Rust files.
    DiscoveringFiles,
    /// Reported before a batch of files is read and parsed.
    ParsingASTs,
    /// Reported before calls are extracted from a parsed batch.
    ExtractingCalls,
    /// Reported before the final graph is assembled and cross-file calls are resolved.
    LinkingModules,
}
/// A single progress notification passed to the caller-supplied callback.
#[derive(Clone, Debug)]
pub struct CallGraphProgress {
    /// Stage of the build this update belongs to.
    pub phase: CallGraphPhase,
    /// Units completed so far within `phase` (the builder passes file counts, or 0).
    pub current: usize,
    /// Units expected within `phase`; the builder passes 0 when no total applies.
    pub total: usize,
}
/// Stateless builder that turns a project's Rust files into a call graph,
/// processing the files in batches.
///
/// The type carries no fields; all state lives in the arguments passed to its
/// `build_*` methods.
pub struct ParallelCallGraphBuilder;
impl Default for ParallelCallGraphBuilder {
fn default() -> Self {
Self::new()
}
}
impl ParallelCallGraphBuilder {
pub fn new() -> Self {
Self
}
pub fn with_config(_config: ParallelConfig) -> Self {
Self
}
pub fn build_parallel<F>(
&self,
project_path: &Path,
base_graph: CallGraph,
progress_callback: F,
) -> Result<(CallGraph, HashSet<FunctionId>, HashSet<FunctionId>)>
where
F: FnMut(CallGraphProgress) + Send + Sync,
{
self.build_parallel_with_files(project_path, base_graph, None, progress_callback)
}
pub fn build_parallel_with_files<F>(
&self,
project_path: &Path,
base_graph: CallGraph,
rust_files: Option<&[PathBuf]>,
mut progress_callback: F,
) -> Result<(CallGraph, HashSet<FunctionId>, HashSet<FunctionId>)>
where
F: FnMut(CallGraphProgress) + Send + Sync,
{
let discovered_files: Vec<PathBuf>;
let rust_files = match rust_files {
Some(files) => {
log::info!("Using {} pre-discovered Rust files", files.len());
files
}
None => {
progress_callback(CallGraphProgress {
phase: CallGraphPhase::DiscoveringFiles,
current: 0,
total: 0,
});
let config = config::get_config();
discovered_files = io::walker::find_project_files_with_config(
project_path,
vec![Language::Rust],
config,
)
.context("Failed to find Rust files for call graph")?;
log::info!("Discovered {} Rust files", discovered_files.len());
progress_callback(CallGraphProgress {
phase: CallGraphPhase::DiscoveringFiles,
current: discovered_files.len(),
total: discovered_files.len(),
});
&discovered_files
}
};
let total_files = rust_files.len();
log::info!("Processing {} Rust files in parallel", total_files);
let parallel_graph = Arc::new(ParallelCallGraph::new(total_files));
parallel_graph.merge_concurrent(base_graph);
const BATCH_SIZE: usize = 200;
let mut all_framework_exclusions = HashSet::new();
let mut all_function_pointer_used = HashSet::new();
let mut files_processed = 0;
std::thread::sleep(std::time::Duration::from_millis(150));
for batch in rust_files.chunks(BATCH_SIZE) {
let batch_start = files_processed;
let batch_end = batch_start + batch.len();
progress_callback(CallGraphProgress {
phase: CallGraphPhase::ParsingASTs,
current: batch_start,
total: total_files,
});
let parsed_files = self.parallel_parse_files_batch(batch, ¶llel_graph)?;
progress_callback(CallGraphProgress {
phase: CallGraphPhase::ExtractingCalls,
current: batch_start,
total: total_files,
});
self.parallel_multi_file_extraction(&parsed_files, ¶llel_graph)?;
let (batch_framework_exclusions, batch_function_pointer_used) =
self.parallel_enhanced_analysis(&parsed_files, ¶llel_graph)?;
all_framework_exclusions.extend(batch_framework_exclusions);
all_function_pointer_used.extend(batch_function_pointer_used);
files_processed = batch_end;
crate::core::parsing::reset_span_locations();
log::debug!(
"Processed batch {}/{} ({} files)",
batch_end,
total_files,
batch.len()
);
}
progress_callback(CallGraphProgress {
phase: CallGraphPhase::LinkingModules,
current: 0,
total: 0,
});
let mut final_graph = parallel_graph.to_call_graph();
final_graph.resolve_cross_file_calls();
let stats = parallel_graph.stats();
log::info!(
"Parallel call graph complete: {} nodes, {} edges, {} files processed in {} batches",
stats.total_nodes.load(std::sync::atomic::Ordering::Relaxed),
stats.total_edges.load(std::sync::atomic::Ordering::Relaxed),
stats
.files_processed
.load(std::sync::atomic::Ordering::Relaxed),
total_files.div_ceil(BATCH_SIZE),
);
Ok((
final_graph,
all_framework_exclusions,
all_function_pointer_used,
))
}
/// Reads and parses one batch of files, skipping any file that fails.
///
/// Files are read in parallel and then parsed one after another. A file that
/// cannot be read or parsed is logged as a warning and left out of the result;
/// it never makes the whole batch fail. Each successfully parsed file
/// increments the file counter in `parallel_graph`'s stats.
///
/// # Returns
/// `(path, ast)` pairs for the files that parsed. With the current body this
/// is always `Ok`.
fn parallel_parse_files_batch(
    &self,
    batch: &[PathBuf],
    parallel_graph: &Arc<ParallelCallGraph>,
) -> Result<Vec<(PathBuf, syn::File)>> {
    let file_contents: Vec<_> = batch
        .par_iter()
        .filter_map(|file_path| match io::read_file(file_path) {
            Ok(content) => Some((file_path.clone(), content)),
            Err(e) => {
                log::warn!("Failed to read file {}: {}", file_path.display(), e);
                None
            }
        })
        .collect();

    // NOTE(review): parsing is sequential in the original code; presumably
    // deliberate — confirm before switching this to a parallel iterator.
    // Consuming `file_contents` moves each path into the result instead of cloning it.
    let parsed_files: Vec<_> = file_contents
        .into_iter()
        .filter_map(|(file_path, content)| match syn::parse_file(&content) {
            Ok(parsed) => {
                parallel_graph.stats().increment_files();
                Some((file_path, parsed))
            }
            Err(e) => {
                // Report parse failures the same way read failures are reported,
                // so a skipped file is never silent.
                log::warn!("Failed to parse file {}: {}", file_path.display(), e);
                None
            }
        })
        .collect();

    Ok(parsed_files)
}
#[allow(dead_code)]
fn parallel_parse_files_with_progress<F>(
&self,
rust_files: &[PathBuf],
parallel_graph: &Arc<ParallelCallGraph>,
progress_callback: &mut F,
) -> Result<Vec<(PathBuf, syn::File)>>
where
F: FnMut(CallGraphProgress) + Send + Sync,
{
use std::sync::atomic::{AtomicUsize, Ordering};
let file_contents: Vec<_> = rust_files
.par_iter()
.filter_map(|file_path| {
let content = io::read_file(file_path)
.map_err(|e| {
eprintln!(
"Warning: Failed to read file {}: {}",
file_path.display(),
e
);
e
})
.ok()?;
Some((file_path.clone(), content))
})
.collect();
let total_files = file_contents.len();
let parsed_count = Arc::new(AtomicUsize::new(0));
let parsed_files: Vec<_> = file_contents
.iter()
.enumerate()
.filter_map(|(idx, (file_path, content))| {
let parsed = syn::parse_file(content).ok()?;
parallel_graph.stats().increment_files();
let count = parsed_count.fetch_add(1, Ordering::Relaxed) + 1;
if count % 10 == 0 || count == total_files {
progress_callback(CallGraphProgress {
phase: CallGraphPhase::ParsingASTs,
current: count,
total: total_files,
});
}
crate::io::progress::AnalysisProgress::with_global(|p| {
p.update_progress(crate::io::progress::PhaseProgress::Progress {
current: idx + 1,
total: total_files,
});
});
Some((file_path.clone(), parsed))
})
.collect();
Ok(parsed_files)
}
/// Extracts a call graph from the parsed batch and merges it into `parallel_graph`.
///
/// `extract_call_graph_multi_file` is given `(ast, path)` pairs, the reverse of
/// the `(path, ast)` tuples held here, so each pair is cloned into that order first.
///
/// # Returns
/// Always `Ok(())` with the current body.
fn parallel_multi_file_extraction(
    &self,
    parsed_files: &[(PathBuf, syn::File)],
    parallel_graph: &Arc<ParallelCallGraph>,
) -> Result<()> {
    let mut ast_path_pairs = Vec::with_capacity(parsed_files.len());
    for (path, ast) in parsed_files {
        ast_path_pairs.push((ast.clone(), path.clone()));
    }

    let extracted_graph = extract_call_graph_multi_file(&ast_path_pairs);
    parallel_graph.merge_concurrent(extracted_graph);
    Ok(())
}
/// Runs the enhanced analyses over one parsed batch and merges the result back.
///
/// A `RustCallGraphBuilder` is seeded with a snapshot of `parallel_graph`. Every
/// file goes through the basic-call, trait-dispatch, function-pointer and
/// framework-pattern passes, followed by the cross-module pass and trait
/// finalisation over the whole batch. The enhanced base graph is then merged
/// into `parallel_graph`.
///
/// # Returns
/// `(framework_exclusions, function_pointer_used)` as reported by the enhanced
/// graph's framework-pattern and function-pointer trackers.
///
/// # Errors
/// Propagates the first error returned by any analysis pass.
fn parallel_enhanced_analysis(
    &self,
    parsed_files: &[(PathBuf, syn::File)],
    parallel_graph: &Arc<ParallelCallGraph>,
) -> Result<(HashSet<FunctionId>, HashSet<FunctionId>)> {
    let base_graph = parallel_graph.to_call_graph();
    let mut enhanced_builder = RustCallGraphBuilder::from_base_graph(base_graph);

    // `parsed_files` is only read, so it is borrowed directly rather than
    // deep-cloning every AST into a temporary vector first.
    for (file_path, parsed) in parsed_files {
        enhanced_builder
            .analyze_basic_calls(file_path, parsed)?
            .analyze_trait_dispatch(file_path, parsed)?
            .analyze_function_pointers(file_path, parsed)?
            .analyze_framework_patterns(file_path, parsed)?;
    }
    enhanced_builder.analyze_cross_module(parsed_files)?;
    enhanced_builder.finalize_trait_analysis()?;

    let enhanced_graph = enhanced_builder.build();

    let framework_exclusions: HashSet<FunctionId> = enhanced_graph
        .framework_patterns
        .get_exclusions()
        .into_iter()
        .collect();
    let function_pointer_used: HashSet<FunctionId> = enhanced_graph
        .function_pointer_tracker
        .get_definitely_used_functions()
        .into_iter()
        .collect();

    parallel_graph.merge_concurrent(enhanced_graph.base_graph);

    Ok((framework_exclusions, function_pointer_used))
}
}
pub fn build_call_graph_parallel<F>(
project_path: &Path,
base_graph: CallGraph,
num_threads: Option<usize>,
progress_callback: F,
) -> Result<(CallGraph, HashSet<FunctionId>, HashSet<FunctionId>)>
where
F: FnMut(CallGraphProgress) + Send + Sync,
{
build_call_graph_parallel_with_files(
project_path,
base_graph,
num_threads,
None,
progress_callback,
)
}
/// Builds the call graph with a [`ParallelCallGraphBuilder`], optionally from a
/// pre-discovered list of Rust files.
///
/// `num_threads`, when given, is applied to a default [`ParallelConfig`] which is
/// then handed to `ParallelCallGraphBuilder::with_config`.
///
/// NOTE(review): in this file `with_config` discards its argument, so
/// `num_threads` has no visible effect on the build — confirm whether that is
/// intended.
///
/// # Errors
/// Propagates any error from `build_parallel_with_files`.
pub fn build_call_graph_parallel_with_files<F>(
    project_path: &Path,
    base_graph: CallGraph,
    num_threads: Option<usize>,
    rust_files: Option<&[PathBuf]>,
    progress_callback: F,
) -> Result<(CallGraph, HashSet<FunctionId>, HashSet<FunctionId>)>
where
    F: FnMut(CallGraphProgress) + Send + Sync,
{
    let config = match num_threads {
        Some(threads) => ParallelConfig::default().with_threads(threads),
        None => ParallelConfig::default(),
    };

    ParallelCallGraphBuilder::with_config(config).build_parallel_with_files(
        project_path,
        base_graph,
        rust_files,
        progress_callback,
    )
}
use crate::extraction::{ExtractedFileData, ExtractedFunctionData};
use std::collections::HashMap;
pub fn build_call_graph_from_extracted(
base_graph: CallGraph,
extracted: &HashMap<PathBuf, ExtractedFileData>,
) -> (CallGraph, HashSet<FunctionId>, HashSet<FunctionId>) {
use crate::priority::call_graph::CallType as GraphCallType;
let parallel_graph =
Arc::new(crate::priority::parallel_call_graph::ParallelCallGraph::new(extracted.len()));
parallel_graph.merge_concurrent(base_graph);
let mut sorted_extracted: Vec<_> = extracted.iter().collect();
sorted_extracted.sort_by(|a, b| a.0.cmp(b.0));
let callee_index = CalleeResolutionIndex::from_sorted_extracted(&sorted_extracted);
for (path, file_data) in sorted_extracted {
for func in &file_data.functions {
let func_id = FunctionId::new(path.clone(), func.qualified_name.clone(), func.line);
parallel_graph.add_function(
func_id.clone(),
false, func.is_test,
func.cyclomatic,
func.length,
);
for call_site in &func.calls {
let callee_id = resolve_callee_from_extracted(
&call_site.callee_name,
&call_site.call_type,
path,
&callee_index,
);
if let Some(callee) = callee_id {
parallel_graph.add_call(func_id.clone(), callee, GraphCallType::Direct);
}
}
}
parallel_graph.stats().increment_files();
}
let mut final_graph = parallel_graph.to_call_graph();
final_graph.resolve_cross_file_calls();
let framework_exclusions = HashSet::new();
let function_pointer_used = HashSet::new();
log::info!(
"Call graph from extracted data: {} nodes in {} files",
parallel_graph
.stats()
.total_nodes
.load(std::sync::atomic::Ordering::Relaxed),
extracted.len()
);
(final_graph, framework_exclusions, function_pointer_used)
}
/// Lookup tables used to resolve an extracted call-site name to a `FunctionId`.
///
/// The tables are filled file by file; where a table keeps a single entry per
/// key, the first function registered under that key wins.
struct CalleeResolutionIndex {
/// Per file: qualified name and simple name -> first function registered
/// under that name in that file.
same_file_functions: HashMap<PathBuf, HashMap<String, FunctionId>>,
/// Across all files: qualified name -> first function registered with it.
qualified_functions: HashMap<String, FunctionId>,
/// Across all files: simple name -> every function with that name, in
/// registration order.
method_functions: HashMap<String, Vec<FunctionId>>,
}
impl CalleeResolutionIndex {
    /// Builds the index by registering each file's functions in slice order.
    ///
    /// The single-entry tables keep the first registration for a key and
    /// `method_functions` keeps registration order, so the order of
    /// `sorted_extracted` decides which function wins when names collide.
    fn from_sorted_extracted(sorted_extracted: &[(&PathBuf, &ExtractedFileData)]) -> Self {
        let mut index = Self::empty();
        for &(path, file_data) in sorted_extracted {
            index.add_file_functions(path, &file_data.functions);
        }
        index
    }

    /// Returns an index with all three lookup tables empty.
    fn empty() -> Self {
        let same_file_functions = HashMap::new();
        let qualified_functions = HashMap::new();
        let method_functions = HashMap::new();
        Self {
            same_file_functions,
            qualified_functions,
            method_functions,
        }
    }

    /// Registers every function of the file at `path` in all three tables.
    ///
    /// An entry for `path` is created in `same_file_functions` even when
    /// `functions` is empty.
    fn add_file_functions(&mut self, path: &Path, functions: &[ExtractedFunctionData]) {
        let per_file = self
            .same_file_functions
            .entry(path.to_path_buf())
            .or_default();

        for func in functions {
            let id = extracted_function_id(path, func);

            // Same-file lookups accept both the qualified and the simple name.
            add_same_file_function(per_file, func, &id);

            // Cross-file lookups by qualified name keep only the first match.
            add_first_match(&mut self.qualified_functions, &func.qualified_name, &id);

            // Method lookups by simple name keep every candidate, in order.
            self.method_functions
                .entry(func.name.clone())
                .or_default()
                .push(id);
        }
    }
}
fn extracted_function_id(path: &Path, func: &ExtractedFunctionData) -> FunctionId {
FunctionId::new(path.to_path_buf(), func.qualified_name.clone(), func.line)
}
/// Registers `function_id` in a file's table under the function's qualified name
/// and then under its simple name; a name that is already present is left untouched.
fn add_same_file_function(
    file_functions: &mut HashMap<String, FunctionId>,
    func: &ExtractedFunctionData,
    function_id: &FunctionId,
) {
    // Qualified name first, then the simple name, matching lookup priority.
    for key in [&func.qualified_name, &func.name] {
        add_first_match(file_functions, key, function_id);
    }
}
/// Inserts a clone of `function_id` under `key` unless the key is already present.
///
/// The first registration for a key wins; later ones are ignored.
fn add_first_match(
    functions: &mut HashMap<String, FunctionId>,
    key: &str,
    function_id: &FunctionId,
) {
    if let std::collections::hash_map::Entry::Vacant(slot) = functions.entry(key.to_string()) {
        slot.insert(function_id.clone());
    }
}
/// Resolves an extracted call site to a known function, if any.
///
/// * Closure and function-pointer calls are never resolved.
/// * Method calls whose name `CallResolver::is_common_library_method` accepts are
///   not resolved; other method calls resolve to the first function registered
///   under that simple name.
/// * Direct, static-method and trait-method calls go through
///   `resolve_direct_callee` (same file first, then the qualified-name table).
fn resolve_callee_from_extracted(
    callee_name: &str,
    call_type: &crate::extraction::CallType,
    caller_file: &Path,
    index: &CalleeResolutionIndex,
) -> Option<FunctionId> {
    use crate::extraction::CallType;

    match call_type {
        CallType::Closure | CallType::FunctionPointer => None,
        CallType::Method
            if crate::analyzers::call_graph::CallResolver::is_common_library_method(
                callee_name,
            ) =>
        {
            None
        }
        CallType::Method => {
            let candidates = index.method_functions.get(callee_name)?;
            candidates.first().cloned()
        }
        CallType::Direct | CallType::StaticMethod | CallType::TraitMethod => {
            resolve_direct_callee(callee_name, caller_file, index)
        }
    }
}
/// Looks `callee_name` up in the caller's own file first and falls back to the
/// cross-file qualified-name table.
///
/// Returns `None` when neither table has an entry for the name.
fn resolve_direct_callee(
    callee_name: &str,
    caller_file: &Path,
    index: &CalleeResolutionIndex,
) -> Option<FunctionId> {
    let same_file_hit = index
        .same_file_functions
        .get(caller_file)
        .and_then(|by_name| by_name.get(callee_name));

    match same_file_hit {
        Some(found) => Some(found.clone()),
        None => index.qualified_functions.get(callee_name).cloned(),
    }
}
// Tests for resolving extracted call sites through `CalleeResolutionIndex`
// and for the graph built by `build_call_graph_from_extracted`.
#[cfg(test)]
mod extracted_call_resolution_tests {
use super::*;
use crate::extraction::{CallType, ExtractedFileData, ExtractedFunctionData};
// A direct call to `helper` must resolve to the caller's own file (where the
// simple name matches) even though another file registers `helper` as a
// qualified name.
#[test]
fn direct_calls_prefer_same_file_before_qualified_index() {
let caller = PathBuf::from("src/caller.rs");
let other = PathBuf::from("src/other.rs");
let extracted = extracted_files(vec![
(
caller.clone(),
vec![function("helper", "local::helper", 10)],
),
(other, vec![function("helper", "helper", 20)]),
]);
let index = CalleeResolutionIndex::from_sorted_extracted(&sorted(&extracted));
let resolved =
resolve_callee_from_extracted("helper", &CallType::Direct, &caller, &index).unwrap();
assert_eq!(resolved.file, caller);
assert_eq!(resolved.name, "local::helper");
assert_eq!(resolved.line, 10);
}
// Files are supplied in reverse path order; after sorting, `src/a.rs` is
// indexed first, so a method call to `run` resolves to `First::run`.
#[test]
fn method_calls_use_first_deterministic_name_match() {
let first = PathBuf::from("src/a.rs");
let second = PathBuf::from("src/b.rs");
let extracted = extracted_files(vec![
(second, vec![function("run", "Second::run", 20)]),
(first.clone(), vec![function("run", "First::run", 10)]),
]);
let index = CalleeResolutionIndex::from_sorted_extracted(&sorted(&extracted));
let resolved =
resolve_callee_from_extracted("run", &CallType::Method, &first, &index).unwrap();
assert_eq!(resolved.file, first);
assert_eq!(resolved.name, "First::run");
}
// Method calls named `filter`, `map`, `take` and `get` must stay unresolved
// even though project methods with those simple names are indexed.
#[test]
fn common_library_methods_do_not_resolve_by_simple_method_name() {
let caller = PathBuf::from("src/builders/parallel_unified_analysis.rs");
let support = PathBuf::from("src/support.rs");
let extracted = extracted_files(vec![
(caller.clone(), vec![function("entry", "entry", 5)]),
(
support,
vec![
function("filter", "LazyPipeline::filter", 10),
function("map", "LazyPipeline::map", 20),
function("take", "LazyPipeline::take", 30),
function("get", "PurityCache::get", 40),
],
),
]);
let index = CalleeResolutionIndex::from_sorted_extracted(&sorted(&extracted));
for method in ["filter", "map", "take", "get"] {
let resolved =
resolve_callee_from_extracted(method, &CallType::Method, &caller, &index);
assert!(
resolved.is_none(),
"common library method {method} should not resolve to unrelated project method {resolved:?}"
);
}
}
// End-to-end: `entry` makes one direct call, one static-method call and one
// method call, and the built graph must contain all three as callees.
#[test]
fn build_call_graph_from_extracted_preserves_direct_and_method_edges() {
let caller = PathBuf::from("src/caller.rs");
let helper = PathBuf::from("src/helper.rs");
let mut entry = function("entry", "entry", 5);
entry.calls = vec![
crate::extraction::CallSite {
callee_name: "local_helper".to_string(),
call_type: CallType::Direct,
line: 6,
},
crate::extraction::CallSite {
callee_name: "Helper::remote".to_string(),
call_type: CallType::StaticMethod,
line: 7,
},
crate::extraction::CallSite {
callee_name: "run".to_string(),
call_type: CallType::Method,
line: 8,
},
];
let extracted = extracted_files(vec![
(
caller.clone(),
vec![entry, function("local_helper", "local_helper", 20)],
),
(
helper.clone(),
vec![
function("remote", "Helper::remote", 10),
function("run", "Helper::run", 30),
],
),
]);
let (graph, _, _) = build_call_graph_from_extracted(CallGraph::new(), &extracted);
let entry_id = FunctionId::new(caller.clone(), "entry".to_string(), 5);
let callees = graph.get_callees_exact(&entry_id);
let callee_names: Vec<_> = callees.iter().map(|id| id.name.as_str()).collect();
assert_eq!(callees.len(), 3);
assert!(callee_names.contains(&"local_helper"));
assert!(callee_names.contains(&"Helper::remote"));
assert!(callee_names.contains(&"Helper::run"));
}
// Helper: builds the `path -> ExtractedFileData` map from (path, functions)
// pairs, starting from `ExtractedFileData::empty`.
fn extracted_files(
files: Vec<(PathBuf, Vec<ExtractedFunctionData>)>,
) -> HashMap<PathBuf, ExtractedFileData> {
files
.into_iter()
.map(|(path, functions)| {
let mut file_data = ExtractedFileData::empty(path.clone());
file_data.functions = functions;
(path, file_data)
})
.collect()
}
// Helper: a minimal extracted function with the given simple name, qualified
// name and line.
fn function(name: &str, qualified_name: &str, line: usize) -> ExtractedFunctionData {
let mut function = ExtractedFunctionData::minimal(name, line);
function.qualified_name = qualified_name.to_string();
function
}
// Helper: the map's entries sorted by path, the same ordering
// `build_call_graph_from_extracted` applies before indexing.
fn sorted(
extracted: &HashMap<PathBuf, ExtractedFileData>,
) -> Vec<(&PathBuf, &ExtractedFileData)> {
let mut sorted: Vec<_> = extracted.iter().collect();
sorted.sort_by(|a, b| a.0.cmp(b.0));
sorted
}
}