use anyhow::{Context, Result};
use chrono::Utc;
use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::Instant;
use crate::metrics::complexity;
use crate::snapshot::{FileComplexity, FileEntry, RepoSnapshot, TimeWindow};
use super::exclude::is_excluded;
use super::import_resolver::{resolve_imports, RawImports};
use super::progress::{NoProgress, Progress};
use super::Collector;
impl Collector {
/// Analyses every non-binary entry in `files` in parallel, reporting progress as it goes.
///
/// Each file is read from disk under the repository root and handed to
/// `complexity::analyse_file` and `complexity::extract_file_imports`. Files that cannot be
/// read into a `String` (missing on disk, not valid UTF-8, ...) are skipped and get no entry
/// in either returned map.
///
/// `progress` is advanced by one for every non-binary entry, including skipped ones, so a
/// bar whose length is the non-binary file count always runs to completion.
///
/// Returns `(metrics keyed by entry path, raw imports keyed by entry path)`; files for which
/// no imports were extracted are omitted from the second map.
pub(super) fn collect_file_metrics_with_progress(
    &self,
    files: &[FileEntry],
    progress: &dyn Progress,
) -> (HashMap<PathBuf, FileComplexity>, RawImports) {
    let root = self.repo_path();
    let results: Vec<(PathBuf, FileComplexity, Vec<String>)> = files
        .par_iter()
        .filter(|entry| !entry.is_binary)
        .filter_map(|entry| {
            let abs_path = root.join(&entry.path);
            // Best-effort read: an unreadable file is skipped rather than failing the run.
            let analysed = std::fs::read_to_string(&abs_path).ok().map(|content| {
                let metrics = complexity::analyse_file(&entry.path, &content);
                let imports = complexity::extract_file_imports(&entry.path, &content);
                (entry.path.clone(), metrics, imports)
            });
            // Tick after the work and regardless of the outcome: the caller sizes the bar
            // to all non-binary files, so skipped files must count as processed too.
            progress.inc(1);
            analysed
        })
        .collect();

    // Every result contributes exactly one metrics entry, so the size is known up front.
    let mut file_metrics = HashMap::with_capacity(results.len());
    let mut raw_imports = HashMap::new();
    for (path, metrics, imports) in results {
        file_metrics.insert(path.clone(), metrics);
        if !imports.is_empty() {
            raw_imports.insert(path, imports);
        }
    }
    (file_metrics, raw_imports)
}
/// Runs the full collection pipeline and assembles a [`RepoSnapshot`].
///
/// Phases, in order:
/// 1. walk commits (`collect_commits`),
/// 2. list the file tree (`collect_files`) and apply the exclude filters,
/// 3. blame the non-binary files touched by the collected commits (unless `skip_blame`),
/// 4. analyse every remaining non-binary file for complexity and imports,
/// 5. resolve imports, build the snapshot and its indexes.
///
/// # Arguments
/// * `show_progress` - draw spinners/progress bars and print status lines to stderr.
/// * `verbose` - print per-phase wall-clock timings to stderr at the end.
/// * `skip_blame` - skip the blame phase; the snapshot's `blame_map` is left empty.
/// * `no_cache` - start from an empty blame cache instead of loading the stored one; the
///   refreshed cache is still saved afterwards.
/// * `exclude_patterns`, `exclude_extensions`, `use_default_excludes` - forwarded to
///   `is_excluded` to filter the file tree; when all three are empty/false no filtering runs.
///
/// # Errors
/// Propagates failures from commit collection, file collection, blame collection and the
/// HEAD lookup. A blame-cache load failure falls back to an empty cache, and a save failure
/// only prints a warning.
#[allow(clippy::too_many_arguments)]
pub(super) fn collect_snapshot_inner(
    &self,
    show_progress: bool,
    verbose: bool,
    skip_blame: bool,
    no_cache: bool,
    exclude_patterns: &[String],
    exclude_extensions: &[String],
    use_default_excludes: bool,
) -> Result<RepoSnapshot> {
    // Spinner factory for the phases without a known item count; yields `None` when
    // progress output is disabled so call sites can stay unconditional.
    let make_spinner = |msg: &str| -> Option<ProgressBar> {
        if !show_progress {
            return None;
        }
        let sp = ProgressBar::new_spinner();
        sp.set_style(
            ProgressStyle::default_spinner()
                .template(" {spinner:.cyan} {msg}")
                .unwrap(),
        );
        sp.set_message(msg.to_string());
        sp.enable_steady_tick(std::time::Duration::from_millis(80));
        Some(sp)
    };
    // Shared style for the two counted phases (blame and complexity).
    let bar_style = ProgressStyle::default_bar()
        .template(" {spinner:.cyan} {msg} [{bar:30.cyan/dim}] {pos}/{len} ({eta})")
        .unwrap()
        .progress_chars("━╸─");

    // Phase 1: commits.
    let sp = make_spinner("Walking commits...");
    let t = Instant::now();
    let collection = self.collect_commits()?;
    let commits_ms = t.elapsed().as_millis();
    if let Some(s) = sp {
        s.finish_and_clear();
    }

    // Phase 2: file tree, minus excluded entries.
    let sp = make_spinner(&format!(
        "Found {} commits. Collecting file tree...",
        collection.commits.len()
    ));
    let t = Instant::now();
    let all_files = self.collect_files()?;
    let has_excludes =
        !exclude_patterns.is_empty() || !exclude_extensions.is_empty() || use_default_excludes;
    let (files, excluded_count) = if has_excludes {
        let before = all_files.len();
        let filtered: Vec<FileEntry> = all_files
            .into_iter()
            .filter(|f| {
                !is_excluded(
                    &f.path,
                    exclude_patterns,
                    exclude_extensions,
                    use_default_excludes,
                )
            })
            .collect();
        let after = filtered.len();
        (filtered, before - after)
    } else {
        (all_files, 0)
    };
    let files_ms = t.elapsed().as_millis();
    if let Some(s) = sp {
        s.finish_and_clear();
    }
    if show_progress && excluded_count > 0 {
        eprintln!(
            " Excluded {} files ({} remaining)",
            excluded_count,
            files.len()
        );
    }

    // Only files touched by at least one collected commit are blamed.
    let changed_paths: std::collections::HashSet<PathBuf> = collection
        .commits
        .iter()
        .flat_map(|c| c.files_changed.iter().map(|fc| fc.path.clone()))
        .collect();
    let blame_files: Vec<FileEntry> = files
        .iter()
        .filter(|f| !f.is_binary && changed_paths.contains(&f.path))
        .cloned()
        .collect();
    let non_binary_changed: u64 = blame_files.len() as u64;
    let non_binary_total: u64 = files.iter().filter(|f| !f.is_binary).count() as u64;

    // Phase 3: blame (optional, cache-assisted).
    let t = Instant::now();
    let blame_map = if skip_blame {
        if show_progress {
            eprintln!(
                " Skipping blame ({} files) — use without --skip-blame for full analysis",
                non_binary_total
            );
        }
        HashMap::new()
    } else {
        // `no_cache` only bypasses reading the stored cache; a load error is treated the
        // same way (start empty).
        let blame_cache = if no_cache {
            crate::cache::blame::BlameCache::default()
        } else {
            crate::cache::blame::load(self.repo_path()).unwrap_or_default()
        };
        if show_progress && non_binary_changed < non_binary_total {
            eprintln!(
                " Selective blame: {}/{} files changed in window",
                non_binary_changed, non_binary_total
            );
        }
        let cached_count = blame_files
            .iter()
            .filter(|f| blame_cache.entries.contains_key(&f.blob_oid))
            .count();
        if show_progress && cached_count > 0 {
            eprintln!(
                " Blame cache: {}/{} files cached",
                cached_count, non_binary_changed
            );
        }
        let blame_bar = if show_progress {
            let pb = ProgressBar::new(non_binary_changed);
            pb.set_style(bar_style.clone());
            pb.set_message("Blaming files");
            pb.enable_steady_tick(std::time::Duration::from_millis(80));
            Some(pb)
        } else {
            None
        };
        let blame_progress: &dyn Progress = match &blame_bar {
            Some(pb) => pb,
            None => &NoProgress,
        };
        let (map, mut updated_cache) = self.collect_blame_cached(
            &blame_files,
            &collection.authors,
            &collection.raw_email_to_id,
            &blame_cache,
            blame_progress,
        )?;
        if let Some(pb) = blame_bar {
            pb.finish_and_clear();
        }
        // Prune against the blob OIDs of the current (post-exclude) tree before saving.
        // NOTE(review): presumably `prune` drops entries whose OID is not in the set —
        // confirm against `BlameCache::prune`.
        let current_oids: std::collections::HashSet<String> =
            files.iter().map(|f| f.blob_oid.clone()).collect();
        updated_cache.prune(&current_oids);
        // Saving the cache is best-effort: a failure must not fail the collection.
        if let Err(e) = crate::cache::blame::save(&updated_cache, self.repo_path()) {
            eprintln!("Warning: Failed to save blame cache: {}", e);
        }
        map
    };
    let blame_ms = t.elapsed().as_millis();

    // Phase 4: complexity and raw imports for every non-binary file.
    let complexity_bar = if show_progress {
        let pb = ProgressBar::new(non_binary_total);
        pb.set_style(bar_style);
        pb.set_message("Analysing complexity");
        pb.enable_steady_tick(std::time::Duration::from_millis(80));
        Some(pb)
    } else {
        None
    };
    let t = Instant::now();
    let complexity_progress: &dyn Progress = match &complexity_bar {
        Some(pb) => pb,
        None => &NoProgress,
    };
    let (file_metrics, raw_imports) =
        self.collect_file_metrics_with_progress(&files, complexity_progress);
    let complexity_ms = t.elapsed().as_millis();
    if let Some(pb) = complexity_bar {
        pb.finish_and_clear();
    }

    // Phase 5: resolve imports, assemble the snapshot, build derived indexes.
    let sp = make_spinner("Building indexes...");
    let t = Instant::now();
    let head = self.head_commit_hash()?;
    let import_graph = resolve_imports(&raw_imports, &files);
    let mut snapshot = RepoSnapshot {
        path: self.repo_path().to_path_buf(),
        name: self.repo_name(),
        default_branch: self.default_branch(),
        time_window: self.time_window.clone(),
        head_commit: head,
        created_at: Utc::now(),
        commits: collection.commits,
        files,
        authors: collection.authors,
        blame_map,
        // The three index fields start empty; `build_indexes` is called right after.
        commits_by_author: HashMap::new(),
        commits_by_file: HashMap::new(),
        file_change_pairs: Vec::new(),
        file_metrics,
        import_graph,
        commit_interner: collection.interner,
    };
    snapshot.build_indexes();
    let indexes_ms = t.elapsed().as_millis();
    if let Some(s) = sp {
        s.finish_and_clear();
    }
    if verbose {
        eprintln!(
            " Timings: commits {}ms, files {}ms, blame {}ms, complexity {}ms, indexes {}ms",
            commits_ms, files_ms, blame_ms, complexity_ms, indexes_ms
        );
    }
    Ok(snapshot)
}
/// Builds a lightweight [`RepoSnapshot`] of the repository as of commit `sha`.
///
/// The repository is discovered from `repo_path`, commits and the file tree are collected
/// at `sha` over the full-history time window, and the indexes are built. Blame data,
/// per-file metrics and the import graph are left empty; `_skip_blame` is accepted but
/// not consulted.
///
/// The snapshot name is the final component of `repo_path` (or `"unknown"` when there is
/// none or it is not valid UTF-8), and the branch is the shorthand of the repository's
/// current HEAD (or `"main"` when that is unavailable).
///
/// # Errors
/// Fails when `repo_path` is not inside a git repository, or when collecting commits or
/// files at `sha` fails.
pub fn collect_snapshot_at(
    repo_path: &Path,
    sha: &str,
    _skip_blame: bool,
) -> Result<RepoSnapshot> {
    let repo = git2::Repository::discover(repo_path)
        .with_context(|| format!("'{}' is not a git repository", repo_path.display()))?;

    let time_window = TimeWindow::full_history();
    let history = super::libgit::collect_commits_at(&repo, sha, &time_window)?;
    let tree_files = super::libgit::collect_files_at(&repo, sha)?;

    // Display name: last path component, if it is representable as UTF-8.
    let name = match repo_path.file_name().and_then(|part| part.to_str()) {
        Some(text) => text.to_string(),
        None => "unknown".to_string(),
    };

    // Branch label: shorthand of HEAD, with a fixed fallback.
    let default_branch = match repo.head() {
        Ok(head_ref) => head_ref.shorthand().map(String::from),
        Err(_) => None,
    }
    .unwrap_or_else(|| "main".to_string());

    let mut snapshot = RepoSnapshot {
        path: repo_path.to_path_buf(),
        name,
        default_branch,
        time_window,
        head_commit: sha.to_string(),
        created_at: Utc::now(),
        commits: history.commits,
        files: tree_files,
        authors: history.authors,
        // No blame is computed for point-in-time snapshots.
        blame_map: HashMap::new(),
        commits_by_author: HashMap::new(),
        commits_by_file: HashMap::new(),
        file_change_pairs: Vec::new(),
        file_metrics: HashMap::new(),
        import_graph: HashMap::new(),
        commit_interner: history.interner,
    };
    snapshot.build_indexes();
    Ok(snapshot)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::snapshot::TimeWindow;
/// Repository path used by the tests in this module: the value of the
/// `BARAD_DUR_TEST_REPO` environment variable when it can be read, otherwise `"."`.
fn test_repo_path() -> std::path::PathBuf {
    match std::env::var("BARAD_DUR_TEST_REPO") {
        Ok(dir) => std::path::PathBuf::from(dir),
        Err(_) => std::path::PathBuf::from("."),
    }
}
// Every collected file entry must carry a 40-character blob OID.
#[test]
fn collect_files_populates_blob_oid() {
    // Skip quietly when the test path cannot be opened by the collector.
    let collector = match Collector::open(&test_repo_path(), TimeWindow::default()) {
        Ok(opened) => opened,
        Err(_) => return,
    };
    let entries = collector.collect_files().expect("should collect files");
    assert!(!entries.is_empty());
    for entry in entries.iter() {
        let oid = &entry.blob_oid;
        assert!(
            !oid.is_empty(),
            "blob_oid should be populated for {}",
            entry.path.display()
        );
        assert_eq!(oid.len(), 40, "blob_oid should be 40 hex chars");
    }
}
// Blaming with the cache produced by a first run must yield a map of the same size.
#[test]
fn collect_blame_uses_cache_for_known_blobs() {
    // Skip quietly when the test path cannot be opened by the collector.
    let collector = match Collector::open(&test_repo_path(), TimeWindow::default()) {
        Ok(opened) => opened,
        Err(_) => return,
    };
    let tree = collector.collect_files().expect("should collect files");
    let history = collector.collect_commits().expect("should collect commits");

    // First pass starts from an empty cache and must populate both outputs.
    let empty_cache = crate::cache::blame::BlameCache::default();
    let first_pass = collector.collect_blame_cached(
        &tree,
        &history.authors,
        &history.raw_email_to_id,
        &empty_cache,
        &NoProgress,
    );
    let (cold_map, filled_cache) = first_pass.expect("should collect blame");
    assert!(!cold_map.is_empty());
    assert!(!filled_cache.entries.is_empty());

    // Second pass is fed the cache returned by the first one.
    let second_pass = collector.collect_blame_cached(
        &tree,
        &history.authors,
        &history.raw_email_to_id,
        &filled_cache,
        &NoProgress,
    );
    let (warm_map, _) = second_pass.expect("should collect blame from cache");
    assert_eq!(cold_map.len(), warm_map.len());
}
// Smoke test: metric collection over the test repository completes and covers a `.rs` file.
#[test]
fn collect_file_metrics_does_not_panic_on_real_repo() {
    // Skip quietly when the test path cannot be opened by the collector.
    let collector = match Collector::open(&test_repo_path(), TimeWindow::default()) {
        Ok(opened) => opened,
        Err(_) => return,
    };
    let tree = collector.collect_files().expect("should collect files");
    let metrics = collector.collect_file_metrics(&tree);
    assert!(!metrics.is_empty());

    let has_rust_source = metrics
        .keys()
        .any(|path| path.extension().and_then(|ext| ext.to_str()) == Some("rs"));
    assert!(has_rust_source, "expected at least one .rs file");
}
}