use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use chrono::Utc;
use git2::{DiffOptions, Oid, Repository, Sort};
use rayon::prelude::*;
use rustc_hash::FxHashMap as HashMap;
pub fn analyze_repo(
repo_path: &Path,
paths: Option<HashSet<PathBuf>>,
max_commits: Option<usize>,
) -> Result<Vec<(PathBuf, f64)>, git2::Error> {
let repo = Repository::discover(repo_path)?;
let oids = collect_commit_ids(&repo, max_commits)?;
let now_secs = Utc::now().timestamp();
let paths_arc: Option<Arc<HashSet<PathBuf>>> = paths.map(Arc::new);
let scores = compute_scores_parallel(&oids, repo_path, &paths_arc, now_secs);
Ok(scores.into_iter().collect())
}
fn collect_commit_ids(
repo: &Repository,
max_commits: Option<usize>,
) -> Result<Vec<Oid>, git2::Error> {
let mut revwalk = repo.revwalk()?;
revwalk.push_head()?;
revwalk.set_sorting(Sort::TIME)?; revwalk.simplify_first_parent()?;
let iter = revwalk.take(max_commits.unwrap_or(usize::MAX));
iter.collect()
}
fn compute_scores_parallel(
oids: &[Oid],
repo_path: &Path,
paths: &Option<Arc<HashSet<PathBuf>>>,
now_secs: i64,
) -> HashMap<PathBuf, f64> {
const COMMITS_PER_WORKER: usize = 250;
oids.par_chunks(COMMITS_PER_WORKER)
.map(|chunk| process_chunk(chunk, repo_path, paths, now_secs))
.reduce(HashMap::default, |mut acc, local| {
for (k, v) in local {
*acc.entry(k).or_default() += v;
}
acc
})
}
fn process_chunk(
chunk: &[Oid],
repo_path: &Path,
paths: &Option<Arc<HashSet<PathBuf>>>,
now_secs: i64,
) -> HashMap<PathBuf, f64> {
let repo = Repository::open(repo_path).expect("re-open repo inside worker");
let mut local_scores: HashMap<PathBuf, f64> = HashMap::default();
for oid in chunk {
let commit = match repo.find_commit(*oid) {
Ok(c) if c.parent_count() <= 1 => c, _ => continue,
};
let tree = match commit.tree() {
Ok(t) => t,
Err(_) => continue,
};
let age_days = ((now_secs - commit.time().seconds()) / 86_400).max(0) as f64;
let weight = 1.0 / (age_days + 1.0).powi(2);
let mut diff_opts = DiffOptions::new();
diff_opts
.context_lines(0)
.interhunk_lines(0)
.skip_binary_check(true)
.include_typechange(false);
if let Some(filter) = paths {
for p in filter.iter() {
diff_opts.pathspec(p);
}
}
let parent_tree = if commit.parent_count() == 1 {
commit.parent(0).ok().and_then(|p| p.tree().ok())
} else {
None
};
let diff =
match repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut diff_opts)) {
Ok(d) => d,
Err(_) => continue,
};
for delta in diff.deltas() {
if let Some(path) = delta.new_file().path() {
if paths.as_ref().map_or(true, |set| set.contains(path)) {
*local_scores.entry(path.to_path_buf()).or_default() += weight;
}
}
}
}
local_scores
}