use std::{
collections::{hash_map::Entry, HashMap},
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
};
use gix::bstr::BStr;
use crate::hours::{
util::{add_lines, remove_lines},
CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson,
};
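/// The number of minutes in an hour; numerically it also serves as seconds-per-minute in the conversions below.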
const MINUTES_PER_HOUR: f32 = 60.0;
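/// The number of hours in a typical workday.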
pub const HOURS_PER_WORKDAY: f32 = 8.0;
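/// Estimate the amount of hours worked by the author of the given `commits` with a simple heuristic:
/// a gap of less than two hours between consecutive commits counts as continuous work, while larger
/// gaps (as well as the first commit itself) contribute a flat two-hour allowance.
/// `commits` must be non-empty, and `stats` must be sorted by commit index to allow binary search.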
pub fn estimate_hours(
commits: &[(u32, gix::actor::SignatureRef<'static>)],
stats: &[(u32, FileStats, LineStats)],
) -> WorkByEmail {
assert!(!commits.is_empty());
const MAX_COMMIT_DIFFERENCE_IN_MINUTES: f32 = 2.0 * MINUTES_PER_HOUR;
const FIRST_COMMIT_ADDITION_IN_MINUTES: f32 = 2.0 * MINUTES_PER_HOUR;
let hours_for_commits = {
let mut hours = 0.0;
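        // Commits are expected newest-first; reversing the iterator walks them in chronological order.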
let mut commits = commits.iter().map(|t| &t.1).rev();
let mut cur = commits.next().expect("at least one commit if we are here");
for next in commits {
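            // The timestamp delta is in seconds; dividing by 60.0 (`MINUTES_PER_HOUR`) converts it to minutes.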
let change_in_minutes = (next.time.seconds.saturating_sub(cur.time.seconds)) as f32 / MINUTES_PER_HOUR;
if change_in_minutes < MAX_COMMIT_DIFFERENCE_IN_MINUTES {
hours += change_in_minutes / MINUTES_PER_HOUR;
} else {
hours += FIRST_COMMIT_ADDITION_IN_MINUTES / MINUTES_PER_HOUR;
}
cur = next;
}
hours
};
let author = &commits[0].1;
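    // Aggregate file- and line-statistics over all commits of this author,
    // looking up each commit by index in the sorted `stats` slice.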
let (files, lines) = (!stats.is_empty())
.then(|| {
commits
.iter()
.map(|t| &t.0)
.fold((FileStats::default(), LineStats::default()), |mut acc, id| match stats
.binary_search_by(|t| t.0.cmp(id))
{
Ok(idx) => {
let t = &stats[idx];
acc.0.add(&t.1);
acc.1.add(&t.2);
acc
}
Err(_) => acc,
})
})
.unwrap_or_default();
WorkByEmail {
name: author.name,
email: author.email,
        hours: FIRST_COMMIT_ADDITION_IN_MINUTES / MINUTES_PER_HOUR + hours_for_commits,
num_commits: commits.len() as u32,
files,
lines,
}
}
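/// Shared counters for the number of commits, changes and lines processed so far, in that order.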
type CommitChangeLineCounters = (Arc<AtomicUsize>, Arc<AtomicUsize>, Arc<AtomicUsize>);
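/// The sender used to feed chunks of commit-diff tasks to the workers, along with their join handles.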
type SpawnResultWithReturnChannelAndWorkers<'scope> = (
crossbeam_channel::Sender<Vec<(CommitIdx, Option<gix::hash::ObjectId>, gix::hash::ObjectId)>>,
Vec<std::thread::ScopedJoinHandle<'scope, anyhow::Result<Vec<(CommitIdx, FileStats, LineStats)>>>>,
);
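/// Spawn `threads` workers that compute tree-diff statistics for the commits received on the
/// returned channel, identified by `(commit index, parent id, commit id)` triples.
/// If `line_stats` is set, blob diffs are performed as well to count added and removed lines.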
pub fn spawn_tree_delta_threads<'scope>(
scope: &'scope std::thread::Scope<'scope, '_>,
threads: usize,
line_stats: bool,
repo: gix::Repository,
stat_counters: CommitChangeLineCounters,
) -> SpawnResultWithReturnChannelAndWorkers<'scope> {
let (tx, rx) = crossbeam_channel::unbounded::<Vec<(CommitIdx, Option<gix::hash::ObjectId>, gix::hash::ObjectId)>>();
let stat_workers = (0..threads)
.map(|_| {
scope.spawn({
                let stat_counters = stat_counters.clone();
let mut repo = repo.clone();
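                // Split roughly 850MB of object cache evenly among all workers.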
repo.object_cache_size_if_unset((850 * 1024 * 1024) / threads);
let rx = rx.clone();
move || -> Result<_, anyhow::Error> {
let mut out = Vec::new();
                    let (commits, changes, lines_count) = stat_counters;
let mut cache = line_stats
.then(|| -> anyhow::Result<_> {
Ok(repo.diff_resource_cache(gix::diff::blob::pipeline::Mode::ToGit, Default::default())?)
})
.transpose()?;
for chunk in rx {
for (commit_idx, parent_commit, commit) in chunk {
if let Some(cache) = cache.as_mut() {
cache.clear_resource_cache_keep_allocation();
}
commits.fetch_add(1, Ordering::Relaxed);
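                            // Stop early if the user interrupted the program, e.g. with Ctrl+C.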
if gix::interrupt::is_triggered() {
return Ok(out);
}
let mut files = FileStats::default();
let mut lines = LineStats::default();
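                            // Diff against the parent tree, or the empty tree for root commits.
                            // Commits whose trees cannot be found are skipped silently.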
let from = match parent_commit {
Some(id) => match repo.find_object(id).ok().and_then(|c| c.peel_to_tree().ok()) {
Some(tree) => tree,
None => continue,
},
None => repo.empty_tree(),
};
let to = match repo.find_object(commit).ok().and_then(|c| c.peel_to_tree().ok()) {
Some(c) => c,
None => continue,
};
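                            // Walk all changes between the two trees; rewrite tracking is
                            // disabled as renames don't affect these statistics.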
from.changes()?
.options(|opts| {
opts.track_filename().track_rewrites(None);
})
.for_each_to_obtain_tree(&to, |change| {
use gix::object::tree::diff::Change::*;
changes.fetch_add(1, Ordering::Relaxed);
match change {
                                        Rewrite { .. } => {
                                            unreachable!("rewrite tracking was disabled via `track_rewrites(None)` above")
                                        }
Addition { entry_mode, id, .. } => {
if entry_mode.is_no_tree() {
files.added += 1;
add_lines(line_stats, &lines_count, &mut lines, id);
}
}
Deletion { entry_mode, id, .. } => {
if entry_mode.is_no_tree() {
files.removed += 1;
remove_lines(line_stats, &lines_count, &mut lines, id);
}
}
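                                        // A modification may change the entry type, so treat
                                        // type transitions as file additions or removals.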
Modification {
entry_mode,
previous_entry_mode,
id,
previous_id,
..
} => match (previous_entry_mode.is_blob(), entry_mode.is_blob()) {
(false, false) => {}
(false, true) => {
files.added += 1;
add_lines(line_stats, &lines_count, &mut lines, id);
}
(true, false) => {
files.removed += 1;
remove_lines(line_stats, &lines_count, &mut lines, previous_id);
}
(true, true) => {
files.modified += 1;
if let Some(cache) = cache.as_mut() {
                                                let mut diff = change.diff(cache).map_err(std::io::Error::other)?;
                                                if let Some(counts) = diff.line_counts().map_err(std::io::Error::other)? {
                                                    lines.added += counts.insertions as usize;
                                                    lines.removed += counts.removals as usize;
                                                    lines_count
                                                        .fetch_add(counts.insertions as usize + counts.removals as usize, Ordering::Relaxed);
                                                }
}
}
},
}
                                    Ok::<_, std::io::Error>(gix::object::tree::diff::Action::Continue)
})?;
out.push((commit_idx, files, lines));
}
}
Ok(out)
}
})
})
.collect::<Vec<_>>();
(tx, stat_workers)
}
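/// Merge all `persons` that share an email address or an author name into a single entry each,
/// accumulating their commit-, file- and line-statistics.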
pub fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec<WorkByPerson> {
let mut email_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len());
let mut name_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len());
let mut out = Vec::<WorkByPerson>::with_capacity(persons.len());
for person_by_email in persons {
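        // Prefer merging by email, and fall back to merging by name to catch
        // authors who committed under more than one email address.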
match email_to_index.entry(person_by_email.email) {
Entry::Occupied(email_entry) => {
out[*email_entry.get()].merge(person_by_email);
name_to_index.insert(person_by_email.name, *email_entry.get());
}
Entry::Vacant(email_entry) => match name_to_index.entry(person_by_email.name) {
Entry::Occupied(name_entry) => {
out[*name_entry.get()].merge(person_by_email);
email_entry.insert(*name_entry.get());
}
Entry::Vacant(name_entry) => {
let idx = out.len();
name_entry.insert(idx);
email_entry.insert(idx);
out.push(person_by_email.into());
}
},
}
}
out
}
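
#[cfg(test)]
mod tests {
    //! A minimal sketch of how `deduplicate_identities` merges identities. It assumes
    //! `WorkByEmail` can be constructed with the fields used above and that `FileStats`
    //! and `LineStats` implement `Default`; the names, emails and numbers are made up.
    use super::*;

    /// Hypothetical helper producing a one-commit `WorkByEmail` entry.
    fn work(name: &'static str, email: &'static str) -> WorkByEmail {
        WorkByEmail {
            name: name.into(),
            email: email.into(),
            hours: 1.0,
            num_commits: 1,
            files: FileStats::default(),
            lines: LineStats::default(),
        }
    }

    #[test]
    fn identities_sharing_email_or_name_are_merged() {
        let persons = [
            work("Jane Doe", "jane@example.com"),
            work("Jane", "jane@example.com"), // same email, different name
            work("Jane", "jd@example.com"),   // same name, different email
            work("John Roe", "john@example.com"),
        ];
        // The first three entries collapse into one person, the fourth stays separate.
        assert_eq!(deduplicate_identities(&persons).len(), 2);
    }
}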