use std::collections::{BTreeMap, HashMap, HashSet};
use chrono::{DateTime, Datelike, Utc};
use crate::collect::ai_attribution::AgenticMode;
use crate::report::models::{
AuthorSummary, RepositorySummary, UntrackedCommit, WeeklyActivity, WeeklyCategorization,
WeeklyMetrics,
};
use super::{compile_patterns, is_boilerplate, CommitRow, PrRow, DEFAULT_BOILERPLATE_PATTERNS};
/// Per-row classification flags and their totals, as built by `compute_row_flags`.
///
/// Both vectors hold one entry per input row, in input order, so they can be
/// indexed with a row's position.
pub(super) struct RowFlags {
/// `true` where the boilerplate check matched the row.
pub(super) is_boilerplate: Vec<bool>,
/// `true` where the row's message was recognised as a revert.
pub(super) is_revert: Vec<bool>,
/// Number of `true` entries in `is_boilerplate`.
pub(super) boilerplate_count: usize,
/// Number of `true` entries in `is_revert`.
pub(super) revert_count: usize,
}
/// Classifies every commit row as boilerplate and/or revert.
///
/// The boilerplate check receives the commit message, the row's total changed
/// lines (insertions plus deletions) and the compiled default boilerplate
/// patterns; the revert check looks at the message only. The returned vectors
/// are parallel to `rows`, and the counts are the number of set flags in each.
pub(super) fn compute_row_flags(rows: &[CommitRow]) -> RowFlags {
    // Compile the patterns once up front rather than per row.
    let patterns = compile_patterns(DEFAULT_BOILERPLATE_PATTERNS);

    let (boilerplate_flags, revert_flags): (Vec<bool>, Vec<bool>) = rows
        .iter()
        .map(|row| {
            let changed_lines = row.insertions + row.deletions;
            (
                self::is_boilerplate(&row.message, changed_lines, &patterns),
                crate::core::revert::is_revert(&row.message),
            )
        })
        .unzip();

    let count_set = |flags: &[bool]| flags.iter().filter(|&&flag| flag).count();

    RowFlags {
        boilerplate_count: count_set(&boilerplate_flags),
        revert_count: count_set(&revert_flags),
        is_boilerplate: boilerplate_flags,
        is_revert: revert_flags,
    }
}
/// Running totals for one author while commit rows are being accumulated.
///
/// `accumulate_rows` keeps one of these per distinct author email.
pub(super) struct AuthorAcc {
/// Display name; `accumulate_rows` replaces it whenever a longer name is seen.
pub(super) name: String,
/// Author email taken from the first row seen for this author.
pub(super) email: String,
/// Number of commit rows attributed to this author.
pub(super) commits: usize,
/// Sum of `insertions` over the author's rows.
pub(super) insertions: i64,
/// Sum of `deletions` over the author's rows.
pub(super) deletions: i64,
/// Sum of `files_changed` over the author's rows.
pub(super) files_changed: i64,
/// Commit count per category; rows without a category are not counted here.
pub(super) categories: HashMap<String, usize>,
/// Earliest commit timestamp seen for this author.
pub(super) first: DateTime<Utc>,
/// Latest commit timestamp seen for this author.
pub(super) last: DateTime<Utc>,
}
/// Running totals for one repository while commit rows are being accumulated.
pub(super) struct RepoAcc {
/// Number of commit rows in this repository.
pub(super) commits: usize,
/// Distinct author emails that committed to this repository.
pub(super) authors: HashSet<String>,
/// Sum of `insertions` over the repository's rows.
pub(super) insertions: i64,
/// Sum of `deletions` over the repository's rows.
pub(super) deletions: i64,
/// Commit count per category; rows without a category are not counted here.
pub(super) categories: HashMap<String, usize>,
}
/// Running totals for one (ISO week label, author email, repository) bucket.
pub(super) struct WeekAcc {
/// Number of commit rows in the bucket.
pub(super) commits: usize,
/// Sum of `insertions` over the bucket's rows.
pub(super) insertions: i64,
/// Sum of `deletions` over the bucket's rows.
pub(super) deletions: i64,
/// Commit count per category; rows without a category are not counted here.
pub(super) categories: HashMap<String, usize>,
/// Rows flagged as reverts in `RowFlags::is_revert`.
pub(super) reverts: usize,
/// Rows whose category is exactly `"bugfix"`.
pub(super) bugfixes: usize,
/// Rows with `ticketed` set.
pub(super) ticketed: usize,
/// Rows with `is_ai_assisted` set.
pub(super) ai_assisted: usize,
/// Sum of the complexity values of rows that carry one.
pub(super) complexity_sum: i64,
/// Number of rows that carry a complexity value (denominator for the average).
pub(super) complexity_count: usize,
/// Rows whose agentic mode is `AgenticMode::FullAgentic`.
pub(super) agentic_count: usize,
/// Rows whose agentic mode is `AgenticMode::IdeAssisted`.
pub(super) ide_assisted_count: usize,
}
/// Totals for one ISO week across all authors and repositories.
#[derive(Default)]
pub(super) struct WeekTotal {
/// Number of commit rows in the week.
pub(super) commits: usize,
/// Commit count per bucket. `accumulate_rows` files each row under exactly one
/// key: `"boilerplate"` when the row is flagged as boilerplate, otherwise its
/// category, otherwise `"unclassified"`.
pub(super) categories: HashMap<String, usize>,
/// Distinct author emails with at least one commit in the week.
pub(super) developers: HashSet<String>,
}
/// Everything `accumulate_rows` gathers in its single pass over the commit rows.
pub(super) struct Accumulators {
/// Per-author totals, keyed by author email.
pub(super) authors: HashMap<String, AuthorAcc>,
/// Per-repository totals, keyed by repository name.
pub(super) repos: HashMap<String, RepoAcc>,
/// Per-bucket totals, keyed by (ISO week label, author email, repository).
pub(super) weekly: BTreeMap<(String, String, String), WeekAcc>,
/// Commit count per category over all rows that have a category.
pub(super) category_total: HashMap<String, usize>,
/// Per-week totals, keyed by ISO week label.
pub(super) week_totals: BTreeMap<String, WeekTotal>,
/// Author email -> set of ISO week labels in which the author committed.
pub(super) dev_weeks: HashMap<String, HashSet<String>>,
/// Author email -> commit count per category (rows with a category only).
pub(super) dev_categories: HashMap<String, HashMap<String, usize>>,
/// Author email -> number of ticketed commits.
pub(super) dev_ticketed: HashMap<String, usize>,
/// Earliest timestamp among all rows.
pub(super) min_ts: DateTime<Utc>,
/// Latest timestamp among all rows.
pub(super) max_ts: DateTime<Utc>,
/// Copied from `RowFlags::boilerplate_count`.
pub(super) boilerplate_count: usize,
/// Copied from `RowFlags::revert_count`.
pub(super) revert_count: usize,
}
/// Folds every commit row into the per-author, per-repository, per-week and
/// per-developer aggregates returned as [`Accumulators`].
///
/// `flags` is indexed with each row's position, so it must describe the same
/// `rows` slice (see [`compute_row_flags`]).
///
/// # Panics
///
/// Panics when `rows` is empty, because the overall time range is seeded from
/// the first row, and when either flag vector is shorter than `rows`.
pub(super) fn accumulate_rows(rows: &[CommitRow], flags: &RowFlags) -> Accumulators {
    // Adds one to the counter stored under `key`, inserting it at zero first.
    fn bump(counts: &mut HashMap<String, usize>, key: &str) {
        *counts.entry(key.to_owned()).or_insert(0) += 1;
    }

    let mut min_ts = rows[0].timestamp;
    let mut max_ts = rows[0].timestamp;
    let mut authors = HashMap::<String, AuthorAcc>::new();
    let mut repos = HashMap::<String, RepoAcc>::new();
    let mut weekly = BTreeMap::<(String, String, String), WeekAcc>::new();
    let mut category_total = HashMap::<String, usize>::new();
    let mut week_totals = BTreeMap::<String, WeekTotal>::new();
    let mut dev_weeks = HashMap::<String, HashSet<String>>::new();
    let mut dev_categories = HashMap::<String, HashMap<String, usize>>::new();
    let mut dev_ticketed = HashMap::<String, usize>::new();

    for (idx, row) in rows.iter().enumerate() {
        let email = &row.author_email;
        let category = row.category.as_deref();
        let week_label = iso_week_label(&row.timestamp);

        // Overall time range.
        min_ts = min_ts.min(row.timestamp);
        max_ts = max_ts.max(row.timestamp);

        // Per-author totals.
        let author = authors.entry(email.clone()).or_insert_with(|| AuthorAcc {
            name: row.author_name.clone(),
            email: email.clone(),
            commits: 0,
            insertions: 0,
            deletions: 0,
            files_changed: 0,
            categories: HashMap::new(),
            first: row.timestamp,
            last: row.timestamp,
        });
        // Keep the longest spelling of the author's name seen so far.
        if row.author_name.len() > author.name.len() {
            author.name = row.author_name.clone();
        }
        author.commits += 1;
        author.insertions += row.insertions;
        author.deletions += row.deletions;
        author.files_changed += row.files_changed;
        author.first = author.first.min(row.timestamp);
        author.last = author.last.max(row.timestamp);

        // Per-repository totals.
        let repo = repos
            .entry(row.repository.clone())
            .or_insert_with(|| RepoAcc {
                commits: 0,
                authors: HashSet::new(),
                insertions: 0,
                deletions: 0,
                categories: HashMap::new(),
            });
        repo.commits += 1;
        repo.authors.insert(email.clone());
        repo.insertions += row.insertions;
        repo.deletions += row.deletions;

        // Per (week, author, repository) totals.
        let bucket_key = (week_label.clone(), email.clone(), row.repository.clone());
        let week = weekly.entry(bucket_key).or_insert_with(|| WeekAcc {
            commits: 0,
            insertions: 0,
            deletions: 0,
            categories: HashMap::new(),
            reverts: 0,
            bugfixes: 0,
            ticketed: 0,
            ai_assisted: 0,
            complexity_sum: 0,
            complexity_count: 0,
            agentic_count: 0,
            ide_assisted_count: 0,
        });
        week.commits += 1;
        week.insertions += row.insertions;
        week.deletions += row.deletions;
        if flags.is_revert[idx] {
            week.reverts += 1;
        }
        if category == Some("bugfix") {
            week.bugfixes += 1;
        }
        if row.ticketed {
            week.ticketed += 1;
        }
        if row.is_ai_assisted {
            week.ai_assisted += 1;
        }
        match row.agentic_mode {
            AgenticMode::FullAgentic => week.agentic_count += 1,
            AgenticMode::IdeAssisted => week.ide_assisted_count += 1,
            AgenticMode::None => {}
        }
        if let Some(score) = row.complexity {
            week.complexity_sum += score;
            week.complexity_count += 1;
        }

        // Category tallies: rows without a category contribute to none of these.
        if let Some(cat) = category {
            bump(&mut author.categories, cat);
            bump(&mut repo.categories, cat);
            bump(&mut week.categories, cat);
            bump(&mut category_total, cat);
            bump(dev_categories.entry(email.clone()).or_default(), cat);
        }

        // Week-wide totals. Here every row lands in exactly one bucket, and the
        // boilerplate flag takes precedence over the row's own category.
        let totals = week_totals.entry(week_label.clone()).or_default();
        totals.commits += 1;
        totals.developers.insert(email.clone());
        let week_bucket = if flags.is_boilerplate[idx] {
            "boilerplate"
        } else {
            category.unwrap_or("unclassified")
        };
        bump(&mut totals.categories, week_bucket);

        // Per-developer activity.
        dev_weeks
            .entry(email.clone())
            .or_default()
            .insert(week_label);
        if row.ticketed {
            bump(&mut dev_ticketed, email);
        }
    }

    Accumulators {
        authors,
        repos,
        weekly,
        category_total,
        week_totals,
        dev_weeks,
        dev_categories,
        dev_ticketed,
        min_ts,
        max_ts,
        boilerplate_count: flags.boilerplate_count,
        revert_count: flags.revert_count,
    }
}
/// Converts the per-author accumulators into report summaries.
///
/// The result is ordered by commit count, highest first. Authors with the same
/// commit count are ordered by email so that the output does not depend on
/// `HashMap` iteration order and is identical from run to run.
pub(super) fn materialize_authors(authors: HashMap<String, AuthorAcc>) -> Vec<AuthorSummary> {
    let mut summaries: Vec<AuthorSummary> = authors
        .into_values()
        .map(|a| AuthorSummary {
            name: a.name,
            email: a.email,
            commit_count: a.commits,
            insertions: a.insertions,
            deletions: a.deletions,
            files_changed: a.files_changed,
            categories: a.categories,
            first_commit: a.first.to_rfc3339(),
            last_commit: a.last.to_rfc3339(),
        })
        .collect();
    // Descending by commits; the email tie-break makes the order deterministic.
    summaries.sort_by(|a, b| {
        b.commit_count
            .cmp(&a.commit_count)
            .then_with(|| a.email.cmp(&b.email))
    });
    summaries
}
/// Converts the per-repository accumulators into report summaries.
///
/// Repositories are ordered by commit count, highest first, and each
/// repository's `top_categories` by category count, highest first. Ties are
/// broken by name in both cases so that the output does not depend on
/// `HashMap` iteration order and is identical from run to run.
pub(super) fn materialize_repositories(repos: HashMap<String, RepoAcc>) -> Vec<RepositorySummary> {
    let mut summaries: Vec<RepositorySummary> = repos
        .into_iter()
        .map(|(name, r)| {
            let mut top: Vec<(String, usize)> = r.categories.into_iter().collect();
            // Descending by count, then ascending by category name.
            top.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
            RepositorySummary {
                name,
                commit_count: r.commits,
                author_count: r.authors.len(),
                insertions: r.insertions,
                deletions: r.deletions,
                top_categories: top,
            }
        })
        .collect();
    // Descending by commits, then ascending by repository name.
    summaries.sort_by(|a, b| {
        b.commit_count
            .cmp(&a.commit_count)
            .then_with(|| a.name.cmp(&b.name))
    });
    summaries
}
/// Converts the (week, author email, repository) buckets into report rows.
///
/// * `email_to_name` supplies the display name for an email; the email itself
///   is used when there is no entry.
/// * `abandoned_by_week_identity` is looked up by (week, lower-cased identity),
///   first with the resolved author name and then with the email; a missing
///   entry counts as zero.
///
/// Rows come out in the key order of `weekly`. `avg_complexity` is `None` for
/// buckets in which no commit carried a complexity value.
pub(super) fn materialize_weekly_activity(
    weekly: BTreeMap<(String, String, String), WeekAcc>,
    email_to_name: &HashMap<String, String>,
    abandoned_by_week_identity: &HashMap<(String, String), usize>,
) -> Vec<WeeklyActivity> {
    let mut activity = Vec::with_capacity(weekly.len());

    for ((week, email, repository), acc) in weekly {
        let author = match email_to_name.get(&email) {
            Some(name) => name.clone(),
            None => email.clone(),
        };

        let (quality_score, quality_tshirt) =
            crate::core::quality::score_and_tshirt(crate::core::quality::QualityInputs {
                commits: acc.commits,
                reverts: acc.reverts,
                bugfixes: acc.bugfixes,
                ticketed: acc.ticketed,
            });

        let abandoned_for = |identity: &str| {
            abandoned_by_week_identity
                .get(&(week.clone(), identity.to_lowercase()))
                .copied()
        };
        let abandoned_pr_count = abandoned_for(&author)
            .or_else(|| abandoned_for(&email))
            .unwrap_or(0);

        let avg_complexity = (acc.complexity_count > 0)
            .then(|| acc.complexity_sum as f64 / acc.complexity_count as f64);

        activity.push(WeeklyActivity {
            week,
            author,
            repository,
            commit_count: acc.commits,
            insertions: acc.insertions,
            deletions: acc.deletions,
            categories: acc.categories,
            revert_count: acc.reverts,
            bugfix_count: acc.bugfixes,
            ticketed_count: acc.ticketed,
            quality_score,
            quality_tshirt,
            abandoned_pr_count,
            ai_assisted_count: acc.ai_assisted,
            avg_complexity,
            agentic_count: acc.agentic_count,
            ide_assisted_count: acc.ide_assisted_count,
        });
    }

    activity
}
/// Counts abandoned pull requests per (ISO week label, lower-cased author).
///
/// A pull request counts as abandoned when its state is `"closed"` and it has
/// no merge timestamp. The week is taken from the PR's creation time.
pub(super) fn build_abandoned_pr_counts(prs: &[PrRow]) -> HashMap<(String, String), usize> {
    let mut counts: HashMap<(String, String), usize> = HashMap::new();
    let abandoned = prs
        .iter()
        .filter(|pr| pr.state == "closed" && pr.merged_at.is_none());
    for pr in abandoned {
        let key = (iso_week_label(&pr.created_at), pr.author.to_lowercase());
        *counts.entry(key).or_default() += 1;
    }
    counts
}
/// Builds one metrics row per week, in week-label order.
///
/// Category counts default to zero when the week has no commits in that
/// bucket. `doc_commits` adds up the `"documentation"` and `"docs"` buckets,
/// and `story_points` is always `0.0`.
pub(super) fn build_weekly_metrics(
    week_totals: &BTreeMap<String, WeekTotal>,
) -> Vec<WeeklyMetrics> {
    let mut metrics = Vec::with_capacity(week_totals.len());
    for (week, totals) in week_totals {
        let count_of = |category: &str| totals.categories.get(category).copied().unwrap_or(0);
        metrics.push(WeeklyMetrics {
            week: week.clone(),
            total_commits: totals.commits,
            feature_commits: count_of("feature"),
            bugfix_commits: count_of("bugfix"),
            maintenance_commits: count_of("maintenance"),
            refactor_commits: count_of("refactor"),
            test_commits: count_of("test"),
            doc_commits: count_of("documentation") + count_of("docs"),
            active_developers: totals.developers.len(),
            story_points: 0.0,
        });
    }
    metrics
}
/// Expands the weekly totals into one row per (week, category bucket).
///
/// Weeks appear in week-label order and, within a week, buckets are sorted by
/// name. `pct_of_week` is the bucket's share of the week's commits in percent,
/// or `0.0` when the week's commit total is zero.
pub(super) fn build_weekly_categorization(
    week_totals: &BTreeMap<String, WeekTotal>,
) -> Vec<WeeklyCategorization> {
    week_totals
        .iter()
        .flat_map(|(week, totals)| {
            let week_commits = totals.commits as f64;
            let mut buckets: Vec<(&String, &usize)> = totals.categories.iter().collect();
            buckets.sort_by_key(|&(name, _)| name);
            buckets
                .into_iter()
                .map(move |(name, &count)| WeeklyCategorization {
                    week: week.clone(),
                    change_type: name.clone(),
                    commit_count: count,
                    pct_of_week: if week_commits > 0.0 {
                        (count as f64) * 100.0 / week_commits
                    } else {
                        0.0
                    },
                })
        })
        .collect()
}
/// Lists commits that are not linked to a ticket, newest first.
///
/// A row is included when `ticketed` is false and its category is not
/// `"boilerplate"`; rows with no category are included. The author is the
/// display name from `email_to_name`, falling back to the row's own author
/// name, and `message` is the first line of the commit message.
pub(super) fn build_untracked_commits(
    rows: &[CommitRow],
    email_to_name: &HashMap<String, String>,
) -> Vec<UntrackedCommit> {
    // The `!ticketed` test alone decides membership; no further category
    // condition can exclude a row once it has passed this filter.
    let mut untracked: Vec<&CommitRow> = rows
        .iter()
        .filter(|row| !row.ticketed && row.category.as_deref() != Some("boilerplate"))
        .collect();

    // Order on the timestamp itself rather than on its formatted text. The
    // sort is stable, so commits with equal timestamps keep their input order.
    untracked.sort_by_key(|row| std::cmp::Reverse(row.timestamp));

    untracked
        .into_iter()
        .map(|row| UntrackedCommit {
            sha: row.sha.clone(),
            author: email_to_name
                .get(&row.author_email)
                .cloned()
                .unwrap_or_else(|| row.author_name.clone()),
            date: row.timestamp.to_rfc3339(),
            message: row.message.lines().next().unwrap_or("").to_string(),
        })
        .collect()
}
/// Formats a timestamp's ISO week as `<year>-W<week>`, with the week number
/// zero-padded to two digits (for example `2024-W05`).
///
/// The year is the one reported by the ISO week itself, not `ts.year()`.
/// NOTE(review): per chrono's `IsoWeek` docs this can differ from the calendar
/// year for dates close to New Year.
pub(super) fn iso_week_label(ts: &DateTime<Utc>) -> String {
    let iso_week = ts.iso_week();
    let year = iso_week.year();
    let week_number = iso_week.week();
    format!("{}-W{:02}", year, week_number)
}