use std::collections::{BTreeMap, HashMap, HashSet};
use chrono::{DateTime, Datelike, Utc};
use regex::Regex;
use tracing::{debug, warn};
use crate::core::config::Config;
use crate::core::db::Database;
use crate::report::errors::{ReportError, Result};
use crate::report::models::{
ActivityWeights, AuthorSummary, DeveloperActivitySummary, DoraMetrics, QualitySummary,
ReportData, ReportSummary, RepositorySummary, UntrackedCommit, VelocitySummary, WeeklyActivity,
WeeklyCategorization, WeeklyMetrics, WeeklyVelocity,
};
/// Stateless entry point for report aggregation; its associated functions
/// receive the database handle and configuration explicitly.
pub struct Aggregator;
/// One commit as loaded from the `commits` table, joined with its author and
/// classification rows.
struct CommitRow {
/// Commit hash (`c.sha`).
sha: String,
/// Canonical author name when the author row provides one, otherwise the raw commit author name.
author_name: String,
/// Canonical author email when it is non-empty, otherwise the raw commit author email.
author_email: String,
/// Commit timestamp, normalized to UTC.
timestamp: DateTime<Utc>,
/// Repository the commit was stored under.
repository: String,
/// Inserted line count as stored.
insertions: i64,
/// Deleted line count as stored.
deletions: i64,
/// Number of files touched as stored.
files_changed: i64,
/// Classification category; `None` when the commit has no classification row.
category: Option<String>,
/// Commit message as stored (may span several lines).
message: String,
/// `true` when the stored `ticketed` column is non-zero.
ticketed: bool,
}
/// One pull request as loaded from the `pull_requests` table.
struct PrRow {
/// PR author identifier as stored in the `author` column.
author: String,
/// PR state string; `"closed"` with no merge time is treated as abandoned.
state: String,
/// Creation time, normalized to UTC.
created_at: DateTime<Utc>,
/// Merge time in UTC; `None` when absent or not parseable as RFC 3339.
merged_at: Option<DateTime<Utc>>,
}
/// Regex sources matched against the first line of a commit message to flag
/// the commit as boilerplate (merges, version bumps, lockfile updates,
/// generated content).
const DEFAULT_BOILERPLATE_PATTERNS: &[&str] = &[
r"^[Mm]erge branch",
r"^[Mm]erge pull request",
r"^[Bb]ump version",
r"^[Uu]pdate package-lock",
r"^[Uu]pdate yarn\.lock",
// The last two patterns are unanchored: they match anywhere in the first line.
r"[Gg]enerated by",
r"[Aa]uto-generated",
];
/// Base changed-line threshold for the size-based boilerplate check.
/// `is_boilerplate` only short-circuits once the changed-line count exceeds
/// ten times this value.
const BOILERPLATE_LINES_THRESHOLD: i64 = 500;
/// Decides whether a commit should be counted as boilerplate.
///
/// A commit is boilerplate when its changed-line count exceeds ten times
/// `BOILERPLATE_LINES_THRESHOLD`, or when the first line of `message` matches
/// any of `patterns`.
// NOTE(review): the size cut-off that actually applies is threshold * 10; a
// commit that only exceeds the base threshold is not flagged by size. Confirm
// this is the intended limit.
fn is_boilerplate(message: &str, lines_changed: i64, patterns: &[Regex]) -> bool {
    let oversized = lines_changed > BOILERPLATE_LINES_THRESHOLD
        && lines_changed > BOILERPLATE_LINES_THRESHOLD * 10;
    if oversized {
        return true;
    }
    // An empty message has no lines; fall back to the (empty) message itself.
    let subject = message.lines().next().unwrap_or(message);
    patterns.iter().any(|pattern| pattern.is_match(subject))
}
/// Compiles each pattern source into a `Regex`.
///
/// Sources that fail to compile are reported with a warning and left out, so
/// the returned vector may be shorter than `patterns`.
fn compile_patterns(patterns: &[&str]) -> Vec<Regex> {
    let mut compiled = Vec::new();
    for p in patterns {
        match Regex::new(p) {
            Ok(regex) => compiled.push(regex),
            Err(e) => {
                warn!(pattern = %p, error = %e, "skipping invalid regex pattern");
            }
        }
    }
    compiled
}
impl Aggregator {
/// Builds the report over every commit in the database (no author filter).
///
/// # Errors
/// Returns whatever `build_filtered` returns for an unfiltered run.
pub fn build(db: &Database, config: &Config) -> Result<ReportData> {
    let no_author_filter: Option<&str> = None;
    Self::build_filtered(db, config, no_author_filter)
}
pub fn build_filtered(
db: &Database,
config: &Config,
author_email: Option<&str>,
) -> Result<ReportData> {
let canonical_email: Option<String> = if let Some(email) = author_email {
let resolved = Self::resolve_canonical_email(db, email)?;
Some(resolved)
} else {
None
};
let rows = Self::load_rows_filtered(db, canonical_email.as_deref())?;
let prs = Self::load_prs(db).unwrap_or_default();
let unresolved_db = if canonical_email.is_none() {
Self::count_unresolved_author_commits(db).unwrap_or(0)
} else {
0
};
let mut data = Self::aggregate(rows, prs);
data.repository_coverage = data.repositories.len();
let alias_set = configured_alias_emails(config);
let unresolved_authors = if alias_set.is_empty() {
0
} else {
data.authors
.iter()
.filter(|a| !alias_set.contains(&a.email.to_lowercase()))
.count()
};
data.unresolved_authors = unresolved_authors;
data.unresolved_author_commits = unresolved_db;
check_weekly_coverage_drift(db, &data.weekly_metrics);
if unresolved_db > 0 {
tracing::warn!(
count = unresolved_db,
"WARNING: {unresolved_db} commits have unresolved author identities and may \
inflate developer counts. Run `tga aliases list` to review, or extend \
`developer_aliases` in the config to map missing identities."
);
}
Ok(data)
}
/// Counts the commits whose `author_id` column is NULL.
///
/// # Errors
/// Propagates any database error raised by the count query.
fn count_unresolved_author_commits(db: &Database) -> Result<usize> {
    let conn = db.connection();
    let unresolved = conn
        .query_row(
            "SELECT COUNT(*) FROM commits WHERE author_id IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .map_err(crate::core::TgaError::from)?;
    Ok(unresolved as usize)
}
/// Loads every row of the `pull_requests` table.
///
/// Rows whose `created_at` is not valid RFC 3339 are skipped. A `merged_at`
/// that is missing or unparseable becomes `None`.
///
/// # Errors
/// Propagates database errors from preparing, running, or reading the query.
fn load_prs(db: &Database) -> Result<Vec<PrRow>> {
    let conn = db.connection();
    let mut stmt = conn
        .prepare("SELECT created_at, merged_at, author, state FROM pull_requests")
        .map_err(crate::core::TgaError::from)?;
    let records = stmt
        .query_map([], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, Option<String>>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, String>(3)?,
            ))
        })
        .map_err(crate::core::TgaError::from)?;
    let mut prs = Vec::new();
    for record in records {
        let (created_raw, merged_raw, author, state) =
            record.map_err(crate::core::TgaError::from)?;
        // A PR without a usable creation time cannot be bucketed; drop it.
        if let Ok(created) = DateTime::parse_from_rfc3339(&created_raw) {
            let merged_at = merged_raw
                .as_deref()
                .and_then(|raw| DateTime::parse_from_rfc3339(raw).ok())
                .map(|merged| merged.with_timezone(&Utc));
            prs.push(PrRow {
                author,
                state,
                created_at: created.with_timezone(&Utc),
                merged_at,
            });
        }
    }
    Ok(prs)
}
fn resolve_canonical_email(db: &Database, email: &str) -> Result<String> {
let conn = db.connection();
let lower = email.to_lowercase();
let result: rusqlite::Result<String> = conn.query_row(
"SELECT canonical_email FROM authors WHERE LOWER(canonical_email) = LOWER(?1) LIMIT 1",
rusqlite::params![lower],
|row| row.get(0),
);
match result {
Ok(stored) => Ok(stored),
Err(rusqlite::Error::QueryReturnedNoRows) => Err(ReportError::Report(format!(
"no canonical identity with canonical_email '{email}' found in authors table.\n\
Run `tga aliases list` to see all canonical identities, or \
`tga aliases merge` to consolidate duplicate identities."
))),
Err(e) => Err(ReportError::Core(crate::core::TgaError::from(e))),
}
}
fn load_rows_filtered(db: &Database, author_email: Option<&str>) -> Result<Vec<CommitRow>> {
let conn = db.connection();
let sql_base = "SELECT c.sha, \
COALESCE(a.canonical_name, c.author_name) AS author_name, \
COALESCE(NULLIF(a.canonical_email, ''), c.author_email) AS author_email, \
c.timestamp, c.repository, \
c.insertions, c.deletions, c.files_changed, cl.category, \
c.message, c.ticketed \
FROM commits c \
LEFT JOIN authors a ON a.id = c.author_id \
LEFT JOIN classifications cl ON cl.id = c.classification_id";
let row_mapper = |row: &rusqlite::Row<'_>| -> rusqlite::Result<CommitRow> {
let ts_str: String = row.get(3)?;
let timestamp = DateTime::parse_from_rfc3339(&ts_str)
.map(|dt| dt.with_timezone(&Utc))
.unwrap_or_else(|_| Utc::now());
let ticketed: i64 = row.get(10).unwrap_or(0);
Ok(CommitRow {
sha: row.get(0)?,
author_name: row.get(1)?,
author_email: row.get(2)?,
timestamp,
repository: row.get(4)?,
insertions: row.get(5)?,
deletions: row.get(6)?,
files_changed: row.get(7)?,
category: row.get(8)?,
message: row.get(9)?,
ticketed: ticketed != 0,
})
};
let mut out: Vec<CommitRow> = Vec::new();
if let Some(email) = author_email {
let sql = format!(
"{sql_base} \
WHERE LOWER(COALESCE(NULLIF(a.canonical_email, ''), c.author_email)) = LOWER(?1)"
);
let mut stmt = conn.prepare(&sql).map_err(crate::core::TgaError::from)?;
let rows = stmt
.query_map(rusqlite::params![email], row_mapper)
.map_err(crate::core::TgaError::from)?;
for r in rows {
out.push(r.map_err(crate::core::TgaError::from)?);
}
} else {
let mut stmt = conn
.prepare(sql_base)
.map_err(crate::core::TgaError::from)?;
let rows = stmt
.query_map([], row_mapper)
.map_err(crate::core::TgaError::from)?;
for r in rows {
out.push(r.map_err(crate::core::TgaError::from)?);
}
}
debug!(count = out.len(), "loaded commit rows for aggregation");
Ok(out)
}
fn aggregate(rows: Vec<CommitRow>, prs: Vec<PrRow>) -> ReportData {
let generated_at = Utc::now().to_rfc3339();
let mut data = ReportData::empty(generated_at);
if rows.is_empty() {
return data;
}
let row_flags = compute_row_flags(&rows);
let acc = accumulate_rows(&rows, &row_flags);
let author_summaries = materialize_authors(acc.authors);
let repo_summaries = materialize_repositories(acc.repos);
let email_to_name: HashMap<String, String> = author_summaries
.iter()
.map(|a| (a.email.clone(), a.name.clone()))
.collect();
let abandoned_by_week_identity = build_abandoned_pr_counts(&prs);
let weekly_activity =
materialize_weekly_activity(acc.weekly, &email_to_name, &abandoned_by_week_identity);
let total_commits = rows.len();
let total_authors = author_summaries.len();
let total_weeks = acc.week_totals.len();
let weekly_metrics = build_weekly_metrics(&acc.week_totals);
let weekly_categorization = build_weekly_categorization(&acc.week_totals);
let untracked_commits = build_untracked_commits(&rows, &email_to_name);
let velocity_inputs = compute_velocity_inputs(&prs);
let velocity = Some(VelocitySummary {
pr_cycle_time_avg_hours: velocity_inputs.cycle_time_avg,
pr_cycle_time_median_hours: velocity_inputs.cycle_time_median,
pr_throughput_per_week: velocity_inputs.pr_throughput_per_week,
revision_rate: 0.0,
pr_count: velocity_inputs.pr_count,
});
let weekly_velocity = build_weekly_velocity(
&acc.week_totals,
&velocity_inputs.pr_per_week,
velocity_inputs.cycle_time_avg,
);
let dora = Some(compute_dora(
&rows,
&row_flags,
&acc.category_total,
&prs,
velocity_inputs.cycle_time_avg,
total_weeks,
acc.revert_count,
));
let quality = Some(compute_quality(
total_commits,
&acc.category_total,
acc.revert_count,
));
let weights = ActivityWeights::default();
let developer_activity = compute_developer_activity(
&author_summaries,
&acc.dev_weeks,
&acc.dev_categories,
&weights,
);
let summary = Some(build_summary(
&rows,
total_commits,
total_authors,
total_weeks,
acc.min_ts,
acc.max_ts,
));
data.total_commits = total_commits;
data.total_authors = total_authors;
data.period_start = Some(acc.min_ts.to_rfc3339());
data.period_end = Some(acc.max_ts.to_rfc3339());
data.authors = author_summaries;
data.repositories = repo_summaries;
data.weekly_activity = weekly_activity;
data.category_breakdown = acc.category_total;
data.weekly_metrics = weekly_metrics;
data.developer_activity = developer_activity;
data.summary = summary;
data.untracked_commits = untracked_commits;
data.weekly_categorization = weekly_categorization;
data.weekly_velocity = weekly_velocity;
data.dora = dora;
data.velocity = velocity;
data.quality = quality;
data.boilerplate_count = acc.boilerplate_count;
data.revert_count = acc.revert_count;
let _ = acc.dev_ticketed;
data
}
}
/// Per-commit boolean flags, index-aligned with the commit row slice they
/// were computed from, plus their totals.
struct RowFlags {
/// `is_boilerplate[i]` is the boilerplate verdict for row `i`.
is_boilerplate: Vec<bool>,
/// `is_revert[i]` is the revert verdict for row `i`.
is_revert: Vec<bool>,
/// Number of `true` entries in `is_boilerplate`.
boilerplate_count: usize,
/// Number of `true` entries in `is_revert`.
revert_count: usize,
}
fn compute_row_flags(rows: &[CommitRow]) -> RowFlags {
let boilerplate_re = compile_patterns(DEFAULT_BOILERPLATE_PATTERNS);
let mut is_boilerplate: Vec<bool> = Vec::with_capacity(rows.len());
let mut is_revert: Vec<bool> = Vec::with_capacity(rows.len());
for row in rows {
let lines = row.insertions + row.deletions;
is_boilerplate.push(self::is_boilerplate(&row.message, lines, &boilerplate_re));
is_revert.push(crate::core::revert::is_revert(&row.message));
}
let boilerplate_count = is_boilerplate.iter().filter(|b| **b).count();
let revert_count = is_revert.iter().filter(|b| **b).count();
RowFlags {
is_boilerplate,
is_revert,
boilerplate_count,
revert_count,
}
}
/// Running totals for one author, keyed by email while accumulating.
struct AuthorAcc {
/// Display name; replaced whenever a row carries a longer name.
name: String,
/// Author email (the accumulator key).
email: String,
/// Number of commits seen.
commits: usize,
/// Sum of inserted lines.
insertions: i64,
/// Sum of deleted lines.
deletions: i64,
/// Sum of per-commit changed-file counts.
files_changed: i64,
/// Commit count per classification category (unclassified commits are not counted).
categories: HashMap<String, usize>,
/// Earliest commit timestamp seen.
first: DateTime<Utc>,
/// Latest commit timestamp seen.
last: DateTime<Utc>,
}
/// Running totals for one repository.
struct RepoAcc {
/// Number of commits seen.
commits: usize,
/// Distinct author emails that committed to the repository.
authors: HashSet<String>,
/// Sum of inserted lines.
insertions: i64,
/// Sum of deleted lines.
deletions: i64,
/// Commit count per classification category (unclassified commits are not counted).
categories: HashMap<String, usize>,
}
/// Running totals for one (week, author email, repository) bucket.
struct WeekAcc {
/// Number of commits in the bucket.
commits: usize,
/// Sum of inserted lines.
insertions: i64,
/// Sum of deleted lines.
deletions: i64,
/// Commit count per classification category (unclassified commits are not counted).
categories: HashMap<String, usize>,
/// Commits flagged as reverts.
reverts: usize,
/// Commits whose category is `"bugfix"`.
bugfixes: usize,
/// Commits marked as ticketed.
ticketed: usize,
}
/// Totals for one ISO week across all authors and repositories.
#[derive(Default)]
struct WeekTotal {
/// Number of commits in the week.
commits: usize,
/// Commit count per bucket: `"boilerplate"` for flagged commits, otherwise
/// the classification category, otherwise `"unclassified"`.
categories: HashMap<String, usize>,
/// Distinct author emails active in the week.
developers: HashSet<String>,
}
/// Everything gathered in the single pass over the commit rows.
struct Accumulators {
/// Per-author totals keyed by author email.
authors: HashMap<String, AuthorAcc>,
/// Per-repository totals keyed by repository name.
repos: HashMap<String, RepoAcc>,
/// Totals keyed by (week label, author email, repository).
weekly: BTreeMap<(String, String, String), WeekAcc>,
/// Commit count per classification category over all rows.
category_total: HashMap<String, usize>,
/// Per-week totals keyed by week label.
week_totals: BTreeMap<String, WeekTotal>,
/// Week labels in which each author email has at least one commit.
dev_weeks: HashMap<String, HashSet<String>>,
/// Per-author commit count per classification category.
dev_categories: HashMap<String, HashMap<String, usize>>,
/// Per-author count of ticketed commits.
dev_ticketed: HashMap<String, usize>,
/// Earliest commit timestamp over all rows.
min_ts: DateTime<Utc>,
/// Latest commit timestamp over all rows.
max_ts: DateTime<Utc>,
/// Copied from the row flags.
boilerplate_count: usize,
/// Copied from the row flags.
revert_count: usize,
}
/// Walks the commit rows once and fills every accumulator.
///
/// `flags` must be index-aligned with `rows`.
///
/// # Panics
/// Panics when `rows` is empty (the first row seeds the time range) or when
/// `flags` holds fewer entries than `rows`.
fn accumulate_rows(rows: &[CommitRow], flags: &RowFlags) -> Accumulators {
    /// Adds one to the counter stored under `key`, creating it at zero first.
    fn bump(counts: &mut HashMap<String, usize>, key: &str) {
        *counts.entry(key.to_string()).or_insert(0) += 1;
    }

    let mut min_ts = rows[0].timestamp;
    let mut max_ts = rows[0].timestamp;
    let mut authors: HashMap<String, AuthorAcc> = HashMap::new();
    let mut repos: HashMap<String, RepoAcc> = HashMap::new();
    let mut weekly: BTreeMap<(String, String, String), WeekAcc> = BTreeMap::new();
    let mut category_total: HashMap<String, usize> = HashMap::new();
    let mut week_totals: BTreeMap<String, WeekTotal> = BTreeMap::new();
    let mut dev_weeks: HashMap<String, HashSet<String>> = HashMap::new();
    let mut dev_categories: HashMap<String, HashMap<String, usize>> = HashMap::new();
    let mut dev_ticketed: HashMap<String, usize> = HashMap::new();

    for (idx, row) in rows.iter().enumerate() {
        let ts = row.timestamp;
        let email = &row.author_email;
        let category = row.category.as_deref();
        let week_label = iso_week_label(&ts);

        min_ts = min_ts.min(ts);
        max_ts = max_ts.max(ts);

        // Per-author totals.
        let author = authors.entry(email.clone()).or_insert_with(|| AuthorAcc {
            name: row.author_name.clone(),
            email: email.clone(),
            commits: 0,
            insertions: 0,
            deletions: 0,
            files_changed: 0,
            categories: HashMap::new(),
            first: ts,
            last: ts,
        });
        // Prefer the longest name seen for this email.
        if row.author_name.len() > author.name.len() {
            author.name = row.author_name.clone();
        }
        author.commits += 1;
        author.insertions += row.insertions;
        author.deletions += row.deletions;
        author.files_changed += row.files_changed;
        author.first = author.first.min(ts);
        author.last = author.last.max(ts);
        if let Some(cat) = category {
            bump(&mut author.categories, cat);
        }

        // Per-repository totals.
        let repo = repos
            .entry(row.repository.clone())
            .or_insert_with(|| RepoAcc {
                commits: 0,
                authors: HashSet::new(),
                insertions: 0,
                deletions: 0,
                categories: HashMap::new(),
            });
        repo.commits += 1;
        repo.authors.insert(email.clone());
        repo.insertions += row.insertions;
        repo.deletions += row.deletions;
        if let Some(cat) = category {
            bump(&mut repo.categories, cat);
        }

        // Per (week, author, repository) totals.
        let bucket_key = (week_label.clone(), email.clone(), row.repository.clone());
        let bucket = weekly.entry(bucket_key).or_insert_with(|| WeekAcc {
            commits: 0,
            insertions: 0,
            deletions: 0,
            categories: HashMap::new(),
            reverts: 0,
            bugfixes: 0,
            ticketed: 0,
        });
        bucket.commits += 1;
        bucket.insertions += row.insertions;
        bucket.deletions += row.deletions;
        if let Some(cat) = category {
            bump(&mut bucket.categories, cat);
        }
        if flags.is_revert[idx] {
            bucket.reverts += 1;
        }
        if category == Some("bugfix") {
            bucket.bugfixes += 1;
        }
        if row.ticketed {
            bucket.ticketed += 1;
        }

        // Global category totals.
        if let Some(cat) = category {
            bump(&mut category_total, cat);
        }

        // Per-week totals; boilerplate takes precedence over the category.
        let totals = week_totals.entry(week_label.clone()).or_default();
        totals.commits += 1;
        totals.developers.insert(email.clone());
        let week_bucket = if flags.is_boilerplate[idx] {
            "boilerplate"
        } else {
            category.unwrap_or("unclassified")
        };
        bump(&mut totals.categories, week_bucket);

        // Per-developer indexes.
        dev_weeks
            .entry(email.clone())
            .or_default()
            .insert(week_label);
        if let Some(cat) = category {
            bump(dev_categories.entry(email.clone()).or_default(), cat);
        }
        if row.ticketed {
            bump(&mut dev_ticketed, email);
        }
    }

    Accumulators {
        authors,
        repos,
        weekly,
        category_total,
        week_totals,
        dev_weeks,
        dev_categories,
        dev_ticketed,
        min_ts,
        max_ts,
        boilerplate_count: flags.boilerplate_count,
        revert_count: flags.revert_count,
    }
}
/// Converts the per-author accumulators into summaries, most commits first.
///
/// Authors with equal commit counts are ordered by email so the output does
/// not depend on hash-map iteration order.
fn materialize_authors(authors: HashMap<String, AuthorAcc>) -> Vec<AuthorSummary> {
    let mut summaries: Vec<AuthorSummary> = authors
        .into_values()
        .map(|a| AuthorSummary {
            name: a.name,
            email: a.email,
            commit_count: a.commits,
            insertions: a.insertions,
            deletions: a.deletions,
            files_changed: a.files_changed,
            categories: a.categories,
            first_commit: a.first.to_rfc3339(),
            last_commit: a.last.to_rfc3339(),
        })
        .collect();
    summaries.sort_by(|a, b| {
        b.commit_count
            .cmp(&a.commit_count)
            .then_with(|| a.email.cmp(&b.email))
    });
    summaries
}
/// Converts the per-repository accumulators into summaries, most commits first.
///
/// Each repository's categories are listed by descending count. Ties, both
/// between repositories and between categories, are broken by name so the
/// output does not depend on hash-map iteration order.
fn materialize_repositories(repos: HashMap<String, RepoAcc>) -> Vec<RepositorySummary> {
    let mut summaries: Vec<RepositorySummary> = repos
        .into_iter()
        .map(|(name, r)| {
            let mut top: Vec<(String, usize)> = r.categories.into_iter().collect();
            top.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
            RepositorySummary {
                name,
                commit_count: r.commits,
                author_count: r.authors.len(),
                insertions: r.insertions,
                deletions: r.deletions,
                top_categories: top,
            }
        })
        .collect();
    summaries.sort_by(|a, b| {
        b.commit_count
            .cmp(&a.commit_count)
            .then_with(|| a.name.cmp(&b.name))
    });
    summaries
}
/// Converts the (week, author, repository) buckets into `WeeklyActivity` rows,
/// in the key order of the input map.
///
/// The author column shows the name mapped from the email, or the email
/// itself when no name is known. The abandoned-PR count is looked up by the
/// lower-cased display name first and by the lower-cased email second.
fn materialize_weekly_activity(
    weekly: BTreeMap<(String, String, String), WeekAcc>,
    email_to_name: &HashMap<String, String>,
    abandoned_by_week_identity: &HashMap<(String, String), usize>,
) -> Vec<WeeklyActivity> {
    let mut activity = Vec::new();
    for ((week, email, repository), bucket) in weekly {
        let author = email_to_name
            .get(&email)
            .cloned()
            .unwrap_or_else(|| email.clone());
        let (quality_score, quality_tshirt) =
            crate::core::quality::score_and_tshirt(crate::core::quality::QualityInputs {
                commits: bucket.commits,
                reverts: bucket.reverts,
                bugfixes: bucket.bugfixes,
                ticketed: bucket.ticketed,
            });
        let abandoned_for = |identity: &str| {
            abandoned_by_week_identity
                .get(&(week.clone(), identity.to_lowercase()))
                .copied()
        };
        let abandoned_pr_count = abandoned_for(&author)
            .or_else(|| abandoned_for(&email))
            .unwrap_or(0);
        activity.push(WeeklyActivity {
            week,
            author,
            repository,
            commit_count: bucket.commits,
            insertions: bucket.insertions,
            deletions: bucket.deletions,
            categories: bucket.categories,
            revert_count: bucket.reverts,
            bugfix_count: bucket.bugfixes,
            ticketed_count: bucket.ticketed,
            quality_score,
            quality_tshirt,
            abandoned_pr_count,
        });
    }
    activity
}
/// Counts abandoned pull requests per (creation week, lower-cased author).
///
/// A PR is abandoned when its state is exactly `"closed"` and it has no merge
/// time.
fn build_abandoned_pr_counts(prs: &[PrRow]) -> HashMap<(String, String), usize> {
    prs.iter()
        .filter(|pr| pr.state == "closed" && pr.merged_at.is_none())
        .fold(
            HashMap::<(String, String), usize>::new(),
            |mut counts, pr| {
                let key = (iso_week_label(&pr.created_at), pr.author.to_lowercase());
                *counts.entry(key).or_insert(0) += 1;
                counts
            },
        )
}
/// Produces one `WeeklyMetrics` row per week, in week-label order.
///
/// The documentation count adds the `"documentation"` and `"docs"` buckets.
/// Story points are always reported as `0.0`.
fn build_weekly_metrics(week_totals: &BTreeMap<String, WeekTotal>) -> Vec<WeeklyMetrics> {
    let mut metrics = Vec::with_capacity(week_totals.len());
    for (week, totals) in week_totals {
        let count_of = |bucket: &str| totals.categories.get(bucket).copied().unwrap_or(0);
        metrics.push(WeeklyMetrics {
            week: week.clone(),
            total_commits: totals.commits,
            feature_commits: count_of("feature"),
            bugfix_commits: count_of("bugfix"),
            maintenance_commits: count_of("maintenance"),
            refactor_commits: count_of("refactor"),
            test_commits: count_of("test"),
            doc_commits: count_of("documentation") + count_of("docs"),
            active_developers: totals.developers.len(),
            story_points: 0.0,
        });
    }
    metrics
}
/// Produces one row per (week, category bucket) with the bucket's share of
/// that week's commits as a percentage.
///
/// Rows are ordered by week label, then by bucket name.
fn build_weekly_categorization(
    week_totals: &BTreeMap<String, WeekTotal>,
) -> Vec<WeeklyCategorization> {
    week_totals
        .iter()
        .flat_map(|(week, totals)| {
            let week_commits = totals.commits as f64;
            let mut buckets: Vec<(&String, &usize)> = totals.categories.iter().collect();
            buckets.sort_by(|left, right| left.0.cmp(right.0));
            buckets
                .into_iter()
                .map(move |(change_type, &commit_count)| WeeklyCategorization {
                    week: week.clone(),
                    change_type: change_type.clone(),
                    commit_count,
                    pct_of_week: if week_commits > 0.0 {
                        (commit_count as f64) * 100.0 / week_commits
                    } else {
                        0.0
                    },
                })
        })
        .collect()
}
/// Lists commits that carry no ticket reference, newest first.
///
/// Commits whose stored category is `"boilerplate"` are left out. Each entry
/// shows the first line of the message and the author name mapped from the
/// email (falling back to the name on the row).
// NOTE(review): only the stored category is checked; commits flagged as
// boilerplate by message pattern or size are not excluded here. Confirm
// whether that is intended.
fn build_untracked_commits(
    rows: &[CommitRow],
    email_to_name: &HashMap<String, String>,
) -> Vec<UntrackedCommit> {
    let mut out: Vec<UntrackedCommit> = rows
        .iter()
        // A former second filter (`category unset/unclassified || !ticketed`)
        // was always true for rows passing this one, so it has been dropped.
        .filter(|r| !r.ticketed && r.category.as_deref() != Some("boilerplate"))
        .map(|r| UntrackedCommit {
            sha: r.sha.clone(),
            author: email_to_name
                .get(&r.author_email)
                .cloned()
                .unwrap_or_else(|| r.author_name.clone()),
            date: r.timestamp.to_rfc3339(),
            message: r.message.lines().next().unwrap_or("").to_string(),
        })
        .collect();
    // Descending order on the RFC 3339 date strings.
    out.sort_by(|a, b| b.date.cmp(&a.date));
    out
}
/// PR-derived numbers shared by the velocity and DORA computations.
struct VelocityInputs {
/// Mean cycle time in hours over the PRs kept by the cycle-time filter; `0.0` when none.
cycle_time_avg: f64,
/// Median cycle time in hours over the same PRs; `0.0` when none.
cycle_time_median: f64,
/// Merged PRs divided by the number of weeks that had at least one merge.
pr_throughput_per_week: f64,
/// Number of PRs kept by the cycle-time filter.
pr_count: usize,
/// Merged PR count keyed by the week label of the merge time.
pr_per_week: HashMap<String, usize>,
}
/// Derives cycle-time and throughput figures from the pull requests.
///
/// Cycle time is merge time minus creation time in hours. Only values in
/// `[0.5, 720.0]` hours are kept for the average, the median, and `pr_count`.
/// The median of an even number of samples is the mean of the two middle
/// values. Throughput counts every merged PR and averages over the weeks that
/// saw at least one merge.
fn compute_velocity_inputs(prs: &[PrRow]) -> VelocityInputs {
    let mut cycle_times: Vec<f64> = prs
        .iter()
        .filter_map(|p| {
            p.merged_at.map(|m| {
                let secs = (m - p.created_at).num_seconds();
                (secs as f64) / 3600.0
            })
        })
        .filter(|h| *h >= 0.5 && *h <= 720.0)
        .collect();
    cycle_times.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let pr_count = cycle_times.len();
    let cycle_time_avg = if pr_count == 0 {
        0.0
    } else {
        cycle_times.iter().sum::<f64>() / pr_count as f64
    };
    let cycle_time_median = match pr_count {
        0 => 0.0,
        n if n % 2 == 1 => cycle_times[n / 2],
        // Even count: average the two middle samples instead of taking the
        // upper one.
        n => (cycle_times[n / 2 - 1] + cycle_times[n / 2]) / 2.0,
    };
    let mut pr_per_week: HashMap<String, usize> = HashMap::new();
    for pr in prs {
        if let Some(merged) = pr.merged_at {
            *pr_per_week.entry(iso_week_label(&merged)).or_insert(0) += 1;
        }
    }
    let pr_throughput_per_week = if pr_per_week.is_empty() {
        0.0
    } else {
        pr_per_week.values().copied().sum::<usize>() as f64 / pr_per_week.len() as f64
    };
    VelocityInputs {
        cycle_time_avg,
        cycle_time_median,
        pr_throughput_per_week,
        pr_count,
        pr_per_week,
    }
}
fn build_weekly_velocity(
week_totals: &BTreeMap<String, WeekTotal>,
pr_per_week: &HashMap<String, usize>,
cycle_time_avg: f64,
) -> Vec<WeeklyVelocity> {
week_totals
.iter()
.map(|(week, wt)| {
let prs_merged = *pr_per_week.get(week).unwrap_or(&0);
let active = wt.developers.len();
let commits_per_dev = if active == 0 {
0.0
} else {
wt.commits as f64 / active as f64
};
WeeklyVelocity {
week: week.clone(),
prs_merged,
avg_pr_cycle_time_hours: cycle_time_avg,
story_points: 0.0,
commits_per_developer: commits_per_dev,
}
})
.collect()
}
/// Derives the DORA-style metrics from commits and pull requests.
///
/// * Deployment frequency: merged PRs per week (`total_weeks` is floored at 1).
/// * Lead time: the supplied `cycle_time_avg`.
/// * Change failure rate: the larger of the `"bugfix"` category count and
///   `revert_count`, divided by the number of commits.
/// * MTTR: mean gap in hours between consecutive bugfix-or-revert commits;
///   `0.0` with fewer than two such commits.
fn compute_dora(
    rows: &[CommitRow],
    flags: &RowFlags,
    category_total: &HashMap<String, usize>,
    prs: &[PrRow],
    cycle_time_avg: f64,
    total_weeks: usize,
    revert_count: usize,
) -> DoraMetrics {
    let weeks = total_weeks.max(1) as f64;
    let merged_prs = prs.iter().filter(|p| p.merged_at.is_some()).count();
    let deployment_frequency = merged_prs as f64 / weeks;

    let failures = category_total
        .get("bugfix")
        .copied()
        .unwrap_or(0)
        .max(revert_count);
    let change_failure_rate = match rows.len() {
        0 => 0.0,
        commits => failures as f64 / commits as f64,
    };

    let mut fix_times: Vec<DateTime<Utc>> = Vec::new();
    for (row, &reverted) in rows.iter().zip(flags.is_revert.iter()) {
        if reverted || row.category.as_deref() == Some("bugfix") {
            fix_times.push(row.timestamp);
        }
    }
    fix_times.sort();
    let mttr_hours = if fix_times.len() >= 2 {
        let gaps: Vec<f64> = fix_times
            .windows(2)
            .map(|pair| (pair[1] - pair[0]).num_seconds().abs() as f64 / 3600.0)
            .collect();
        gaps.iter().sum::<f64>() / gaps.len() as f64
    } else {
        0.0
    };

    DoraMetrics {
        deployment_frequency,
        lead_time_hours: cycle_time_avg,
        change_failure_rate,
        mttr_hours,
        performance_level: dora_level(
            deployment_frequency,
            cycle_time_avg,
            change_failure_rate,
            mttr_hours,
        ),
    }
}
/// Derives the overall quality summary.
///
/// The bugfix total is the larger of the `"bugfix"` category count and
/// `revert_count`. The quality score is
/// `1 - 0.4 * bugfix_pct - 0.6 * revert_pct`, clamped to `[0, 1]`. The defect
/// rate is the bugfix total divided by the remaining commits (`0.0` when none
/// remain).
fn compute_quality(
    total_commits: usize,
    category_total: &HashMap<String, usize>,
    revert_count: usize,
) -> QualitySummary {
    // Fraction of all commits; zero when there are no commits at all.
    let share_of_total = |count: usize| -> f64 {
        if total_commits == 0 {
            0.0
        } else {
            count as f64 / total_commits as f64
        }
    };
    let bugfix_total = category_total
        .get("bugfix")
        .copied()
        .unwrap_or(0)
        .max(revert_count);
    let bugfix_pct = share_of_total(bugfix_total);
    let revert_pct = share_of_total(revert_count);
    let quality_score = (1.0 - (bugfix_pct * 0.4) - (revert_pct * 0.6)).clamp(0.0, 1.0);
    let defect_rate = match total_commits.saturating_sub(bugfix_total) {
        0 => 0.0,
        remaining => bugfix_total as f64 / remaining as f64,
    };
    QualitySummary {
        quality_score,
        revert_count,
        revert_pct,
        bugfix_pct,
        defect_rate,
    }
}
/// Builds the headline summary.
///
/// Classification coverage is the percentage of rows that have a category;
/// the date range is rendered as `"<min> .. <max>"` in RFC 3339.
fn build_summary(
    rows: &[CommitRow],
    total_commits: usize,
    total_authors: usize,
    total_weeks: usize,
    min_ts: DateTime<Utc>,
    max_ts: DateTime<Utc>,
) -> ReportSummary {
    let mut classified_commits = 0usize;
    for row in rows {
        if row.category.is_some() {
            classified_commits += 1;
        }
    }
    let classification_coverage_pct = match total_commits {
        0 => 0.0,
        total => classified_commits as f64 * 100.0 / total as f64,
    };
    ReportSummary {
        date_range: format!("{} .. {}", min_ts.to_rfc3339(), max_ts.to_rfc3339()),
        total_commits,
        total_developers: total_authors,
        total_weeks,
        classification_coverage_pct,
    }
}
/// Scores each author's activity and summarizes their working pattern.
///
/// Each input metric is min-max normalized across all authors (a metric whose
/// values are all equal contributes zero) and combined using `weights`. The
/// primary work type is the author's most frequent category; ties resolve to
/// the alphabetically first category so the result is stable between runs.
/// The output is in the same order as `authors`.
fn compute_developer_activity(
    authors: &[AuthorSummary],
    dev_weeks: &HashMap<String, HashSet<String>>,
    dev_categories: &HashMap<String, HashMap<String, usize>>,
    weights: &ActivityWeights,
) -> Vec<DeveloperActivitySummary> {
    if authors.is_empty() {
        return Vec::new();
    }

    /// Min-max normalizes `values`; an all-equal input maps to zeros.
    fn normalize(values: &[f64]) -> Vec<f64> {
        let min = values.iter().copied().fold(f64::INFINITY, f64::min);
        let max = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        if (max - min).abs() < f64::EPSILON {
            vec![0.0; values.len()]
        } else {
            values.iter().map(|v| (v - min) / (max - min)).collect()
        }
    }

    let commits_v: Vec<f64> = authors.iter().map(|a| a.commit_count as f64).collect();
    let impact_v: Vec<f64> = authors
        .iter()
        .map(|a| (a.insertions + a.deletions) as f64)
        .collect();
    let complexity_v: Vec<f64> = authors
        .iter()
        .map(|a| {
            if a.commit_count == 0 {
                0.0
            } else {
                a.files_changed as f64 / a.commit_count as f64
            }
        })
        .collect();
    // No per-author PR data is available here, so the PR term is always zero.
    let prs_v: Vec<f64> = vec![0.0; authors.len()];
    // NOTE(review): this sums the author's per-category counts, i.e. the
    // number of classified commits, not the number of ticketed commits.
    // Confirm whether the ticketing weight was meant to use ticketed counts.
    let ticketing_v: Vec<f64> = authors
        .iter()
        .map(|a| a.categories.values().copied().sum::<usize>() as f64)
        .collect();

    // Normalize each metric once instead of rescanning min/max per author.
    let commits_n = normalize(&commits_v);
    let prs_n = normalize(&prs_v);
    let impact_n = normalize(&impact_v);
    let complexity_n = normalize(&complexity_v);
    let ticketing_n = normalize(&ticketing_v);

    authors
        .iter()
        .enumerate()
        .map(|(i, a)| {
            let score = weights.commits * commits_n[i]
                + weights.prs * prs_n[i]
                + weights.code_impact * impact_n[i]
                + weights.complexity * complexity_n[i]
                + weights.ticketing * ticketing_n[i];
            let active_weeks = dev_weeks.get(&a.email).map(|s| s.len()).unwrap_or(0);
            let avg_commits_per_week = if active_weeks == 0 {
                0.0
            } else {
                a.commit_count as f64 / active_weeks as f64
            };
            let primary_work_type = dev_categories
                .get(&a.email)
                .and_then(|m| {
                    m.iter()
                        // Highest count wins; on a tie the smaller name ranks higher.
                        .max_by(|(ka, va), (kb, vb)| va.cmp(vb).then_with(|| kb.cmp(ka)))
                        .map(|(k, _)| k.clone())
                })
                .unwrap_or_else(|| "unknown".to_string());
            DeveloperActivitySummary {
                developer_id: a.email.clone(),
                display_name: a.name.clone(),
                total_commits: a.commit_count,
                active_weeks,
                avg_commits_per_week,
                primary_work_type,
                story_points_total: 0.0,
                activity_score: score,
            }
        })
        .collect()
}
/// Maps the four DORA figures to a performance tier name.
///
/// Tiers are checked from best to worst; the first tier whose four conditions
/// all hold wins, and `"low"` is returned when none does. Each tier requires a
/// minimum deployment frequency and strict upper bounds on lead time (hours),
/// change failure rate, and MTTR (hours).
fn dora_level(deploys_per_week: f64, lead_h: f64, cfr: f64, mttr_h: f64) -> String {
    // (name, min deploys/week, max lead hours, max failure rate, max MTTR hours)
    const TIERS: [(&str, f64, f64, f64, f64); 3] = [
        ("elite", 1.0, 1.0, 0.15, 1.0),
        ("high", 0.25, 168.0, 0.30, 24.0),
        ("medium", 0.04, 720.0, 0.30, 168.0),
    ];
    for (name, min_deploys, max_lead, max_cfr, max_mttr) in TIERS.iter().copied() {
        if deploys_per_week >= min_deploys
            && lead_h < max_lead
            && cfr < max_cfr
            && mttr_h < max_mttr
        {
            return name.to_string();
        }
    }
    "low".to_string()
}
/// Parses a `"<year>-W<week>"` label into its numeric year and week.
///
/// The label is split at the first `"-W"`. Returns `None` when the separator
/// is missing or either side is not a valid integer.
fn parse_iso_week_label(label: &str) -> Option<(i32, u32)> {
    let mut pieces = label.splitn(2, "-W");
    let year_part = pieces.next()?;
    let week_part = pieces.next()?;
    match (year_part.parse::<i32>(), week_part.parse::<u32>()) {
        (Ok(year), Ok(week)) => Some((year, week)),
        _ => None,
    }
}
/// Warns when consecutive weeks were collected with different repository
/// counts, which makes week-over-week comparison unreliable.
///
/// Does nothing with fewer than two weeks. Weeks whose label cannot be parsed
/// or whose repository count is unavailable are skipped and do not reset the
/// previously seen week.
fn check_weekly_coverage_drift(
    db: &Database,
    weekly_metrics: &[crate::report::models::WeeklyMetrics],
) {
    if weekly_metrics.len() < 2 {
        return;
    }
    let mut last_seen: Option<(String, i64)> = None;
    for wm in weekly_metrics {
        let (year, week) = if let Some(parsed) = parse_iso_week_label(&wm.week) {
            parsed
        } else {
            continue;
        };
        let count = if let Ok(Some(n)) = crate::core::db::repo_count_for_week(db, year, week) {
            n
        } else {
            continue;
        };
        match &last_seen {
            Some((prev_label, prev_count)) if *prev_count != count => {
                tracing::warn!(
                    prev_week = %prev_label,
                    prev_repo_count = prev_count,
                    week = %wm.week,
                    repo_count = count,
                    "WARNING: Week-over-week comparison may be inaccurate — W{prev} was \
                     collected with {n_prev} repos, W{cur} with {n_cur} repos. Re-run \
                     `tga collect --force --from <week-start> --to <week-end>` for the \
                     prior week to normalize coverage.",
                    prev = prev_label,
                    n_prev = prev_count,
                    cur = wm.week,
                    n_cur = count,
                );
            }
            _ => {}
        }
        last_seen = Some((wm.week.clone(), count));
    }
}
/// Collects every email-looking identity named in the configuration,
/// lower-cased.
///
/// Sources are the `developer_aliases` entries and, when a team is
/// configured, each member's email and aliases. Only values containing `'@'`
/// are kept.
fn configured_alias_emails(config: &Config) -> HashSet<String> {
    /// Records `candidate` in lower case when it contains an `'@'`.
    fn remember(known: &mut HashSet<String>, candidate: &str) {
        if candidate.contains('@') {
            known.insert(candidate.to_lowercase());
        }
    }

    let mut known: HashSet<String> = HashSet::new();
    for entries in config.developer_aliases.values() {
        for entry in entries {
            remember(&mut known, entry);
        }
    }
    if let Some(team) = &config.team {
        for member in &team.members {
            remember(&mut known, &member.email);
            for alias in &member.aliases {
                remember(&mut known, alias);
            }
        }
    }
    known
}
/// Formats the ISO week of `ts` as `"<iso-year>-W<week>"`, with the week
/// number zero-padded to two digits.
fn iso_week_label(ts: &DateTime<Utc>) -> String {
    let iso = ts.iso_week();
    let (year, week) = (iso.year(), iso.week());
    format!("{year}-W{week:02}")
}