use rusqlite::params;
use tga::core::config::{expand_path, Config};
use tga::core::db::Database;
use tga::core::effort::effort_tshirt_from_size;
use super::types::{EffortBackfillArgs, EffortRow};
/// Scores commit effort for each configured repository selected by `repos_filter`
/// and, unless `dry_run` is set, persists the scored rows via [`persist_effort_rows`].
///
/// # Parameters
/// * `config` – source of the repository list. A repository's display name is its
///   configured `name`; failing that, the last component of its expanded path;
///   failing that, the full path rendered as a string.
/// * `db` – database handle passed to the per-repo scorers and used for persistence.
/// * `args` – backfill options. If `args.range` is set or `args.notes` is true the
///   git-based scorer runs; otherwise the db-only scorer runs.
/// * `repos_filter` – display names to include; an empty slice selects every repository.
/// * `since` / `until` – only logged here. They do not narrow the set of commits
///   scored, and a warning saying so is emitted whenever either is present.
/// * `dry_run` – forwarded to the scorers; when true the persistence step is skipped
///   and the summary is phrased as "would score".
///
/// # Errors
/// A failure while scoring or persisting one repository is logged, printed, and
/// excluded from the totals; processing then continues with the next repository.
/// The function itself returns `Ok(())` in every path visible in this body.
pub(super) fn backfill_effort(
    config: Config,
    db: &mut Database,
    args: EffortBackfillArgs,
    repos_filter: &[String],
    since: Option<&str>,
    until: Option<&str>,
    dry_run: bool,
) -> anyhow::Result<()> {
    // Resolve (path, display name) for every configured repo that passes the filter.
    let repos_to_process: Vec<(std::path::PathBuf, String)> = config
        .repositories
        .iter()
        .filter_map(|repo_cfg| {
            let path = expand_path(&repo_cfg.path);
            let name = repo_cfg
                .name
                .clone()
                .or_else(|| {
                    path.file_name()
                        .and_then(|s| s.to_str())
                        .map(|s| s.to_string())
                })
                .unwrap_or_else(|| path.display().to_string());
            if !repos_filter.is_empty() && !repos_filter.contains(&name) {
                return None;
            }
            Some((path, name))
        })
        .collect();

    // The date window is informational only; make that explicit to the operator.
    if since.is_some() || until.is_some() {
        tracing::info!(
            since = ?since,
            until = ?until,
            "effort backfill: applying date window filter (--since/--until/--weeks)"
        );
        tracing::warn!(
            "effort backfill: --since/--until/--weeks filters affect the log output only;\n\
             the db-only path queries all commits for each repo via `commits JOIN files`.\n\
             For precise date-scoped effort scoring use --range on the git path."
        );
    }

    if repos_to_process.is_empty() {
        println!("No matching repositories found in config.");
        return Ok(());
    }

    let use_git_path = args.range.is_some() || args.notes;

    let mut total_scored: usize = 0;
    let mut total_skipped: usize = 0;
    let mut total_repos: usize = 0;
    // Per-size counters, printed below in the order XS, S, M, L, XL.
    let mut size_counts = [0usize; 5];

    for (repo_path, repo_name) in &repos_to_process {
        // Score first; both scorers yield the same (scored, skipped, sizes, rows) tuple.
        let scored_repo = if use_git_path {
            super::effort_git::process_one_repo_git(repo_path, repo_name, db, &args, dry_run)
        } else {
            super::effort_db::process_one_repo_db(db.connection(), repo_name, &args, dry_run)
        };

        // Persist once, in a single place, so a write failure is reported per repo
        // exactly like a scoring failure.
        let result = scored_repo.and_then(|(scored, skipped, sizes, rows)| {
            if !dry_run {
                persist_effort_rows(db, &rows)?;
            }
            Ok((scored, skipped, sizes))
        });

        match result {
            Ok((scored, skipped, sizes)) => {
                total_repos += 1;
                total_scored += scored;
                total_skipped += skipped;
                for (total, count) in size_counts.iter_mut().zip(sizes.iter()) {
                    *total += *count;
                }
                let verb = if dry_run { "would score" } else { "scored" };
                println!(
                    " {repo_name}: {verb} {scored} commits, skipped {skipped} already-scored"
                );
            }
            Err(e) => {
                // Best effort: one bad repository must not abort the whole backfill.
                tracing::warn!(repo = %repo_name, error = %e, "backfill effort failed for repo");
                println!(" {repo_name}: error — {e}");
            }
        }
    }

    let verb = if dry_run { "Would score" } else { "Scored" };
    println!(
        "\nBackfill complete: {total_repos} repos, {verb} {total_scored} commits \
         ({} skipped already-scored).",
        total_skipped,
    );
    println!(
        " Size distribution: XS={} S={} M={} L={} XL={}",
        size_counts[0], size_counts[1], size_counts[2], size_counts[3], size_counts[4],
    );
    Ok(())
}
pub(super) fn persist_effort_rows(db: &mut Database, rows: &[EffortRow]) -> anyhow::Result<()> {
let thresholds = tga::core::effort_percentile::load_thresholds(db.connection()).unwrap_or(None);
for chunk in rows.chunks(1000) {
let conn = db.connection_mut();
let tx = conn.transaction()?;
{
let mut stmt = tx.prepare(
"INSERT OR REPLACE INTO fact_commit_effort \
(sha, repository, size, score, loc, files, test_loc, tests_factor, \
formula_version, computed_at, effort_tshirt) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
)?;
for row in chunk {
let tshirt = match &thresholds {
Some(t) => t.band_for_score(row.score),
None => effort_tshirt_from_size(&row.size),
};
stmt.execute(params![
row.sha,
row.repository,
row.size,
row.score,
row.loc as i64,
row.files as i64,
row.test_loc as i64,
row.tests_factor,
row.formula_version,
row.computed_at,
tshirt,
])?;
}
}
tx.commit()?;
}
Ok(())
}
/// Recomputes the `effort_tshirt` column by delegating to
/// `tga::core::effort_percentile::rebin_all`, then prints a summary.
///
/// With `dry_run` set, nothing is rebinned: the function counts the rows in
/// `fact_commit_effort` (a failed count is reported as 0), prints what a real run
/// would do, and returns.
///
/// Otherwise the summary reports the number of rows updated and either the
/// thresholds returned by `rebin_all` or, when none were returned, that the static
/// size-label mapping was used.
///
/// # Errors
/// Returns an error prefixed with "percentile rebin failed" when `rebin_all` fails.
pub(super) fn backfill_effort_tshirt(db: &mut Database, dry_run: bool) -> anyhow::Result<()> {
    if dry_run {
        let count = db
            .connection()
            .query_row("SELECT COUNT(*) FROM fact_commit_effort", [], |row| {
                row.get::<_, i64>(0)
            })
            .unwrap_or(0);
        println!(
            "Dry run — would rebin effort_tshirt (percentile) for {count} row(s) \
             and persist corpus thresholds to effort_percentile_thresholds. \
             No changes written."
        );
        return Ok(());
    }

    let (rows_updated, corpus) =
        match tga::core::effort_percentile::rebin_all(db.connection_mut()) {
            Ok(outcome) => outcome,
            Err(e) => return Err(anyhow::anyhow!("percentile rebin failed: {e}")),
        };

    if let Some(bands) = &corpus {
        println!(
            "Rebinned effort_tshirt (percentile) for {rows_updated} row(s). \
             Corpus thresholds persisted: p20={:.3} p40={:.3} p60={:.3} p80={:.3} \
             (sample_count={}).",
            bands.p20, bands.p40, bands.p60, bands.p80, bands.sample_count,
        );
    } else {
        println!(
            "Rebinned effort_tshirt for {rows_updated} row(s) using \
             static size-label mapping (corpus too small for percentile binning; \
             run again after collecting more commits)."
        );
    }
    Ok(())
}