use tga::classify::taxonomy::TaxonomyRegistry;
use tga::classify::ClassificationPipeline;
use tga::collect::git::scan_and_persist;
use tga::core::config::{expand_path, Config};
use tga::core::db::{CheckpointMode, Database};
use tga::report::aggregator::Aggregator;
use super::types::ComplexityBackfillArgs;
/// Backfills complexity scores for existing classifications.
///
/// With `dry_run` set, nothing is written: the function prints how many rows in
/// `classifications` have `complexity IS NULL` and a `method` other than `'exact_rule'`,
/// then returns.
///
/// Otherwise it builds a [`ClassificationPipeline`] from `config` and runs its complexity
/// backfill against `db`. When `args.use_llm` is set, `use_llm` is forced on in the
/// classification config (a default classification config is created if none exists).
/// A WAL TRUNCATE checkpoint is attempted afterwards on a best-effort basis.
///
/// # Errors
/// Returns an error if the pipeline backfill fails. A failed dry-run count query or a
/// failed checkpoint is logged as a warning and does not fail the call.
pub(super) async fn backfill_complexity(
    config: Config,
    db: &mut Database,
    args: ComplexityBackfillArgs,
    dry_run: bool,
) -> anyhow::Result<()> {
    if dry_run {
        let candidates: i64 = db
            .connection()
            .query_row(
                "SELECT COUNT(*) FROM classifications \
                 WHERE complexity IS NULL AND method != 'exact_rule'",
                [],
                |row| row.get(0),
            )
            .unwrap_or_else(|e| {
                // The count is informational, so keep the best-effort fallback, but surface
                // the failure instead of silently presenting it as "0 candidates".
                tracing::warn!(
                    error = %e,
                    "dry-run candidate count query failed; reporting 0"
                );
                0
            });
        println!(
            "Dry run — would request complexity scores for {candidates} classification(s) \
             (complexity IS NULL, method != 'exact_rule'). No changes written."
        );
        return Ok(());
    }

    let mut cfg = config;
    if args.use_llm {
        // The CLI flag overrides whatever the config file says about LLM usage.
        let classification = cfg
            .classification
            .get_or_insert_with(tga::core::config::ClassificationConfig::default);
        classification.use_llm = true;
    }

    let pipeline = ClassificationPipeline::new(cfg);
    let updated = pipeline.backfill_complexity(db).await?;
    println!("Backfilled complexity for {updated} commit(s)");

    // Checkpoint failure is not fatal: the backfill itself has already completed.
    if let Err(e) = db.wal_checkpoint(CheckpointMode::Truncate) {
        tracing::warn!(error = %e, "WAL TRUNCATE checkpoint failed after complexity backfill");
    }
    Ok(())
}
/// Re-runs the reachability scan for configured repositories and prints per-repo results.
///
/// Each entry in `config.repositories` is resolved to a display name: the configured
/// `name` if present, otherwise the final component of the expanded path, otherwise the
/// full path. When `repos_filter` is non-empty, only repositories whose resolved name
/// appears in it are selected.
///
/// With `dry_run` set, nothing is scanned or written: the function prints how many
/// repositories are selected and returns. The count is computed from the same selection
/// the real run uses, so filter names that match no configured repository are not counted.
///
/// # Errors
/// A failing scan for one repository is logged and printed, and the remaining
/// repositories are still processed; the function returns `Ok(())` in that case too.
pub(super) fn backfill_reachability(
    config: Config,
    db: &mut Database,
    repos_filter: &[String],
    dry_run: bool,
) -> anyhow::Result<()> {
    // Resolve and filter once, so the dry-run report and the real run agree on the
    // set of repositories.
    let targets: Vec<_> = config
        .repositories
        .iter()
        .map(|repo_cfg| {
            let path = expand_path(&repo_cfg.path);
            let name = repo_cfg
                .name
                .clone()
                .or_else(|| {
                    path.file_name()
                        .and_then(|s| s.to_str())
                        .map(|s| s.to_string())
                })
                .unwrap_or_else(|| path.display().to_string());
            (name, path)
        })
        .filter(|(name, _)| repos_filter.is_empty() || repos_filter.contains(name))
        .collect();

    if dry_run {
        println!(
            "Dry run — would re-run reachability scan for {} repo(s). No changes written.",
            targets.len()
        );
        return Ok(());
    }

    let reach_cfg = &config.reachability;
    let conn = db.connection();
    let mut total_repos = 0usize;
    let mut total_rows = 0usize;
    let mut total_default_branch = 0usize;
    let mut failed_repos = 0usize;

    for (name, path) in targets {
        total_repos += 1;
        tracing::info!(repo = %name, "backfill reachability scan");
        match scan_and_persist(&path, conn, reach_cfg, Some(&name)) {
            Ok(stats) => {
                println!(
                    " {name}: {} rows upserted \
                     ({} on default branch, {} tagged, {} on release branch)",
                    stats.rows_upserted,
                    stats.default_branch_commits,
                    stats.tagged_commits,
                    stats.release_branch_commits,
                );
                total_rows += stats.rows_upserted;
                total_default_branch += stats.default_branch_commits;
            }
            Err(e) => {
                // One broken repository must not abort the whole backfill.
                let msg = format!(" {name}: reachability scan failed: {e}");
                tracing::warn!("{msg}");
                println!("{msg}");
                failed_repos += 1;
            }
        }
    }

    println!(
        "\nBackfill complete: {total_repos} repos, {total_rows} rows upserted, \
         {total_default_branch} commits on default branch."
    );
    if failed_repos > 0 {
        println!("{} repo(s) had errors (see warnings above).", failed_repos);
    }
    Ok(())
}
/// Fills in `top_level_category` for classifications where it is still NULL.
///
/// Every row with a NULL `top_level_category` is read; rows whose `subcategory` is set
/// and can be resolved by the built-in taxonomy registry are queued for update, all
/// others are left untouched. With `dry_run` set, only the number of queued rows is
/// printed. Otherwise the queued updates are applied inside a single transaction.
///
/// # Errors
/// Returns an error if the select, any row decode, an update, or the commit fails.
pub(super) fn backfill_top_level(db: &mut Database, dry_run: bool) -> anyhow::Result<()> {
    use rusqlite::params;

    let taxonomy = TaxonomyRegistry::with_builtins();

    // Read phase, scoped so the shared borrow of `db` ends before the write phase.
    let pending: Vec<(i64, String)> = {
        let conn = db.connection();
        let mut select = conn.prepare(
            "SELECT id, subcategory FROM classifications WHERE top_level_category IS NULL",
        )?;
        let fetched: Vec<(i64, Option<String>)> = select
            .query_map([], |row| {
                let row_id = row.get::<_, i64>(0)?;
                let subcategory = row.get::<_, Option<String>>(1)?;
                Ok((row_id, subcategory))
            })?
            .collect::<Result<_, _>>()?;

        fetched
            .into_iter()
            .filter_map(|(row_id, subcategory)| {
                // Skip rows with no subcategory or one the registry cannot resolve.
                let sub = subcategory?;
                let top = taxonomy.resolve(&sub)?;
                Some((row_id, top.as_str_snake().to_string()))
            })
            .collect()
    };

    if dry_run {
        println!(
            "Dry run — would update top_level_category for {} classification(s). \
             No changes written.",
            pending.len(),
        );
        return Ok(());
    }

    // Write phase: one prepared statement reused for every row, all in one transaction.
    let conn = db.connection_mut();
    let tx = conn.transaction()?;
    let mut update =
        tx.prepare("UPDATE classifications SET top_level_category = ?1 WHERE id = ?2")?;
    for (row_id, category) in &pending {
        update.execute(params![category, row_id])?;
    }
    // The statement borrows the transaction and has to go before `commit` consumes it.
    drop(update);
    tx.commit()?;

    println!(
        "Updated top_level_category for {} classification(s).",
        pending.len()
    );
    Ok(())
}
pub(super) fn backfill_quality(db: &mut Database, dry_run: bool) -> anyhow::Result<()> {
let config = tga::core::config::Config::default();
if dry_run {
let candidate: i64 = db
.connection()
.query_row(
"SELECT COUNT(*) FROM ( \
SELECT DISTINCT \
COALESCE(NULLIF(a.canonical_email, ''), c.author_email) AS ae, \
CAST(strftime('%Y', c.timestamp) AS INTEGER) AS yr, \
CAST(strftime('%W', c.timestamp) AS INTEGER) AS wk, \
c.repository \
FROM commits c \
LEFT JOIN authors a ON a.id = c.author_id \
)",
[],
|r| r.get(0),
)
.unwrap_or(0);
println!(
"Dry run — would write approximately {candidate} quality row(s) \
to fact_weekly_quality. No changes written."
);
return Ok(());
}
let data =
Aggregator::build(db, &config).map_err(|e| anyhow::anyhow!("aggregation failed: {e}"))?;
let written = Aggregator::persist_weekly_quality(db, &data)
.map_err(|e| anyhow::anyhow!("quality persist failed: {e}"))?;
println!("Backfilled fact_weekly_quality: {written} row(s) written (UPSERT semantics).");
if let Err(e) = db.wal_checkpoint(tga::core::db::CheckpointMode::Truncate) {
tracing::warn!(error = %e, "WAL TRUNCATE checkpoint failed after quality backfill");
}
Ok(())
}