use rusqlite::{params, Connection};
use tga::core::effort::{compute_effort, effort_tshirt_from_size, FORMULA_VERSION};
use super::types::{EffortBackfillArgs, EffortRow};
/// Computes effort rows for the commits of `repo_name` that are stored in `conn`.
///
/// The function reads every `(commit, file)` pair of the repository ordered by
/// `timestamp, sha`, groups consecutive rows that share a sha, and scores each group
/// with [`compute_effort`]. It only issues `SELECT` statements; the computed rows are
/// handed back to the caller.
///
/// # Arguments
///
/// * `conn` - open database connection holding the `commits`, `files` and
///   `fact_commit_effort` tables.
/// * `repo_name` - value matched against the `repository` column of those tables.
/// * `args` - `args.force` disables the "already scored" filter; `args.limit` caps the
///   number of rows produced (no cap when `None`).
/// * `dry_run` - only recorded in the completion log line.
///
/// # Returns
///
/// `(scored, skipped, size_counts, rows)` where `scored == rows.len()`, `skipped` is the
/// number of commits ignored because they already have an effort row, and `size_counts`
/// buckets the produced rows as `[XS, S, M, L, anything else]`.
///
/// # Errors
///
/// Fails when preparing or running the main queries fails, or when a row cannot be
/// decoded. A failure of the informational commit count is logged and reported as `0`
/// instead of aborting the backfill.
pub(super) fn process_one_repo_db(
    conn: &Connection,
    repo_name: &str,
    args: &EffortBackfillArgs,
    dry_run: bool,
) -> anyhow::Result<(usize, usize, [usize; 5], Vec<EffortRow>)> {
    // When `args.force` is set nothing counts as already scored, so every commit is recomputed.
    let already_scored: std::collections::HashSet<String> = if args.force {
        std::collections::HashSet::new()
    } else {
        let mut stmt = conn.prepare("SELECT sha FROM fact_commit_effort WHERE repository = ?1")?;
        let shas = stmt.query_map(params![repo_name], |row| row.get::<_, String>(0))?;
        let set = shas.collect::<Result<std::collections::HashSet<String>, _>>()?;
        set
    };

    // The count is only used for logging, so a failure must not abort the backfill;
    // it is surfaced as a warning instead of being silently turned into 0.
    let in_db: i64 = conn
        .query_row(
            "SELECT COUNT(DISTINCT c.sha) FROM commits c WHERE c.repository = ?1",
            params![repo_name],
            |r| r.get(0),
        )
        .unwrap_or_else(|err| {
            tracing::warn!(
                repo = %repo_name,
                error = %err,
                "effort backfill db path: failed to count commits; reporting 0"
            );
            0
        });

    tracing::info!(
        repo = %repo_name,
        in_db = in_db,
        already_scored = already_scored.len(),
        "effort backfill db path: starting"
    );

    // One row per (commit, file). The ordering keeps all rows of a commit adjacent so
    // they can be grouped in a single streaming pass.
    let mut stmt = conn.prepare(
        "SELECT c.sha, f.path, f.insertions, f.deletions \
FROM commits c \
JOIN files f ON f.commit_id = c.id \
WHERE c.repository = ?1 \
ORDER BY c.timestamp ASC, c.sha ASC",
    )?;

    let limit = args.limit.unwrap_or(usize::MAX);
    let mut records: Vec<EffortRow> = Vec::new();
    let mut skipped: usize = 0;
    let mut current_sha: Option<String> = None;
    let mut current_files: Vec<(String, u32, u32)> = Vec::new();

    // Scores one fully collected commit. Returns `false` once the limit has been
    // reached (the commit is then left untouched) and `true` otherwise.
    let flush = |sha: &str,
                 files: &[(String, u32, u32)],
                 already_scored: &std::collections::HashSet<String>,
                 records: &mut Vec<EffortRow>,
                 skipped: &mut usize|
     -> bool {
        if records.len() >= limit {
            return false;
        }
        if already_scored.contains(sha) {
            *skipped += 1;
            return true;
        }
        // Defensive: the inner join above only yields commits that have at least one
        // file row, so this branch is not expected to trigger with the current query.
        if files.is_empty() {
            tracing::warn!(
                sha = %sha,
                "commit has no rows in the files table; skipping effort computation"
            );
            return true;
        }

        let file_refs: Vec<(&str, u32, u32)> =
            files.iter().map(|(p, i, d)| (p.as_str(), *i, *d)).collect();
        let effort = compute_effort(file_refs);

        // Seconds since the Unix epoch; falls back to 0 if the clock is before the
        // epoch or the value does not fit in an i64.
        let computed_at = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .ok()
            .and_then(|d| i64::try_from(d.as_secs()).ok())
            .unwrap_or(0);

        records.push(EffortRow {
            sha: sha.to_string(),
            repository: repo_name.to_string(),
            size: effort.size_label().to_string(),
            score: effort.score,
            loc: effort.loc,
            files: effort.files,
            test_loc: effort.test_loc,
            tests_factor: effort.tests_factor,
            formula_version: FORMULA_VERSION.to_string(),
            computed_at,
            effort_tshirt: effort_tshirt_from_size(effort.size_label()),
        });

        if records.len().is_multiple_of(1000) {
            tracing::info!(
                repo = %repo_name,
                processed = records.len(),
                "effort backfill db path: progress"
            );
        }
        true
    };

    let rows = stmt.query_map(params![repo_name], |row| {
        Ok((
            row.get::<_, String>(0)?,
            row.get::<_, String>(1)?,
            row.get::<_, u32>(2)?,
            row.get::<_, u32>(3)?,
        ))
    })?;

    for row_res in rows {
        let (sha, path, ins, del) = row_res?;

        // A change of sha (or the very first row) starts a new commit group.
        if current_sha.as_deref() != Some(sha.as_str()) {
            if let Some(prev_sha) = current_sha.take() {
                let should_continue = flush(
                    &prev_sha,
                    &current_files,
                    &already_scored,
                    &mut records,
                    &mut skipped,
                );
                current_files.clear();
                // Stop reading as soon as the limit is hit; `current_sha` stays `None`
                // so the trailing flush below does nothing.
                if !should_continue || records.len() >= limit {
                    break;
                }
            }
            current_sha = Some(sha);
        }
        current_files.push((path, ins, del));
    }

    // Score the last group, which has no following row to trigger its flush.
    // `flush` enforces the limit itself, so no extra check is needed here.
    if let Some(last_sha) = current_sha.take() {
        flush(
            &last_sha,
            &current_files,
            &already_scored,
            &mut records,
            &mut skipped,
        );
    }

    // Bucket order: XS, S, M, L, then everything else in the last slot.
    let mut size_counts = [0usize; 5];
    for row in &records {
        let idx = match row.size.as_str() {
            "XS" => 0,
            "S" => 1,
            "M" => 2,
            "L" => 3,
            _ => 4,
        };
        size_counts[idx] += 1;
    }

    tracing::info!(
        repo = %repo_name,
        in_db = in_db,
        scored = records.len(),
        skipped = skipped,
        dry_run = dry_run,
        "effort backfill db path: complete"
    );

    Ok((records.len(), skipped, size_counts, records))
}