tga 2.8.1

Developer productivity analytics — git commit collection, classification, and reporting
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
//! Flag backfill operations: revert, ticket, ticketed, ai-detection.
//!
//! Why: these four operations share the same pattern — scan `commits.message`,
//! compute a new value, diff against the stored value, and batch-UPDATE changed
//! rows. Grouping them here keeps the effort and misc modules focused and
//! makes the shared `build_commits_filter_sql` helper easy to find.

use rusqlite::params;
use tga::collect::ai_attribution::{detect_agentic_mode, detect_ai_tool};
use tga::collect::ticket::{extract_ticket_id, is_ticketed};
use tga::core::db::Database;

/// Build a SQL fragment and bind params for the common backfill filters.
///
/// Why: revert-flags and ticket-ids both need `WHERE` clauses for repos,
/// since, and until; extracting this avoids duplicating the SQL-building
/// logic in each function.
/// What: given a base SELECT (ending before any WHERE clause), appends
/// predicates for `repository IN (…)`, `timestamp >= ?`, `timestamp <= ?`
/// as needed, returning the assembled SQL string and bound values.
/// Test: exercised indirectly by backfill filter tests.
pub(super) fn build_commits_filter_sql(
    base_sql: &str,
    repos: &[String],
    since: Option<&str>,
    until: Option<&str>,
) -> (String, Vec<rusqlite::types::Value>) {
    use rusqlite::types::Value;
    let mut predicates: Vec<String> = Vec::new();
    let mut params: Vec<Value> = Vec::new();

    if !repos.is_empty() {
        let start = params.len() + 1;
        for r in repos {
            params.push(Value::Text(r.clone()));
        }
        let end = params.len();
        let placeholders: Vec<String> = (start..=end).map(|i| format!("?{i}")).collect();
        predicates.push(format!("repository IN ({})", placeholders.join(", ")));
    }
    if let Some(s) = since {
        params.push(Value::Text(s.to_string()));
        predicates.push(format!("timestamp >= ?{}", params.len()));
    }
    if let Some(u) = until {
        params.push(Value::Text(u.to_string()));
        predicates.push(format!("timestamp <= ?{}", params.len()));
    }

    let sql = if predicates.is_empty() {
        base_sql.to_string()
    } else {
        format!("{base_sql} WHERE {}", predicates.join(" AND "))
    };
    (sql, params)
}

/// Detect if a commit message looks like a revert.
///
/// Why: the `commits.is_revert` column written by `tga backfill is-revert`
/// must agree with the revert rate the report computes from the same commit
/// messages. Issue #377 unified both paths onto
/// [`tga::core::revert::is_revert`]; this thin wrapper preserves the
/// existing call sites while delegating to the single source of truth.
/// What: forwards to [`tga::core::revert::is_revert`], which catches
/// `Revert "..."`, `revert:`, `revert(scope):`, `^revert`, and `^fix.*revert`
/// (case-insensitive, first-line only).
/// Test: `tests::revert_detector_matches_expected_forms` in `tests.rs`, plus the
/// canonical coverage in `crate::core::revert::tests`.
pub(super) fn is_revert(message: &str) -> bool {
    tga::core::revert::is_revert(message)
}

/// Scan every commit message for revert patterns and update `is_revert`.
///
/// Why: the `is_revert` boolean must mirror the verdict produced by the
/// classification cascade so DORA queries (CFR, MTTR) can join through it.
/// What: scans `commits` (filtered by repos/since/until when supplied),
/// detects revert prefixes, and updates changed rows. Supports dry-run.
/// Test: see `tests::backfill_revert_flags_updates_only_changed_rows`.
pub(super) fn backfill_revert_flags(
    db: &mut Database,
    dry_run: bool,
    repos_filter: &[String],
    since: Option<&str>,
    until: Option<&str>,
) -> anyhow::Result<()> {
    let mut to_update: Vec<(i64, bool)> = Vec::new();
    {
        let conn = db.connection();
        // Build filtered SQL for repos/since/until.
        let (sql, params) = build_commits_filter_sql(
            "SELECT id, message, is_revert FROM commits",
            repos_filter,
            since,
            until,
        );
        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt.query_map(rusqlite::params_from_iter(params.iter()), |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, i64>(2)?,
            ))
        })?;
        for r in rows {
            let (id, message, current) = r?;
            let detected = is_revert(&message);
            let target = if detected { 1 } else { 0 };
            if target != current {
                to_update.push((id, detected));
            }
        }
    }

    if dry_run {
        println!(
            "Would update {} commits ({} would be marked as reverts). No changes written.",
            to_update.len(),
            to_update.iter().filter(|(_, v)| *v).count(),
        );
        return Ok(());
    }

    let conn = db.connection_mut();
    let tx = conn.transaction()?;
    {
        let mut up = tx.prepare("UPDATE commits SET is_revert = ?1 WHERE id = ?2")?;
        for (id, flag) in &to_update {
            up.execute(params![if *flag { 1 } else { 0 }, id])?;
        }
    }
    tx.commit()?;
    println!(
        "Updated is_revert on {} commits ({} are reverts).",
        to_update.len(),
        to_update.iter().filter(|(_, v)| *v).count(),
    );
    Ok(())
}

/// Scan every commit message, extract the first ticket reference, and
/// update `ticket_id` + `ticketed`.
///
/// Why: ticket extraction patterns evolve; backfilling lets operators
/// update the DB after extending patterns without re-collecting.
/// What: scans `commits` (filtered by repos/since/until when supplied),
/// extracts ticket IDs, and updates changed rows.
/// Test: see `tests::backfill_ticket_ids_populates_ticket_id`.
pub(super) fn backfill_ticket_ids(
    db: &mut Database,
    dry_run: bool,
    repos_filter: &[String],
    since: Option<&str>,
    until: Option<&str>,
) -> anyhow::Result<()> {
    let mut to_update: Vec<(i64, Option<String>, i64)> = Vec::new();
    {
        let conn = db.connection();
        let (sql, params) = build_commits_filter_sql(
            "SELECT id, message, ticket_id, ticketed FROM commits",
            repos_filter,
            since,
            until,
        );
        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt.query_map(rusqlite::params_from_iter(params.iter()), |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, i64>(3)?,
            ))
        })?;
        for r in rows {
            let (id, message, current_id, current_ticketed) = r?;
            let extracted = extract_ticket_id(&message);
            let ticketed = if is_ticketed(&message) { 1 } else { 0 };
            if extracted != current_id || ticketed != current_ticketed {
                to_update.push((id, extracted, ticketed));
            }
        }
    }

    if dry_run {
        let with_id = to_update.iter().filter(|(_, id, _)| id.is_some()).count();
        println!(
            "Would update {} commits ({} would gain a ticket_id). No changes written.",
            to_update.len(),
            with_id,
        );
        return Ok(());
    }

    let conn = db.connection_mut();
    let tx = conn.transaction()?;
    {
        let mut up =
            tx.prepare("UPDATE commits SET ticket_id = ?1, ticketed = ?2 WHERE id = ?3")?;
        for (id, ticket, ticketed) in &to_update {
            up.execute(params![ticket, ticketed, id])?;
        }
    }
    tx.commit()?;
    let with_id = to_update.iter().filter(|(_, id, _)| id.is_some()).count();
    println!(
        "Updated {} commits ({} now have a ticket_id).",
        to_update.len(),
        with_id,
    );
    Ok(())
}

/// Recompute `commits.ticketed` using the corrected `is_ticketed` logic.
///
/// Why: before issue #445 the `gh_bare` pattern (`#N` preceded by whitespace)
/// was included in [`is_ticketed`], inflating the ticketed rate to ~100%.
/// After the fix, bare `#N` no longer marks a commit as ticketed. This
/// backfill lets operators correct existing rows without re-collecting.
/// What: loads every commit (filtered by repos/since/until), recomputes
/// `ticketed` from `commits.message` using the fixed `is_ticketed`, and
/// updates rows whose stored value differs. No LLM required — pure regex.
/// Test: `tests::backfill_ticketed_corrects_bare_hash_rows`.
///
/// # Errors
///
/// Propagates database errors from the underlying queries.
pub(super) fn backfill_ticketed(
    db: &mut Database,
    dry_run: bool,
    repos_filter: &[String],
    since: Option<&str>,
    until: Option<&str>,
) -> anyhow::Result<()> {
    let mut to_update: Vec<(i64, i64)> = Vec::new();
    {
        let conn = db.connection();
        let (sql, params) = build_commits_filter_sql(
            "SELECT id, message, ticketed FROM commits",
            repos_filter,
            since,
            until,
        );
        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt.query_map(rusqlite::params_from_iter(params.iter()), |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, i64>(2)?,
            ))
        })?;
        for r in rows {
            let (id, message, current) = r?;
            let new_val = if is_ticketed(&message) { 1 } else { 0 };
            if new_val != current {
                to_update.push((id, new_val));
            }
        }
    }

    let now_ticketed = to_update.iter().filter(|(_, v)| *v == 1).count();
    let now_unticketed = to_update.iter().filter(|(_, v)| *v == 0).count();

    if dry_run {
        println!(
            "Dry run — would update {} commits \
             ({} newly ticketed, {} newly unticketed). No changes written.",
            to_update.len(),
            now_ticketed,
            now_unticketed,
        );
        return Ok(());
    }

    let conn = db.connection_mut();
    let tx = conn.transaction()?;
    {
        let mut up = tx.prepare("UPDATE commits SET ticketed = ?1 WHERE id = ?2")?;
        for (id, val) in &to_update {
            up.execute(params![val, id])?;
        }
    }
    tx.commit()?;
    println!(
        "Updated ticketed on {} commits \
         ({} newly ticketed, {} newly unticketed).",
        to_update.len(),
        now_ticketed,
        now_unticketed,
    );
    Ok(())
}

/// Mark every commit whose existing classification was produced by the LLM
/// tier with a confidence below 0.7 as needing re-classification.
///
/// Why: clearing `classification_id` on low-confidence LLM verdicts enables the
/// next `tga classify` run to reprocess them with updated models or rules.
/// What: sets `classification_id = NULL, confidence = NULL` on commits that
/// match `method='llm' AND confidence < 0.7`. Supports dry-run.
/// Test: implicit via the classification pipeline integration tests.
pub(super) fn backfill_ai_detection(db: &mut Database, dry_run: bool) -> anyhow::Result<()> {
    let conn = db.connection();
    // Count first.
    let count: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM commits c \
             JOIN classifications cl ON c.classification_id = cl.id \
             WHERE cl.method = 'llm' AND COALESCE(c.confidence, cl.confidence) < 0.7",
            [],
            |row| row.get(0),
        )
        .unwrap_or(0);

    if dry_run {
        println!(
            "Would re-classify {count} commits (method='llm', confidence<0.7). No changes written."
        );
        return Ok(());
    }

    let conn = db.connection_mut();
    let tx = conn.transaction()?;
    let n = tx.execute(
        "UPDATE commits SET classification_id = NULL, confidence = NULL \
         WHERE classification_id IN ( \
             SELECT id FROM classifications WHERE method = 'llm' \
         ) AND COALESCE(confidence, 0.0) < 0.7",
        [],
    )?;
    tx.commit()?;
    println!(
        "Cleared classification on {n} commits — next `tga classify` run will reprocess them."
    );
    Ok(())
}

/// Scan existing `commits.message` for AI co-authorship trailers.
///
/// Why: `is_ai_assisted` and `ai_tool` columns were added in migration v17 and
/// the canonical `agentic_mode` column in v21 (issue #1113); existing rows have
/// `is_ai_assisted = 0`, `ai_tool = NULL`, and `agentic_mode = 'none'`
/// regardless of their actual history. Before issue #1334 this repair path only
/// rewrote `is_ai_assisted` / `ai_tool`, leaving historical `agentic_mode` stuck
/// at `'none'` — so downstream consumers querying `agentic_mode = 'full_agentic'`
/// missed every backfilled Claude Code commit. This backfill now retroactively
/// detects Claude, GitHub Copilot, and Cursor via `Co-Authored-By:` trailers AND
/// recomputes `agentic_mode`, exactly as the forward `tga collect` path does.
/// What: loads every commit (filtered by repos/since/until), runs
/// [`detect_ai_tool`] and [`detect_agentic_mode`] on the message, and updates
/// rows where `ai_tool` OR `agentic_mode` differs from the stored value. No LLM
/// required — pure string matching, identical to the extractor's INSERT path.
/// Test: `tests::backfill_ai_detection_commits_detects_claude` (ai_tool) and
/// `tests::backfill_ai_detection_commits_repairs_agentic_mode` (agentic_mode).
///
/// # Errors
///
/// Propagates database errors from the underlying queries.
pub(super) fn backfill_ai_detection_commits(
    db: &mut Database,
    dry_run: bool,
    repos_filter: &[String],
    since: Option<&str>,
    until: Option<&str>,
) -> anyhow::Result<()> {
    // (id, is_ai, ai_tool, agentic_mode) — agentic_mode is the forward-path
    // string ("none" | "ide_assisted" | "full_agentic") via `AgenticMode::as_str`.
    let mut to_update: Vec<(i64, i64, Option<&'static str>, &'static str)> = Vec::new();
    {
        let conn = db.connection();
        let (sql, params) = build_commits_filter_sql(
            "SELECT id, message, ai_tool, agentic_mode FROM commits",
            repos_filter,
            since,
            until,
        );
        let mut stmt = conn.prepare(&sql)?;
        let rows: Vec<(i64, String, Option<String>, String)> = stmt
            .query_map(rusqlite::params_from_iter(params.iter()), |row| {
                Ok((
                    row.get::<_, i64>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, Option<String>>(2)?,
                    // Pre-v21 rows default to 'none'; COALESCE guards any NULLs.
                    row.get::<_, Option<String>>(3)?
                        .unwrap_or_else(|| "none".to_string()),
                ))
            })?
            .collect::<Result<_, _>>()?;

        for (id, message, current_tool, current_mode) in rows {
            let detected = detect_ai_tool(&message);
            let mode = detect_agentic_mode(&message).as_str();
            let tool_changed = detected != current_tool.as_deref();
            let mode_changed = mode != current_mode;
            if tool_changed || mode_changed {
                let is_ai = if detected.is_some() { 1_i64 } else { 0_i64 };
                to_update.push((id, is_ai, detected, mode));
            }
        }
    }

    let with_tool = to_update.iter().filter(|(_, _, t, _)| t.is_some()).count();

    if dry_run {
        println!(
            "Dry run — would update {} commits ({} with AI tool detected). No changes written.",
            to_update.len(),
            with_tool,
        );
        return Ok(());
    }

    let conn = db.connection_mut();
    let tx = conn.transaction()?;
    {
        let mut up = tx.prepare(
            "UPDATE commits SET is_ai_assisted = ?1, ai_tool = ?2, agentic_mode = ?3 \
             WHERE id = ?4",
        )?;
        for (id, is_ai, tool, mode) in &to_update {
            up.execute(params![is_ai, tool, mode, id])?;
        }
    }
    tx.commit()?;
    println!(
        "Updated {} commits ({} AI-assisted, {} cleared).",
        to_update.len(),
        with_tool,
        to_update.len() - with_tool,
    );
    Ok(())
}