tga 2.8.1

Developer productivity analytics — git commit collection, classification, and reporting
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
//! Single-pass row accumulation and materialisation phases for the
//! aggregator.
//!
//! Why: `Aggregator::aggregate` reads as a recipe of named phases; the row
//! scan, its accumulator structs, and the per-slice materialisers live here
//! so the orchestrator (`mod.rs`) stays within the SLOC cap.
//! What: houses `RowFlags`/`compute_row_flags`, the `*Acc` accumulator
//! structs, `accumulate_rows`, and every `materialize_*` / `build_*` helper
//! that turns accumulator state into `ReportData` slices.
//! Test: exercised end-to-end by the `aggregator_*` cases in `report::tests`.

use std::collections::{BTreeMap, HashMap, HashSet};

use chrono::{DateTime, Datelike, Utc};

use crate::collect::ai_attribution::AgenticMode;
use crate::report::models::{
    AuthorSummary, RepositorySummary, UntrackedCommit, WeeklyActivity, WeeklyCategorization,
    WeeklyMetrics,
};

use super::{compile_patterns, is_boilerplate, CommitRow, PrRow, DEFAULT_BOILERPLATE_PATTERNS};

/// Pre-pass boilerplate / revert flags per row.
///
/// Why: every later phase (DORA bugfix counting, weekly-categorization
/// boilerplate bucketing) needs these bits, and recomputing per phase
/// would scan the row vector multiple times.
/// What: bundles a parallel `is_boilerplate` / `is_revert` `Vec<bool>`
/// indexed by row position, plus the aggregate counts.
/// Test: behavior preserved — the same `is_boilerplate` / `is_revert`
/// helpers ran inline previously.
pub(super) struct RowFlags {
    /// Boilerplate verdict per row; entry `i` describes the `i`-th input row.
    pub(super) is_boilerplate: Vec<bool>,
    /// Revert verdict per row; entry `i` describes the `i`-th input row.
    pub(super) is_revert: Vec<bool>,
    /// Number of `true` entries in `is_boilerplate`.
    pub(super) boilerplate_count: usize,
    /// Number of `true` entries in `is_revert`.
    pub(super) revert_count: usize,
}

/// Why: flag computation lives in one named place so the main aggregate
/// function reads as a recipe of phases.
/// What: compiles `DEFAULT_BOILERPLATE_PATTERNS` once, evaluates the
/// boilerplate and revert predicates for each row in order, and returns a
/// [`RowFlags`] holding the per-row bits together with their totals.
/// Test: indirectly via report tests; equivalent to the inline loop that
/// existed in `aggregate` before this refactor.
pub(super) fn compute_row_flags(rows: &[CommitRow]) -> RowFlags {
    let patterns = compile_patterns(DEFAULT_BOILERPLATE_PATTERNS);

    let mut flags = RowFlags {
        is_boilerplate: Vec::with_capacity(rows.len()),
        is_revert: Vec::with_capacity(rows.len()),
        boilerplate_count: 0,
        revert_count: 0,
    };

    for row in rows {
        // Boilerplate detection sees the total number of changed lines.
        let changed_lines = row.insertions + row.deletions;
        let boilerplate = self::is_boilerplate(&row.message, changed_lines, &patterns);
        // Issue #377: route revert detection through the shared core helper so
        // the report-time revert rate matches the persisted `is_revert` column.
        let revert = crate::core::revert::is_revert(&row.message);

        if boilerplate {
            flags.boilerplate_count += 1;
        }
        if revert {
            flags.revert_count += 1;
        }
        flags.is_boilerplate.push(boilerplate);
        flags.is_revert.push(revert);
    }

    flags
}

/// Per-author running totals during accumulation.
///
/// One instance exists per distinct author email; `accumulate_rows` updates
/// it for every commit row carrying that email.
pub(super) struct AuthorAcc {
    /// Canonical display name: the longest (by byte length) name seen so far
    /// for this email.
    pub(super) name: String,
    /// Author email; also the key under which this accumulator is stored.
    pub(super) email: String,
    /// Number of commit rows attributed to this author.
    pub(super) commits: usize,
    /// Sum of `insertions` over the author's rows.
    pub(super) insertions: i64,
    /// Sum of `deletions` over the author's rows.
    pub(super) deletions: i64,
    /// Sum of `files_changed` over the author's rows.
    pub(super) files_changed: i64,
    /// Commit count per category; rows without a category are not counted.
    pub(super) categories: HashMap<String, usize>,
    /// Earliest commit timestamp seen for this author.
    pub(super) first: DateTime<Utc>,
    /// Latest commit timestamp seen for this author.
    pub(super) last: DateTime<Utc>,
}

/// Per-repository running totals during accumulation.
///
/// One instance exists per distinct repository name seen in the commit rows.
pub(super) struct RepoAcc {
    /// Number of commit rows in this repository.
    pub(super) commits: usize,
    /// Distinct author emails that committed to this repository.
    pub(super) authors: HashSet<String>,
    /// Sum of `insertions` over the repository's rows.
    pub(super) insertions: i64,
    /// Sum of `deletions` over the repository's rows.
    pub(super) deletions: i64,
    /// Commit count per category; rows without a category are not counted.
    pub(super) categories: HashMap<String, usize>,
}

/// Per-(week, author, repo) running totals during accumulation.
///
/// The bucket key is the ISO week label, the author email, and the
/// repository name; every field below is a tally over the commit rows that
/// fall into that bucket.
pub(super) struct WeekAcc {
    /// Number of commit rows in this bucket.
    pub(super) commits: usize,
    /// Sum of `insertions` over the bucket's rows.
    pub(super) insertions: i64,
    /// Sum of `deletions` over the bucket's rows.
    pub(super) deletions: i64,
    /// Commit count per category; rows without a category are not counted.
    pub(super) categories: HashMap<String, usize>,
    /// Revert commits in this bucket (issue #377 quality metric).
    pub(super) reverts: usize,
    /// Bugfix-classified commits in this bucket (issue #377).
    pub(super) bugfixes: usize,
    /// Ticketed commits in this bucket (issue #377).
    pub(super) ticketed: usize,
    /// AI-assisted commits in this bucket (issue #445: `is_ai_assisted=1`).
    pub(super) ai_assisted: usize,
    /// Running sum of non-null complexity scores for this bucket (issue #445
    /// batch B, request #6). Used with `complexity_count` to compute the
    /// mean at materialisation time. Only LLM-classified commits contribute.
    pub(super) complexity_sum: i64,
    /// Number of commits in this bucket with a non-null complexity score.
    pub(super) complexity_count: usize,
    /// Full-agentic commits (issue #1113: `agentic_mode = 'full_agentic'`).
    pub(super) agentic_count: usize,
    /// IDE-assisted commits (issue #1113: `agentic_mode = 'ide_assisted'`).
    pub(super) ide_assisted_count: usize,
}

/// Cross-developer per-week running totals during accumulation.
///
/// One instance exists per ISO week label; it feeds both the weekly metrics
/// and the weekly categorization builders.
#[derive(Default)]
pub(super) struct WeekTotal {
    /// Number of commit rows in this week, across all authors and repos.
    pub(super) commits: usize,
    /// Commit count per change type. Unlike the other accumulators, every row
    /// lands in exactly one bucket here: `"boilerplate"` when the row is
    /// flagged as boilerplate, otherwise its category, otherwise
    /// `"unclassified"`.
    pub(super) categories: HashMap<String, usize>,
    /// Distinct author emails with at least one commit in this week.
    pub(super) developers: HashSet<String>,
}

/// Bundle of accumulator state that the single-pass scan produces.
///
/// Why: the row scan computes many parallel histograms at once; returning
/// them as a single struct keeps the orchestration in `aggregate` readable.
/// What: groups author / repo / weekly buckets and per-developer trackers
/// alongside the period bounds and aggregate counts.
/// Test: see `Aggregator::build` tests which exercise the full pipeline.
pub(super) struct Accumulators {
    /// Per-author totals, keyed by author email.
    pub(super) authors: HashMap<String, AuthorAcc>,
    /// Per-repository totals, keyed by repository name.
    pub(super) repos: HashMap<String, RepoAcc>,
    /// Per-bucket totals, keyed by (ISO week label, author email, repository).
    pub(super) weekly: BTreeMap<(String, String, String), WeekAcc>,
    /// Commit count per category over all rows; rows without a category are
    /// not counted.
    pub(super) category_total: HashMap<String, usize>,
    /// Cross-developer totals, keyed by ISO week label.
    pub(super) week_totals: BTreeMap<String, WeekTotal>,
    /// Author email → set of ISO week labels in which that author committed.
    pub(super) dev_weeks: HashMap<String, HashSet<String>>,
    /// Author email → commit count per category.
    pub(super) dev_categories: HashMap<String, HashMap<String, usize>>,
    /// Author email → number of ticketed commits; authors with none are absent.
    pub(super) dev_ticketed: HashMap<String, usize>,
    /// Earliest commit timestamp across all rows.
    pub(super) min_ts: DateTime<Utc>,
    /// Latest commit timestamp across all rows.
    pub(super) max_ts: DateTime<Utc>,
    /// Total boilerplate rows, copied from [`RowFlags::boilerplate_count`].
    pub(super) boilerplate_count: usize,
    /// Total revert rows, copied from [`RowFlags::revert_count`].
    pub(super) revert_count: usize,
}

/// Why: the row scan touches a dozen parallel histograms; isolating it in a
/// named function lets the aggregator orchestration read as a sequence of
/// phases.
/// What: runs one pass over `rows`, updating the per-author / per-repo /
/// per-week / per-developer accumulators in lockstep and tracking the
/// earliest and latest commit timestamps. The ISO week label is formatted
/// once per row and shared by every week-keyed accumulator. Caller is
/// `aggregate`.
/// Test: indirectly via the `aggregator_*` tests in `report::tests`.
///
/// # Panics
///
/// Panics if `rows` is empty (the period bounds are seeded from `rows[0]`)
/// or if `flags` holds fewer per-row entries than `rows`. Callers are
/// expected to pass the flags computed by [`compute_row_flags`] for the same
/// non-empty slice.
pub(super) fn accumulate_rows(rows: &[CommitRow], flags: &RowFlags) -> Accumulators {
    // Period bounds initialised to the first row's timestamp.
    let mut min_ts = rows[0].timestamp;
    let mut max_ts = rows[0].timestamp;

    let mut authors: HashMap<String, AuthorAcc> = HashMap::new();
    let mut repos: HashMap<String, RepoAcc> = HashMap::new();
    let mut weekly: BTreeMap<(String, String, String), WeekAcc> = BTreeMap::new();
    let mut category_total: HashMap<String, usize> = HashMap::new();
    let mut week_totals: BTreeMap<String, WeekTotal> = BTreeMap::new();
    let mut dev_weeks: HashMap<String, HashSet<String>> = HashMap::new();
    let mut dev_categories: HashMap<String, HashMap<String, usize>> = HashMap::new();
    let mut dev_ticketed: HashMap<String, usize> = HashMap::new();

    for (idx, row) in rows.iter().enumerate() {
        if row.timestamp < min_ts {
            min_ts = row.timestamp;
        }
        if row.timestamp > max_ts {
            max_ts = row.timestamp;
        }

        // Format the ISO week label once; the weekly buckets, the
        // cross-developer totals and the per-developer week set all share it.
        let week_label = iso_week_label(&row.timestamp);

        // Authors. Group by email only; pick the longest display name seen
        // as the canonical name (heuristic: longer names tend to be the full
        // "Firstname Lastname" form rather than a short login handle).
        let a = authors
            .entry(row.author_email.clone())
            .or_insert_with(|| AuthorAcc {
                name: row.author_name.clone(),
                email: row.author_email.clone(),
                commits: 0,
                insertions: 0,
                deletions: 0,
                files_changed: 0,
                categories: HashMap::new(),
                first: row.timestamp,
                last: row.timestamp,
            });
        if row.author_name.len() > a.name.len() {
            a.name = row.author_name.clone();
        }
        a.commits += 1;
        a.insertions += row.insertions;
        a.deletions += row.deletions;
        a.files_changed += row.files_changed;
        if row.timestamp < a.first {
            a.first = row.timestamp;
        }
        if row.timestamp > a.last {
            a.last = row.timestamp;
        }
        if let Some(cat) = &row.category {
            *a.categories.entry(cat.clone()).or_insert(0) += 1;
        }

        // Repositories.
        let r = repos
            .entry(row.repository.clone())
            .or_insert_with(|| RepoAcc {
                commits: 0,
                authors: HashSet::new(),
                insertions: 0,
                deletions: 0,
                categories: HashMap::new(),
            });
        r.commits += 1;
        r.authors.insert(row.author_email.clone());
        r.insertions += row.insertions;
        r.deletions += row.deletions;
        if let Some(cat) = &row.category {
            *r.categories.entry(cat.clone()).or_insert(0) += 1;
        }

        // Weekly. Keyed by email (not display name) so that the same identity
        // committing under multiple names lands in a single weekly bucket.
        let wkey = (
            week_label.clone(),
            row.author_email.clone(),
            row.repository.clone(),
        );
        let w = weekly.entry(wkey).or_insert_with(|| WeekAcc {
            commits: 0,
            insertions: 0,
            deletions: 0,
            categories: HashMap::new(),
            reverts: 0,
            bugfixes: 0,
            ticketed: 0,
            ai_assisted: 0,
            complexity_sum: 0,
            complexity_count: 0,
            agentic_count: 0,
            ide_assisted_count: 0,
        });
        w.commits += 1;
        w.insertions += row.insertions;
        w.deletions += row.deletions;
        if let Some(cat) = &row.category {
            *w.categories.entry(cat.clone()).or_insert(0) += 1;
        }
        // Issue #377: per-(week, engineer, repo) quality signals. `is_revert`
        // is the shared-helper verdict computed in `compute_row_flags`;
        // `bugfix` comes from the classifier category; `ticketed` from the
        // commit's ticket-reference flag.
        if flags.is_revert[idx] {
            w.reverts += 1;
        }
        if row.category.as_deref() == Some("bugfix") {
            w.bugfixes += 1;
        }
        if row.ticketed {
            w.ticketed += 1;
        }
        // Issue #445: count AI-assisted commits per (week, engineer, repo) bucket
        // so the weekly activity report can surface AI-adoption rates.
        if row.is_ai_assisted {
            w.ai_assisted += 1;
        }
        // Issue #1113: count agentic/IDE-assisted commits per bucket.
        match row.agentic_mode {
            AgenticMode::FullAgentic => w.agentic_count += 1,
            AgenticMode::IdeAssisted => w.ide_assisted_count += 1,
            AgenticMode::None => {}
        }
        // Issue #445 batch B (request #6): accumulate complexity sum so
        // materialize_weekly_activity can compute avg_complexity without a
        // second pass. Only non-null values (LLM-classified commits) contribute.
        if let Some(c) = row.complexity {
            w.complexity_sum += c;
            w.complexity_count += 1;
        }

        // Category totals.
        if let Some(cat) = &row.category {
            *category_total.entry(cat.clone()).or_insert(0) += 1;
        }

        // Cross-developer weekly totals.
        let wt = week_totals.entry(week_label.clone()).or_default();
        wt.commits += 1;
        wt.developers.insert(row.author_email.clone());
        // Treat boilerplate rows as a synthetic category so they show
        // up in `weekly_categorization.csv` rather than being silently
        // bucketed into whatever the classifier returned.
        if flags.is_boilerplate[idx] {
            *wt.categories.entry("boilerplate".to_string()).or_insert(0) += 1;
        } else if let Some(cat) = &row.category {
            *wt.categories.entry(cat.clone()).or_insert(0) += 1;
        } else {
            *wt.categories.entry("unclassified".to_string()).or_insert(0) += 1;
        }

        // Per-developer week / category / ticketed tracking. This is the last
        // use of the week label, so it is moved rather than cloned.
        dev_weeks
            .entry(row.author_email.clone())
            .or_default()
            .insert(week_label);
        if let Some(cat) = &row.category {
            *dev_categories
                .entry(row.author_email.clone())
                .or_default()
                .entry(cat.clone())
                .or_insert(0) += 1;
        }
        if row.ticketed {
            *dev_ticketed.entry(row.author_email.clone()).or_insert(0) += 1;
        }
    }

    Accumulators {
        authors,
        repos,
        weekly,
        category_total,
        week_totals,
        dev_weeks,
        dev_categories,
        dev_ticketed,
        min_ts,
        max_ts,
        boilerplate_count: flags.boilerplate_count,
        revert_count: flags.revert_count,
    }
}

/// Why: report consumers expect authors sorted by commit count with the
/// canonical (longest-seen) display name, and the ordering must be stable
/// between runs so generated reports diff cleanly.
/// What: drains the author accumulator into [`AuthorSummary`] rows and
/// sorts them by descending commit count, breaking ties by ascending email.
/// Timestamps are rendered as RFC 3339 strings.
/// Test: indirectly via `aggregator_builds_report_data`.
pub(super) fn materialize_authors(authors: HashMap<String, AuthorAcc>) -> Vec<AuthorSummary> {
    let mut summaries: Vec<AuthorSummary> = authors
        .into_values()
        .map(|a| AuthorSummary {
            name: a.name,
            email: a.email,
            commit_count: a.commits,
            insertions: a.insertions,
            deletions: a.deletions,
            files_changed: a.files_changed,
            categories: a.categories,
            first_commit: a.first.to_rfc3339(),
            last_commit: a.last.to_rfc3339(),
        })
        .collect();
    // `HashMap` iteration order is arbitrary, so sorting on commit count alone
    // would leave equally-active authors in a run-dependent order. The email
    // is the accumulator key, hence unique, which makes the order total.
    summaries.sort_by(|a, b| {
        b.commit_count
            .cmp(&a.commit_count)
            .then_with(|| a.email.cmp(&b.email))
    });
    summaries
}

/// Why: per-repo rows in reports include the top categories for the repo,
/// sorted by frequency, so reviewers can see at a glance what work
/// dominates each codebase; the ordering must be stable between runs so
/// generated reports diff cleanly.
/// What: drains the repo accumulator into [`RepositorySummary`] with the
/// top-categories vector sorted descending by count (ties by ascending
/// category name); the outer Vec is sorted by descending repo commit count
/// (ties by ascending repository name).
/// Test: indirectly via `aggregator_builds_report_data`.
pub(super) fn materialize_repositories(repos: HashMap<String, RepoAcc>) -> Vec<RepositorySummary> {
    let mut summaries: Vec<RepositorySummary> = repos
        .into_iter()
        .map(|(name, r)| {
            let mut top: Vec<(String, usize)> = r.categories.into_iter().collect();
            // Both maps are `HashMap`s with arbitrary iteration order, so a
            // count-only sort would order equal counts differently per run.
            top.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
            RepositorySummary {
                name,
                commit_count: r.commits,
                author_count: r.authors.len(),
                insertions: r.insertions,
                deletions: r.deletions,
                top_categories: top,
            }
        })
        .collect();
    summaries.sort_by(|a, b| {
        b.commit_count
            .cmp(&a.commit_count)
            .then_with(|| a.name.cmp(&b.name))
    });
    summaries
}

/// Why: the weekly bucket key uses email, but reports want canonical
/// display names so a single identity reads the same across the report.
/// What: drains the weekly map into [`WeeklyActivity`] rows, resolving each
/// row's email to its canonical display name via the `email_to_name` lookup
/// (falling back to the email itself when no name is known), and attaching
/// the quality score, the abandoned-PR count, and the mean complexity for
/// the bucket. Rows come out in the `BTreeMap`'s key order.
/// Test: indirectly via `aggregator_builds_report_data` (two weekly rows
/// for two authors in different weeks).
pub(super) fn materialize_weekly_activity(
    weekly: BTreeMap<(String, String, String), WeekAcc>,
    email_to_name: &HashMap<String, String>,
    abandoned_by_week_identity: &HashMap<(String, String), usize>,
) -> Vec<WeeklyActivity> {
    weekly
        .into_iter()
        .map(|((week, email, repository), w)| {
            // Clone the email only when it is actually needed as the fallback.
            let author = email_to_name
                .get(&email)
                .cloned()
                .unwrap_or_else(|| email.clone());
            // Issue #377 quality score for this (week, engineer, repo) bucket.
            let (quality_score, quality_tshirt) =
                crate::core::quality::score_and_tshirt(crate::core::quality::QualityInputs {
                    commits: w.commits,
                    reverts: w.reverts,
                    bugfixes: w.bugfixes,
                    ticketed: w.ticketed,
                });
            // Best-effort abandoned-PR attribution: match the PR author login
            // against the resolved display name first, then the email (both
            // case-insensitive). See `build_abandoned_pr_counts` for why this
            // is heuristic. The lookup key is (week, identity) with no
            // repository component, so an engineer active in several repos in
            // one week sees the same abandoned count echoed on every repo row.
            // Downstream joins on (week, author), so this is accepted.
            let abandoned_pr_count = abandoned_by_week_identity
                .get(&(week.clone(), author.to_lowercase()))
                .or_else(|| abandoned_by_week_identity.get(&(week.clone(), email.to_lowercase())))
                .copied()
                .unwrap_or(0);
            // Issue #445 batch B (request #6): compute the mean complexity for
            // this bucket from the running sum. Returns None when no commit has
            // a non-null complexity score (all-null → None, not 0.0, so
            // downstream consumers can distinguish "no data" from "scored 0").
            let avg_complexity = if w.complexity_count > 0 {
                Some(w.complexity_sum as f64 / w.complexity_count as f64)
            } else {
                None
            };
            WeeklyActivity {
                week,
                author,
                repository,
                commit_count: w.commits,
                insertions: w.insertions,
                deletions: w.deletions,
                categories: w.categories,
                revert_count: w.reverts,
                bugfix_count: w.bugfixes,
                ticketed_count: w.ticketed,
                quality_score,
                quality_tshirt,
                abandoned_pr_count,
                // Issue #445: AI-assisted commits in this (week, engineer, repo) bucket.
                ai_assisted_count: w.ai_assisted,
                avg_complexity,
                // Issue #1113: agentic-mode commit counts.
                agentic_count: w.agentic_count,
                ide_assisted_count: w.ide_assisted_count,
            }
        })
        .collect()
}

/// Build a `(iso_week, author_identity_lowercased) → abandoned_pr_count` map.
///
/// Why: closed-but-unmerged PRs are a quality signal (issue #377); counting
/// them per engineer per week lets reports surface the abandoned-PR rate.
/// What: keeps only PRs with `state == "closed"` and no `merged_at`, buckets
/// each one by the ISO week of its `created_at`, and counts occurrences per
/// (week, lowercased author login) pair.
///
/// Limitation: the PR `author` is a provider login (e.g. a GitHub handle),
/// NOT a canonical engineer email. No login→engineer mapping is available
/// at aggregation time, so [`materialize_weekly_activity`] attributes these
/// counts by a best-effort case-insensitive match of the login against the
/// engineer's display name or email. A login matching neither is still
/// counted here but never reaches a weekly-activity row. Persisting a
/// login→author_id mapping in a future change would make this exact.
/// Test: `aggregator_counts_abandoned_prs` in `report::tests`.
pub(super) fn build_abandoned_pr_counts(prs: &[PrRow]) -> HashMap<(String, String), usize> {
    prs.iter()
        .filter(|pr| pr.state == "closed" && pr.merged_at.is_none())
        .fold(HashMap::new(), |mut counts, pr| {
            // Creation week is the bucketing anchor for an abandoned PR.
            let bucket = (iso_week_label(&pr.created_at), pr.author.to_lowercase());
            *counts.entry(bucket).or_insert(0) += 1;
            counts
        })
}

/// Why: weekly metrics are the cross-developer roll-up behind trend charts;
/// a fixed set of named buckets keeps the schema stable no matter which
/// categories actually occur in the data.
/// What: emits one [`WeeklyMetrics`] row per ISO week, in the map's key
/// order, reading the feature / bugfix / maintenance / refactor / test
/// counters from the week's category histogram (absent categories count as
/// zero). The docs counter is the sum of the `"documentation"` and `"docs"`
/// categories; `story_points` is always `0.0` here.
/// Test: indirectly via `aggregator_builds_report_data` (asserts the
/// weekly_metrics vector is populated).
pub(super) fn build_weekly_metrics(
    week_totals: &BTreeMap<String, WeekTotal>,
) -> Vec<WeeklyMetrics> {
    let mut metrics = Vec::with_capacity(week_totals.len());
    for (label, totals) in week_totals {
        // Histogram lookup that treats a missing category as zero commits.
        let count_of = |category: &str| totals.categories.get(category).copied().unwrap_or(0);
        metrics.push(WeeklyMetrics {
            week: label.clone(),
            total_commits: totals.commits,
            feature_commits: count_of("feature"),
            bugfix_commits: count_of("bugfix"),
            maintenance_commits: count_of("maintenance"),
            refactor_commits: count_of("refactor"),
            test_commits: count_of("test"),
            doc_commits: count_of("documentation") + count_of("docs"),
            active_developers: totals.developers.len(),
            story_points: 0.0,
        });
    }
    metrics
}

/// Why: `weekly_categorization.csv` needs one row per (week, change-type)
/// with its percentage share, so consumers can chart "what work happened
/// this week" as stacked bars.
/// What: for every week, in the map's key order, emits one
/// [`WeeklyCategorization`] per category present that week, ordered by
/// category name for deterministic output. `pct_of_week` is the category's
/// share of the week's commit count scaled to 0–100, or `0.0` when the week
/// has no commits.
/// Test: covered by `csv_formatter_writes_new_report_files` which writes
/// the weekly_categorization CSV.
pub(super) fn build_weekly_categorization(
    week_totals: &BTreeMap<String, WeekTotal>,
) -> Vec<WeeklyCategorization> {
    week_totals
        .iter()
        .flat_map(|(week, totals)| {
            let week_commits = totals.commits as f64;
            // The histogram is a `HashMap`; order by name before emitting.
            let mut by_name: Vec<(&String, &usize)> = totals.categories.iter().collect();
            by_name.sort_by(|left, right| left.0.cmp(right.0));
            by_name
                .into_iter()
                .map(move |(change_type, &commit_count)| {
                    let pct_of_week = if week_commits > 0.0 {
                        (commit_count as f64) * 100.0 / week_commits
                    } else {
                        0.0
                    };
                    WeeklyCategorization {
                        week: week.clone(),
                        change_type: change_type.clone(),
                        commit_count,
                        pct_of_week,
                    }
                })
        })
        .collect()
}

/// Why: untracked-commit rows surface commits without a ticket reference so
/// PMs can chase down missing trackable work.
/// What: keeps the rows that are unticketed and whose category is not
/// `"boilerplate"`, resolves each row's author email to its canonical
/// display name (falling back to the row's own author name), keeps only the
/// first line of the commit message, and returns the rows newest-first.
/// Test: covered indirectly via `csv_formatter_writes_new_report_files`
/// (writes the `untracked.csv` file from this data).
pub(super) fn build_untracked_commits(
    rows: &[CommitRow],
    email_to_name: &HashMap<String, String>,
) -> Vec<UntrackedCommit> {
    let mut out: Vec<UntrackedCommit> = rows
        .iter()
        // A second filter used to follow this one ("NULL or unclassified
        // category, or unticketed"); it could never reject a row because
        // every row reaching it was already unticketed, so it was removed.
        // NOTE(review): this tests the classifier category, not the
        // pattern-based `RowFlags::is_boilerplate` bit — confirm that is the
        // intended notion of boilerplate here.
        .filter(|r| !r.ticketed && r.category.as_deref() != Some("boilerplate"))
        .map(|r| UntrackedCommit {
            sha: r.sha.clone(),
            author: email_to_name
                .get(&r.author_email)
                .cloned()
                .unwrap_or_else(|| r.author_name.clone()),
            date: r.timestamp.to_rfc3339(),
            message: r.message.lines().next().unwrap_or("").to_string(),
        })
        .collect();
    // Deterministic ordering: newest first. Dates are RFC 3339 strings
    // rendered from UTC timestamps, so they are compared lexically; the sort
    // is stable, so rows with identical dates keep their input order.
    out.sort_by(|a, b| b.date.cmp(&a.date));
    out
}

/// Render a UTC timestamp as an ISO week label such as `"2024-W03"`.
///
/// Why: weekly buckets are keyed by a stable, lexically sortable string so
/// that `BTreeMap` iteration yields chronological output without an extra
/// sort.
/// What: returns `YYYY-W{:02}` built from the ISO week-numbering year and
/// the zero-padded ISO week number of `ts`.
/// Test: exercised by every aggregator test (all weekly buckets use this).
pub(super) fn iso_week_label(ts: &DateTime<Utc>) -> String {
    // Both components come from the ISO week, not the calendar date.
    let week = ts.iso_week();
    format!("{}-W{:02}", week.year(), week.week())
}