tga 2.2.0

Developer productivity analytics — git commit collection, classification, and reporting
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
//! Empirical effort scoring for git commits.
//!
//! Implements the v1 formula shared between `tga backfill effort` (Rust) and
//! the parallel `scripts/compute-effort.sh` (bash) so both storage paths —
//! `fact_commit_effort` rows and `Effort:` git trailers — produce identical
//! T-shirt sizes.
//!
//! # Formula (v1)
//!
//! ```text
//! tests_factor = 1 - 0.3 * min(test_LoC / max(LoC, 1), 1)
//! score        = α·log₂(LoC+1) + β·log₂(files+1) + δ·tests_factor
//! ```
//!
//! Coefficients:  α=1.0, β=1.5, γ=0.0 (deferred), δ=1.0
//!
//! T-shirt thresholds (calibrated against 99 trusty-tools commits; see PR #308):
//! | Size | Score   |
//! |------|---------|
//! | XS   | ≤ 6     |
//! | S    | (6,10]  |
//! | M    | (10,14] |
//! | L    | (14,18] |
//! | XL   | > 18    |
//!
//! # Test-LoC detection (path globs, case-insensitive)
//!
//! - `**/tests/**`
//! - `**/*_test.rs`, `**/test_*` (the `test_` filename prefix is matched for any extension)
//! - `**/*.spec.*`, `**/*.test.*`
//! - `**/__tests__/**`

/// Formula version string — baked into every persisted row.
///
/// Why: lets callers distinguish scores computed with different coefficient
/// sets when the formula changes (e.g., a v2 that adds cyclomatic complexity).
/// What: the static string `"v1"`, identifying the coefficient set and
/// thresholds defined in this module.
/// Test: presumably compared as a literal where rows are persisted
/// (`EffortRecord` is defined outside this file — verify there).
pub const FORMULA_VERSION: &str = "v1";

/// v1 coefficient α: weight of the `log₂(LoC + 1)` term in the score.
const ALPHA: f64 = 1.0;
/// v1 coefficient β: weight of the `log₂(files + 1)` term in the score.
const BETA: f64 = 1.5;
/// v1 coefficient δ: weight of the `tests_factor` term in the score.
///
/// There is no γ constant: the module docs list γ=0.0 as deferred, and the
/// v1 formula has no term for it.
const DELTA: f64 = 1.0;

/// Inclusive upper score bound of the XS bucket.
///
/// All four T-shirt boundaries were calibrated against 99 trusty-tools
/// commits (see PR #308). They must be kept in sync with the identical
/// constants in `scripts/compute-effort.sh` (`XS_MAX`, `S_MAX`, `M_MAX`,
/// `L_MAX`).
const THRESHOLD_XS: f64 = 6.0;
/// Inclusive upper score bound of the S bucket (keep in sync with `S_MAX`).
const THRESHOLD_S: f64 = 10.0;
/// Inclusive upper score bound of the M bucket (keep in sync with `M_MAX`).
const THRESHOLD_M: f64 = 14.0;
/// Inclusive upper score bound of the L bucket (keep in sync with `L_MAX`);
/// any score above it is XL.
const THRESHOLD_L: f64 = 18.0;

/// T-shirt bucket for a commit's effort score.
///
/// Why: gives reports a coarse, human-readable size instead of the raw
/// continuous score, using the same buckets as the bash compute script and
/// the commit-effort trailers.
/// What: five variants, XS through XL; the score range each one covers is
/// noted on the variant.
/// Test: [`EffortResult::size_label`] and [`size_for_score`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EffortSize {
    /// XS — score of at most 6.0.
    Xs,
    /// S — score above 6.0, up to and including 10.0.
    S,
    /// M — score above 10.0, up to and including 14.0.
    M,
    /// L — score above 14.0, up to and including 18.0.
    L,
    /// XL — score above 18.0.
    Xl,
}

impl EffortSize {
    /// Canonical upper-case label as stored in `fact_commit_effort.size`.
    ///
    /// Why: the bash script stores the same strings, so JOIN queries and
    /// reports can group by size regardless of which path wrote the row.
    /// What: returns one of `"XS"`, `"S"`, `"M"`, `"L"`, `"XL"`.
    /// Test: `tests::size_labels_match_spec`.
    pub fn label(self) -> &'static str {
        match self {
            Self::Xs => "XS",
            Self::S => "S",
            Self::M => "M",
            Self::L => "L",
            Self::Xl => "XL",
        }
    }
}

/// Computed effort result for a single commit.
///
/// Why: bundles all derived values so callers (the backfill command, tests)
/// can inspect individual components without recomputing.
/// What: holds the raw measurements (`loc`, `files`, `test_loc`), the
/// intermediate `tests_factor`, the continuous `score`, and the discrete
/// `size` derived from that score.
/// Test: produced by [`compute_effort`]; checked extensively in unit tests.
#[derive(Debug, Clone)]
pub struct EffortResult {
    /// Insertions + deletions across all changed files (accumulated with
    /// saturating addition, so it caps at `u32::MAX`).
    pub loc: u32,
    /// Number of changed files, i.e. entries fed to [`compute_effort`].
    pub files: u32,
    /// Lines changed in files identified as test files by [`is_test_file`].
    pub test_loc: u32,
    /// Computed tests factor: `1.0 - 0.3 * min(test_loc / max(loc, 1), 1.0)`.
    /// Ranges from 0.7 (all changed lines are test code) to 1.0 (none are).
    pub tests_factor: f64,
    /// Continuous effort score.
    pub score: f64,
    /// Bucketed T-shirt size derived from `score`.
    pub size: EffortSize,
}

impl EffortResult {
    /// Convenience accessor that returns the string label of `size`.
    ///
    /// Why: avoids a match at every call site that needs the persisted label.
    /// What: delegates to [`EffortSize::label`].
    /// Test: indirectly covered by all tests that check the label output.
    pub fn size_label(&self) -> &'static str {
        self.size.label()
    }
}

/// Bucket a continuous effort score into its T-shirt size.
///
/// Why: the bash script applies the same thresholds; keeping the
/// classification in one function makes parity easy to verify.
/// What: each threshold is an inclusive upper bound, checked in ascending
/// order — `≤ 6 → XS`, `(6,10] → S`, `(10,14] → M`, `(14,18] → L`, and
/// everything else → XL. A NaN score fails every `<=` comparison and
/// therefore also lands in XL.
/// Test: `tests::thresholds_match_spec`.
pub fn size_for_score(score: f64) -> EffortSize {
    match score {
        s if s <= THRESHOLD_XS => EffortSize::Xs,
        s if s <= THRESHOLD_S => EffortSize::S,
        s if s <= THRESHOLD_M => EffortSize::M,
        s if s <= THRESHOLD_L => EffortSize::L,
        _ => EffortSize::Xl,
    }
}

/// Decide whether `path` looks like a test file.
///
/// Why: separating test LoC from production LoC lets the formula reward
/// commits that include test coverage by lowering their effort score.
/// What: lower-cases the path and checks it against a fixed set of
/// directory and filename patterns; returns `true` on the first match.
/// Test: `tests::test_file_detection`.
///
/// Patterns (all matched against the lower-cased path):
/// - The path is exactly `tests`, starts with `tests/`, or contains `/tests/`
/// - The path starts with `__tests__/` or contains `/__tests__/`
/// - The filename starts with `test_` (any extension, e.g. `test_foo.rs`)
/// - The filename ends with `_test.rs`
/// - The filename has at least three dot-separated segments and the
///   second-to-last one is `spec` or `test` (e.g. `auth.spec.ts`,
///   `auth.test.js`)
pub fn is_test_file(path: &str) -> bool {
    let normalized = path.to_lowercase();

    // Directory-based conventions: a `tests` tree or Jest's `__tests__`.
    let in_tests_dir = normalized == "tests"
        || normalized.starts_with("tests/")
        || normalized.contains("/tests/");
    let in_jest_dir = normalized.starts_with("__tests__/") || normalized.contains("/__tests__/");
    if in_tests_dir || in_jest_dir {
        return true;
    }

    // Everything after the final `/` (the whole path when there is none).
    let basename = normalized.rsplit('/').next().unwrap_or(&normalized);

    // Filename conventions: `test_foo.*` and `foo_test.rs`.
    if basename.starts_with("test_") || basename.ends_with("_test.rs") {
        return true;
    }

    // `<stem>.spec.<ext>` / `<stem>.test.<ext>`: walk the dot-separated
    // segments from the right — extension first, then the marker, then at
    // least one more segment must exist for the name to qualify.
    let mut segments = basename.rsplit('.');
    let _extension = segments.next();
    match (segments.next(), segments.next()) {
        (Some(marker), Some(_stem)) => marker == "spec" || marker == "test",
        _ => false,
    }
}

/// Compute the v1 effort score and T-shirt size for one commit.
///
/// Why: the formula must be deterministic, pure, and identical to the bash
/// compute script so both storage paths produce the same T-shirt size.
/// What: sums insertions + deletions per file into total LoC and test LoC
/// (files for which [`is_test_file`] returns `true`), counts the files,
/// derives `tests_factor`, evaluates the v1 score, and buckets it with
/// [`size_for_score`]. All counters use saturating arithmetic.
/// Test: `tests::formula_known_values`, `tests::formula_empty_commit`, and
/// the cross-validation test `tests::effort_cross_validate` (marked
/// `#[ignore]` until the parallel bash PR lands).
///
/// # Arguments
///
/// * `files` — iterator of `(path, insertions, deletions)` tuples; one entry
///   per changed file in the commit.
///
/// # Returns
///
/// An [`EffortResult`] with all intermediate values populated. An empty
/// iterator yields `loc = 0`, `files = 0`, `tests_factor = 1.0`.
pub fn compute_effort<'a, I>(files: I) -> EffortResult
where
    I: IntoIterator<Item = (&'a str, u32, u32)>,
{
    // Single pass accumulating (total LoC, test LoC, file count).
    let (loc, test_loc, file_count) = files.into_iter().fold(
        (0u32, 0u32, 0u32),
        |(total, tests, count), (path, ins, del)| {
            let changed = ins.saturating_add(del);
            let tests = if is_test_file(path) {
                tests.saturating_add(changed)
            } else {
                tests
            };
            (total.saturating_add(changed), tests, count.saturating_add(1))
        },
    );

    // tests_factor = 1 - 0.3 * min(test_LoC / max(LoC, 1), 1)
    // The max(…, 1) guard keeps an empty commit from dividing by zero.
    let total_lines = f64::from(loc);
    let test_share = (f64::from(test_loc) / total_lines.max(1.0)).min(1.0);
    let tests_factor = 1.0 - 0.3 * test_share;

    // score = α·log₂(LoC+1) + β·log₂(files+1) + δ·tests_factor
    let loc_term = ALPHA * (total_lines + 1.0).log2();
    let files_term = BETA * (f64::from(file_count) + 1.0).log2();
    let tests_term = DELTA * tests_factor;
    let score = loc_term + files_term + tests_term;

    EffortResult {
        loc,
        files: file_count,
        test_loc,
        tests_factor,
        score,
        size: size_for_score(score),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Why: guard that T-shirt labels match the spec and the bash script.
    /// What: asserts every variant returns the canonical string.
    /// Test: this test itself.
    #[test]
    fn size_labels_match_spec() {
        assert_eq!(EffortSize::Xs.label(), "XS");
        assert_eq!(EffortSize::S.label(), "S");
        assert_eq!(EffortSize::M.label(), "M");
        assert_eq!(EffortSize::L.label(), "L");
        assert_eq!(EffortSize::Xl.label(), "XL");
    }

    /// Why: the threshold boundaries must match the spec exactly (PR #308 calibration).
    /// What: probes at and immediately above each boundary value.
    /// Test: this test itself.
    #[test]
    fn thresholds_match_spec() {
        // Exact boundaries are inclusive at the upper bound.
        assert_eq!(size_for_score(6.0), EffortSize::Xs);
        assert_eq!(size_for_score(6.01), EffortSize::S);
        assert_eq!(size_for_score(10.0), EffortSize::S);
        assert_eq!(size_for_score(10.01), EffortSize::M);
        assert_eq!(size_for_score(14.0), EffortSize::M);
        assert_eq!(size_for_score(14.01), EffortSize::L);
        assert_eq!(size_for_score(18.0), EffortSize::L);
        assert_eq!(size_for_score(18.01), EffortSize::Xl);
        assert_eq!(size_for_score(0.0), EffortSize::Xs);
        assert_eq!(size_for_score(100.0), EffortSize::Xl);
    }

    /// Why: ensure the file-detection heuristics cover all specified patterns.
    /// What: positive and negative cases for each pattern family.
    /// Test: this test itself.
    #[test]
    fn test_file_detection() {
        // /tests/ directory segment
        assert!(is_test_file("src/tests/auth_tests.rs"));
        assert!(is_test_file("tests/integration.rs"));

        // __tests__ Jest convention
        assert!(is_test_file("src/__tests__/foo.test.ts"));
        assert!(is_test_file("__tests__/auth.js"));

        // Rust _test.rs suffix
        assert!(is_test_file("src/auth_test.rs"));

        // Rust test_ prefix
        assert!(is_test_file("src/test_auth.rs"));

        // .spec.<ext>
        assert!(is_test_file("src/auth.spec.ts"));
        assert!(is_test_file("src/auth.spec.js"));

        // .test.<ext>
        assert!(is_test_file("src/auth.test.ts"));
        assert!(is_test_file("src/auth.test.js"));

        // Non-test files
        assert!(!is_test_file("src/main.rs"));
        assert!(!is_test_file("src/auth.rs"));
        assert!(!is_test_file("Cargo.toml"));
        assert!(!is_test_file("src/context.ts"));
        assert!(!is_test_file("docs/testing.md"));

        // Testable but not a test file — contains "test" in name but wrong pattern
        assert!(!is_test_file("src/attestation.rs"));
    }

    /// Why: the detection is documented as case-insensitive, so mixed-case
    /// paths must classify the same as their lower-case form.
    /// What: one mixed-case probe per pattern family, plus a negative case.
    /// Test: this test itself.
    #[test]
    fn test_file_detection_is_case_insensitive() {
        assert!(is_test_file("SRC/Tests/Auth.RS"));
        assert!(is_test_file("src/__TESTS__/auth.js"));
        assert!(is_test_file("src/Auth_Test.RS"));
        assert!(is_test_file("src/Test_Auth.rs"));
        assert!(is_test_file("src/Auth.SPEC.ts"));
        assert!(is_test_file("src/Auth.Test.JS"));
        assert!(!is_test_file("SRC/Main.RS"));
    }

    /// Why: verify the formula against hand-computed reference values.
    /// What: a small (2-file, 50-line) commit with no test files.
    /// Test: this test itself.
    #[test]
    fn formula_known_values() {
        // 2 files, 30 insertions + 20 deletions = 50 LoC, 0 test LoC
        // tests_factor = 1 - 0.3 * 0 = 1.0
        // score = 1.0*log2(51) + 1.5*log2(3) + 1.0*1.0
        //       ≈ 5.672 + 2.378 + 1.0 = 9.05
        // With PR #308 calibrated thresholds: 9.05 ∈ (6,10] → S
        let result = compute_effort([("src/auth.rs", 25, 15), ("src/config.rs", 5, 5)]);
        assert_eq!(result.loc, 50);
        assert_eq!(result.files, 2);
        assert_eq!(result.test_loc, 0);
        assert!((result.tests_factor - 1.0).abs() < 1e-9);

        let expected_score = 1.0 * 51_f64.log2() + 1.5 * 3_f64.log2() + 1.0 * 1.0;
        assert!((result.score - expected_score).abs() < 1e-9);
        assert_eq!(result.size, EffortSize::S);
    }

    /// Why: a commit that is entirely test code should have a reduced score
    /// (tests_factor < 1.0), making it appear smaller than an equivalent
    /// production-code change.
    /// What: 20 lines in a test file; tests_factor = 1 - 0.3 = 0.7.
    /// Test: this test itself.
    #[test]
    fn formula_all_test_code_reduces_score() {
        let result = compute_effort([("src/auth_test.rs", 10, 10)]);
        assert_eq!(result.loc, 20);
        assert_eq!(result.test_loc, 20);
        // ratio = 1.0, tests_factor = 1 - 0.3 = 0.7
        assert!((result.tests_factor - 0.7).abs() < 1e-9);
        // score = 1.0*log2(21) + 1.5*log2(2) + 1.0*0.7
        let expected = 1.0 * 21_f64.log2() + 1.5 * 2_f64.log2() + 0.7;
        assert!((result.score - expected).abs() < 1e-9);
    }

    /// Why: the tests factor must scale with the share of test LoC, not just
    /// switch between the 0% and 100% extremes covered by the other tests.
    /// What: 40 production lines + 10 test lines → ratio 0.2,
    /// tests_factor = 1 - 0.3 * 0.2 = 0.94, score ≈ 8.99 → S.
    /// Test: this test itself.
    #[test]
    fn formula_partial_test_code_scales_factor() {
        let result = compute_effort([("src/lib.rs", 30, 10), ("tests/lib.rs", 5, 5)]);
        assert_eq!(result.loc, 50);
        assert_eq!(result.files, 2);
        assert_eq!(result.test_loc, 10);
        assert!((result.tests_factor - 0.94).abs() < 1e-9);

        let expected = 1.0 * 51_f64.log2() + 1.5 * 3_f64.log2() + 0.94;
        assert!((result.score - expected).abs() < 1e-9);
        assert_eq!(result.size, EffortSize::S);
    }

    /// Why: empty commit (root or merge with no diff) must not panic.
    /// What: score = α*log2(1) + β*log2(1) + δ*1.0 = 0 + 0 + 1 = 1.0 → XS.
    /// Test: this test itself.
    #[test]
    fn formula_empty_commit() {
        let result = compute_effort(std::iter::empty::<(&str, u32, u32)>());
        assert_eq!(result.loc, 0);
        assert_eq!(result.files, 0);
        assert_eq!(result.test_loc, 0);
        assert!((result.tests_factor - 1.0).abs() < 1e-9);
        assert!((result.score - 1.0).abs() < 1e-9);
        assert_eq!(result.size, EffortSize::Xs);
    }

    /// Why: guard that very large commits (72k LoC) are summed exactly and do
    /// not produce NaN/infinity in the f64 calculation.
    /// What: 40k insertions + 32k deletions in a single file.
    /// Test: this test itself.
    #[test]
    fn formula_large_commit_does_not_overflow() {
        let result = compute_effort([("src/generated.rs", 40_000, 32_000)]);
        assert_eq!(result.loc, 72_000);
        assert!(result.score.is_finite());
        assert_eq!(result.size, EffortSize::Xl);
    }

    /// Why: the counters use saturating arithmetic; inputs that would exceed
    /// `u32::MAX` must clamp instead of wrapping or panicking.
    /// What: one file whose insertions + deletions already overflow u32, plus
    /// a second file that would push the total past the cap again.
    /// Test: this test itself.
    #[test]
    fn formula_saturates_at_u32_max() {
        let result = compute_effort([("src/a.rs", u32::MAX, u32::MAX), ("src/b.rs", 1, 0)]);
        assert_eq!(result.loc, u32::MAX);
        assert_eq!(result.files, 2);
        assert_eq!(result.test_loc, 0);
        assert!(result.score.is_finite());
        assert_eq!(result.size, EffortSize::Xl);
    }

    /// Cross-validation placeholder.
    ///
    /// Why: ensures Rust formula output matches bash script output exactly once
    /// both sides have landed.  Marked `#[ignore]` so it does not break CI
    /// before `scripts/compute-effort.sh` from the parallel PR is merged.
    /// What: will build a temp git repo, run `compute_effort` on each commit,
    /// shell out to `scripts/compute-effort.sh`, and assert score within ±0.01.
    /// The body is currently empty, so running it passes without checking
    /// anything.
    /// Test: run manually with `cargo test -p tga -- --include-ignored cross_validate`.
    #[ignore = "placeholder until scripts/compute-effort.sh lands (feat/commit-effort-scope)"]
    #[test]
    fn effort_cross_validate() {
        // TODO: implement once scripts/compute-effort.sh has landed from the
        // parallel PR (feat/commit-effort-scope).
        //
        // Steps:
        //  1. Create a tempdir git repo.
        //  2. Make 5-10 commits of varying sizes using git2.
        //  3. For each commit:
        //     a. Run `compute_effort` from Rust.
        //     b. Run `scripts/compute-effort.sh <sha> <repo>` via std::process::Command.
        //     c. Assert size labels are equal.
        //     d. Assert scores are within ±0.01 (float rounding tolerance).
    }
}