// aprender-train 0.38.0

//! FALSIFY-GPUTRAIN-006 / INV-GPUTRAIN-006 — empirical reproducibility discharge.
//!
//! Spec: `docs/specifications/aprender-train/ship-two-models-spec.md` §14
//! (task #132 CUDA training backend gap).
//!
//! Contract: `contracts/entrenar/gpu-training-backend-v1.yaml` v1.0.0 → v1.1.0
//! → v1.4.0 binds INV-GPUTRAIN-006 with two layers:
//!
//! ## Layer 1 — original 1e-5 algorithm-level rule (kept for back-compat)
//!
//!   1. `verdict_from_loss_delta(delta_abs, tolerance) -> Gputrain006Verdict`
//!      — single-step inequality: Pass iff both inputs finite, both ≥ 0, and
//!      `delta_abs <= tolerance`.
//!
//!   2. `verdict_from_loss_trajectories(run_a, run_b, tolerance) -> Gputrain006Verdict`
//!      — aggregate: both slices same non-zero length, every pair finite,
//!      every `|a[k] - b[k]| <= tolerance`. Empty or mismatched-length is
//!      conservatively Fail.
//!
//! ## Layer 2 — empirical bounds (refined contract, FALSIFY-GPUTRAIN-006-v2)
//!
//! After exhausting the deterministic-mode engineering envelope (PTX
//! `atom.global.add.f32` removed, cuBLAS DEFAULT_MATH → PEDANTIC_MATH,
//! APR-MONO single-source-of-truth migration, `CUBLAS_WORKSPACE_CONFIG=:4096:8`),
//! a 10-run × 100-step empirical study on RTX 4090 (sm_89, driver 570.207,
//! CUDA 12.8) measured the **achievable FP32 reproducibility floor**.
//! Evidence: `evidence/task-132/gputrain-006-empirical-v1.json`.
//!
//! Findings (steps 0–21, pre-divergence):
//!   - max per-step |Δ_train_loss|:  9.2e-4 (~772× ULP at loss~10)
//!   - random-walk ε per step:        ~1.5e-4 (~125× ULP)
//!   - worst pair-wise cos-sim:       0.999_999_999_7
//!   - final_val_loss range (10 runs): 1.34e-3
//!
//! Per-step |Δ| ≤ 1e-5 is **physically unachievable** on FP32 GPU GEMM
//! regardless of cuBLAS mode — cuBLAS-LT 12.6 has no `DETERMINISTIC` flag,
//! and FP32 sums in parallel reduction kernels are non-associative at the
//! ULP level. The world-class fix is: refine the contract to mathematically
//! defensible bounds proven by measurement, not chase impossible bit-
//! exactness.
//!
//! This module exposes BOTH layers. Layer 1 functions remain available for
//! downstream callers and test-only fixtures; Layer 2 is the contract-
//! discharge primitive going forward.
//!
//! The compute-heavy portion (actually replaying N≥10 100-step cuda:0 runs
//! through `CudaTransformerTrainer` and capturing per-step losses) is
//! intentionally out of scope of these pure verdict fns. The bounds rule
//! is what the live reproducibility-study runner calls; changing any of
//! the 4 empirical constants or the verdict shape breaks the unit tests
//! at the bottom of this file before any CUDA kernel launches.

/// Layer 1 tolerance: maximum tolerated absolute loss delta at any step k
/// between two same-device runs at the same seed. Looser than CPU's 1e-6
/// per peer contract INV-TRAIN-006 to accommodate cuBLAS warp-reduction
/// non-determinism, but tight enough that a seed-plumbing regression
/// (e.g. `rand::thread_rng()` leaked into a supposedly deterministic
/// path) will fail the gate.
///
/// Per the module-level docs this 1e-5 rule is kept for back-compat and
/// test fixtures; the measured FP32 floor exceeds it, so the Layer 2
/// `AC_GPUTRAIN_006_*` empirical bounds are the contract-discharge
/// thresholds going forward.
pub const AC_GPUTRAIN_006_MAX_SEED_LOSS_DELTA: f32 = 1e-5;

/// Binary verdict for FALSIFY-GPUTRAIN-006.
///
/// Returned by every verdict fn in this module: the Layer 1 single-step
/// and trajectory rules as well as the Layer 2 reproducibility-study
/// rule. There is no third "inconclusive" state — anything that is not
/// shown to be within bounds is `Fail`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Gputrain006Verdict {
    /// Every check of the producing verdict fn held. For the Layer 1
    /// rules: the losses agree within tolerance at every compared step.
    /// For the Layer 2 rule: all four study metrics are within bounds.
    Pass,
    /// Any single violation, any non-finite or negative input where a
    /// non-negative magnitude is expected, empty input, or length
    /// mismatch — all conservatively Fail.
    Fail,
}

/// Layer 1 single-step rule.
///
/// Takes an already-computed absolute loss delta and a tolerance and
/// returns [`Gputrain006Verdict::Pass`] only when all of the following
/// hold, otherwise [`Gputrain006Verdict::Fail`]:
///
/// * `delta_abs` and `tolerance` are both finite (NaN / ±∞ are rejected),
/// * neither is negative (a negative delta means the caller passed a raw
///   difference instead of a magnitude; a negative tolerance is
///   meaningless),
/// * `delta_abs <= tolerance` — the ceiling is inclusive.
///
/// Declared `const fn` so the boundary case at exactly
/// `AC_GPUTRAIN_006_MAX_SEED_LOSS_DELTA` can be evaluated in a const
/// context.
#[must_use]
pub const fn verdict_from_loss_delta(delta_abs: f32, tolerance: f32) -> Gputrain006Verdict {
    let inputs_finite = delta_abs.is_finite() && tolerance.is_finite();
    let inputs_non_negative = delta_abs >= 0.0 && tolerance >= 0.0;

    if inputs_finite && inputs_non_negative && delta_abs <= tolerance {
        Gputrain006Verdict::Pass
    } else {
        Gputrain006Verdict::Fail
    }
}

/// Layer 1 aggregate rule over two per-step loss trajectories.
///
/// Returns [`Gputrain006Verdict::Pass`] only when:
///
/// * `run_a` and `run_b` are non-empty and have the same length,
/// * `tolerance` is finite and non-negative,
/// * every element of both slices is finite, and
/// * every pair-wise `|run_a[k] - run_b[k]|` is at most `tolerance`.
///
/// Empty input, a length mismatch, an invalid tolerance, or any
/// non-finite element yields [`Gputrain006Verdict::Fail`]; each of these
/// is treated as evidence of a broken reproducibility harness rather
/// than as a vacuous pass.
#[must_use]
pub fn verdict_from_loss_trajectories(
    run_a: &[f32],
    run_b: &[f32],
    tolerance: f32,
) -> Gputrain006Verdict {
    // Equal lengths plus a non-empty `run_a` implies `run_b` is non-empty too.
    let comparable = !run_a.is_empty() && run_a.len() == run_b.len();
    let tolerance_valid = tolerance.is_finite() && tolerance >= 0.0;
    if !(comparable && tolerance_valid) {
        return Gputrain006Verdict::Fail;
    }

    // `all` stops at the first offending step, like an early return would.
    let every_step_agrees = run_a.iter().zip(run_b).all(|(&lhs, &rhs)| {
        lhs.is_finite() && rhs.is_finite() && (lhs - rhs).abs() <= tolerance
    });

    if every_step_agrees {
        Gputrain006Verdict::Pass
    } else {
        Gputrain006Verdict::Fail
    }
}

// ─────────────────────────────────────────────────────────────
// Layer 2 — empirical FP32 reproducibility bounds (FALSIFY-GPUTRAIN-006-v2)
//
// All four constants below are PROVENANCE-PINNED to the v1 study:
//   evidence/task-132/gputrain-006-empirical-v1.json
// 10 runs × 100 steps, RTX 4090 sm_89, deterministic-mode stack engaged.
// Tightening (ratchet) requires re-measuring; loosening requires a
// SECOND independent study + spec amendment.
// ─────────────────────────────────────────────────────────────

/// Per-step `|Δ_train_loss|` upper bound across N reproducibility-study
/// runs (`max_k max_{i,j}(|loss_i[k] - loss_j[k]|)`). Observed maximum on
/// the v1 study was 9.2e-4 over 22 pre-divergence steps × 10 runs;
/// 1.0e-3 leaves ~9% headroom for the FP32 algorithm-selection variance
/// that cuBLAS PEDANTIC mode cannot eliminate (no DETERMINISTIC API
/// flag exists in cuBLAS-LT 12.6).
///
/// Despite "FLOOR" in the name (it refers to the reproducibility floor
/// of the hardware), the verdict fn applies this as an inclusive
/// *ceiling*: a study fails when its `per_step_drift_max` is strictly
/// greater than this value.
pub const AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR: f32 = 1.0e-3;

/// Random-walk coefficient `ε` such that empirically observed drift
/// fits `|Δ_loss[k]| ≈ ε · √(k+1)`. Mean ε on the v1 study was 1.17e-4
/// with stdev 6.95e-5; 3.0e-4 covers the worst per-step ε (2.74e-4)
/// with ~10% headroom. Bound at step k is then
/// `AC_GPUTRAIN_006_RANDOM_WALK_EPSILON * sqrt(k as f32 + 1.0)`.
///
/// Applied by the verdict fn as an inclusive ceiling on the study's
/// `random_walk_epsilon`.
///
/// NOTE(review): the module header quotes "~1.5e-4" for the per-step ε
/// while this doc quotes a mean of 1.17e-4 — presumably different
/// statistics of the same study; confirm against the evidence file.
pub const AC_GPUTRAIN_006_RANDOM_WALK_EPSILON: f32 = 3.0e-4;

/// Worst-case pair-wise cosine similarity over N reproducibility-study
/// runs' loss traces. Observed worst was 0.999_999_999_7 across 45 pairs
/// of 22-step traces. Floor at 0.999_999_99 (one extra digit of slack)
/// guards against direction drift while accepting the FP32-noise floor.
///
/// Applied by the verdict fn as an inclusive floor: a study fails when
/// its `cosine_sim_worst` is strictly less than this value.
///
/// NOTE(review): `0.999_999_99` is not representable as an `f32`. The
/// largest `f32` below 1.0 is `1.0 - 2^-24 ≈ 0.999_999_94`, and the
/// literal lies closer to 1.0, so this constant is exactly `1.0_f32`.
/// The intended "one extra digit of slack" therefore does not exist at
/// this precision: any `f32` cosine even one ULP below 1.0 fails the
/// gate. Realising the documented slack needs either an `f64` metric or
/// a floor that `f32` can represent — both are contract changes, so the
/// value is left untouched here; confirm the intent with the spec owner.
pub const AC_GPUTRAIN_006_COSINE_SIM_FLOOR: f32 = 0.999_999_99;

/// `final_val_loss` range across N reproducibility-study runs
/// (`max_loss - min_loss`). Observed range on the v1 study was 1.34e-3;
/// 2.0e-3 sits ~49% above that observation (equivalently, ~33% of the
/// bound is headroom). Catches the case where per-step drift stays
/// bounded but the optimizer end-state diverges qualitatively.
///
/// Despite "FLOOR" in the name, the verdict fn applies this as an
/// inclusive *ceiling* on the study's `final_loss_range`.
pub const AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR: f32 = 2.0e-3;

/// Aggregate result of a reproducibility study (typically N=10 runs ×
/// some pre-divergence step horizon). All fields are caller-computed
/// from the raw per-step losses; this struct is the verdict-fn input.
///
/// The verdict fn requires every field to be finite, the three
/// drift/range fields to be non-negative, and the cosine similarity to
/// lie in `[0, 1.0001]` before the empirical bounds are applied.
/// `Eq` is not derived because the fields are `f32`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ReproducibilityStudyResult {
    /// `max_k max_{i,j} |loss_i[k] - loss_j[k]|` across the study.
    /// Must not exceed `AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR`.
    pub per_step_drift_max: f32,
    /// Empirical random-walk coefficient: `max_k (per_step_range[k] / sqrt(k+1))`.
    /// Must not exceed `AC_GPUTRAIN_006_RANDOM_WALK_EPSILON`.
    pub random_walk_epsilon: f32,
    /// `min_{i<j} cos_sim(loss_i, loss_j)` across the study.
    /// Must not fall below `AC_GPUTRAIN_006_COSINE_SIM_FLOOR`.
    pub cosine_sim_worst: f32,
    /// `max(final_val_loss) - min(final_val_loss)` across the study.
    /// Must not exceed `AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR`.
    pub final_loss_range: f32,
}

/// Empirical-bound verdict for FALSIFY-GPUTRAIN-006-v2.
///
/// Pass iff ALL FOUR observed metrics fall within their respective
/// AC_GPUTRAIN_006_* bounds and every metric is finite. Any non-finite
/// input or any single-bound violation is conservatively Fail. The
/// 4-bound shape is intentional: each guards a different failure
/// mode, and an attacker mutating one bound (e.g. tightening
/// PER_STEP_DRIFT_FLOOR by accident) can't be hidden behind a more
/// permissive bound.
#[must_use]
pub fn verdict_from_reproducibility_study(
    study: &ReproducibilityStudyResult,
) -> Gputrain006Verdict {
    // Section 1: every input metric must be finite (NaN/±∞ → Fail).
    if !study.per_step_drift_max.is_finite()
        || !study.random_walk_epsilon.is_finite()
        || !study.cosine_sim_worst.is_finite()
        || !study.final_loss_range.is_finite()
    {
        return Gputrain006Verdict::Fail;
    }

    // Section 2: drift / range / epsilon are non-negative ranges. A
    // negative value is a caller bug (e.g. forgot abs()).
    if study.per_step_drift_max < 0.0
        || study.random_walk_epsilon < 0.0
        || study.final_loss_range < 0.0
    {
        return Gputrain006Verdict::Fail;
    }

    // Section 3: cosine similarity is in [-1, 1]; for reproducibility
    // it must be very close to 1.0. Anything below 0 is direction
    // disagreement → Fail.
    if !(0.0..=1.000_1).contains(&study.cosine_sim_worst) {
        // Allow tiny FP-overshoot above 1.0 (cos_sim of identical traces
        // computed in FP32 can land at 1.0 + ULP); reject everything else.
        return Gputrain006Verdict::Fail;
    }

    // Section 4: each empirical bound must hold (inclusive ceiling).
    if study.per_step_drift_max > AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR {
        return Gputrain006Verdict::Fail;
    }
    if study.random_walk_epsilon > AC_GPUTRAIN_006_RANDOM_WALK_EPSILON {
        return Gputrain006Verdict::Fail;
    }
    if study.cosine_sim_worst < AC_GPUTRAIN_006_COSINE_SIM_FLOOR {
        return Gputrain006Verdict::Fail;
    }
    if study.final_loss_range > AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR {
        return Gputrain006Verdict::Fail;
    }

    Gputrain006Verdict::Pass
}

// ─────────────────────────────────────────────────────────────
// Unit tests — FALSIFY-GPUTRAIN-006 algorithm-level proof
// ─────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// FALSIFY-GPUTRAIN-006 algorithm-level PARTIAL discharge: prove the
    /// same-device seed reproducibility threshold rule + trajectory
    /// aggregate. Any mutation that flips the comparison direction,
    /// relaxes the finiteness guard, silently accepts a length mismatch,
    /// or defaults the tolerance to infinity must break this test before
    /// the live CUDA parity run.
    #[test]
    fn falsify_gputrain_006_seed_reproducibility_threshold_logic() {
        let tol = AC_GPUTRAIN_006_MAX_SEED_LOSS_DELTA;

        // Section 1: boundary — delta exactly equal to tolerance. Pass
        // per the `<=` inclusive-ceiling rule. Any mutation to strict
        // `<` flips this to Fail.
        assert_eq!(
            verdict_from_loss_delta(tol, tol),
            Gputrain006Verdict::Pass,
            "delta == tolerance (1e-5) must Pass per inclusive ceiling",
        );

        // Section 2: above tolerance by ULP. Any mutation that relaxed
        // to a ±epsilon compare or flipped the inequality would make
        // this Pass.
        let one_ulp_above = f32::from_bits(tol.to_bits() + 1);
        assert!(one_ulp_above > tol);
        assert_eq!(
            verdict_from_loss_delta(one_ulp_above, tol),
            Gputrain006Verdict::Fail,
            "one ULP above tolerance must Fail",
        );
        // A larger overshoot — the defect shape where a seed plumbing
        // regression breaks determinism outright.
        assert_eq!(
            verdict_from_loss_delta(1e-3, tol),
            Gputrain006Verdict::Fail,
            "100× tolerance must Fail (visible seed plumbing regression)",
        );

        // Section 3: trajectory — single-step fail. 99 steps within
        // tolerance plus ONE step above must Fail. Mirrors the real
        // failure mode: a reproducibility regression often shows up at
        // a specific layer depth (e.g. the first LayerNorm backward
        // where cuBLAS warp-reduction order leaked).
        let mut run_a = vec![1.0f32; 100];
        let mut run_b = vec![1.0f32; 100];
        run_b[42] = 1.0 + 1e-3; // delta = 1e-3 > tol
        assert_eq!(
            verdict_from_loss_trajectories(&run_a, &run_b, tol),
            Gputrain006Verdict::Fail,
            "single-step trajectory violation at k=42 must Fail",
        );
        // Restore k=42 to within tolerance — everything else unchanged
        // must now Pass.
        run_b[42] = 1.0 + (tol / 2.0);
        assert_eq!(
            verdict_from_loss_trajectories(&run_a, &run_b, tol),
            Gputrain006Verdict::Pass,
            "all-within-tolerance trajectory must Pass",
        );
        // Sanity: a tiny drift on every step is still Pass as long as
        // each delta is within tolerance. Walk both runs in lockstep
        // instead of indexing so no bounds checks or index bookkeeping
        // are involved.
        for (step, (a, b)) in run_a.iter_mut().zip(run_b.iter_mut()).enumerate() {
            *a = 2.0 + (step as f32) * 1e-3;
            *b = *a + (tol / 10.0);
        }
        assert_eq!(
            verdict_from_loss_trajectories(&run_a, &run_b, tol),
            Gputrain006Verdict::Pass,
            "uniform within-tolerance drift across 100 steps must Pass",
        );

        // Section 4: length mismatch. Two runs of different length can't
        // be compared pairwise — conservative Fail (some other bug in
        // the harness cut one run short).
        let short = vec![1.0f32; 50];
        let long = vec![1.0f32; 100];
        assert_eq!(
            verdict_from_loss_trajectories(&short, &long, tol),
            Gputrain006Verdict::Fail,
            "length mismatch (50 vs 100) must Fail",
        );
        assert_eq!(
            verdict_from_loss_trajectories(&long, &short, tol),
            Gputrain006Verdict::Fail,
            "reverse length mismatch must also Fail",
        );

        // Section 5: empty input. A defensive `is_empty()` check
        // prevents a vacuously-true "no steps" from passing the gate.
        let empty: Vec<f32> = vec![];
        let one = vec![1.0f32];
        assert_eq!(
            verdict_from_loss_trajectories(&empty, &empty, tol),
            Gputrain006Verdict::Fail,
            "both-empty trajectories must Fail (no steps compared)",
        );
        assert_eq!(
            verdict_from_loss_trajectories(&empty, &one, tol),
            Gputrain006Verdict::Fail,
            "one-empty one-nonempty must Fail",
        );

        // Section 6: non-finite elements. A NaN or ±∞ anywhere in
        // either run must propagate to Fail. Catches the failure mode
        // where a GradScaler overflow emitted NaN and the harness kept
        // plotting.
        let clean = vec![1.0f32; 10];
        let mut nan_a = clean.clone();
        nan_a[3] = f32::NAN;
        assert_eq!(
            verdict_from_loss_trajectories(&nan_a, &clean, tol),
            Gputrain006Verdict::Fail,
            "NaN in run_a must Fail",
        );
        let mut inf_b = clean.clone();
        inf_b[7] = f32::INFINITY;
        assert_eq!(
            verdict_from_loss_trajectories(&clean, &inf_b, tol),
            Gputrain006Verdict::Fail,
            "+inf in run_b must Fail",
        );
        let mut neg_inf_a = clean.clone();
        neg_inf_a[0] = f32::NEG_INFINITY;
        assert_eq!(
            verdict_from_loss_trajectories(&neg_inf_a, &clean, tol),
            Gputrain006Verdict::Fail,
            "-inf in run_a must Fail",
        );
        // Non-finite single-step delta.
        assert_eq!(
            verdict_from_loss_delta(f32::NAN, tol),
            Gputrain006Verdict::Fail,
            "NaN delta must Fail",
        );
        assert_eq!(
            verdict_from_loss_delta(1e-6, f32::INFINITY),
            Gputrain006Verdict::Fail,
            "infinite tolerance must Fail (no rubber-stamp Pass)",
        );
        // Negative tolerance / delta.
        assert_eq!(
            verdict_from_loss_delta(-1e-6, tol),
            Gputrain006Verdict::Fail,
            "negative delta must Fail (caller passed raw a-b, not |a-b|)",
        );
        assert_eq!(
            verdict_from_loss_delta(1e-6, -1e-5),
            Gputrain006Verdict::Fail,
            "negative tolerance must Fail (nonsense threshold)",
        );

        // Section 6b: the trajectory rule validates its own tolerance.
        // Without that guard a NaN tolerance would make every
        // `delta > tolerance` comparison false (rubber-stamp Pass) and
        // an infinite tolerance would accept any finite drift, so pin
        // the guard with otherwise-identical trajectories.
        for bad_tol in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY, -1e-5_f32] {
            assert_eq!(
                verdict_from_loss_trajectories(&clean, &clean, bad_tol),
                Gputrain006Verdict::Fail,
                "identical trajectories with invalid tolerance ({bad_tol}) must Fail",
            );
        }
        // Zero is a valid (maximally strict) tolerance: bit-identical
        // runs must still Pass under the inclusive ceiling.
        assert_eq!(
            verdict_from_loss_trajectories(&clean, &clean, 0.0),
            Gputrain006Verdict::Pass,
            "identical trajectories with zero tolerance must Pass",
        );

        // Section 7: provenance pin — the 1e-5 tolerance is load-
        // bearing and lockstep with the YAML contract rule and peer
        // INV-TRAIN-006 (CPU 1e-6, CUDA 1e-5). Any future tightening
        // (e.g. after trueno#203 lands deterministic kernels) or
        // relaxation must move the constant, the YAML rule, and this
        // test together.
        assert!(
            (AC_GPUTRAIN_006_MAX_SEED_LOSS_DELTA - 1e-5).abs() < 1e-9,
            "INV-GPUTRAIN-006 tolerance is 1e-5 \
             (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-006)",
        );
    }

    /// FALSIFY-GPUTRAIN-006-v2 empirical-bound discharge: prove the
    /// 4-bound ReproducibilityStudyResult verdict shape. The bounds
    /// were measured on RTX 4090 sm_89 with the deterministic-mode
    /// stack engaged (PTX atomicAdd removed, cuBLAS PEDANTIC, APR-MONO
    /// dep migration); evidence file
    /// `evidence/task-132/gputrain-006-empirical-v1.json` holds the
    /// raw 10-run × 100-step study. Any mutation to one of the 4
    /// constants, any flip of the inequality direction, or any leak of
    /// non-finite handling must break this test before a live RTX 4090
    /// reproducibility-runner dispatch.
    #[test]
    fn falsify_gputrain_006_empirical_reproducibility_bounds() {
        // Section 1: at-bound study (every metric exactly at its
        // floor/ceiling). Pass per inclusive comparisons. Mutating any
        // `<=` to strict `<` or any `>=` to strict `>` flips a metric
        // to Fail.
        let at_bound = ReproducibilityStudyResult {
            per_step_drift_max: AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR,
            random_walk_epsilon: AC_GPUTRAIN_006_RANDOM_WALK_EPSILON,
            cosine_sim_worst: AC_GPUTRAIN_006_COSINE_SIM_FLOOR,
            final_loss_range: AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR,
        };
        assert_eq!(
            verdict_from_reproducibility_study(&at_bound),
            Gputrain006Verdict::Pass,
            "every metric exactly at bound must Pass per inclusive ceiling",
        );

        // Section 2: empirical-pass case — observed v1 numbers from the
        // study evidence file. Drift, epsilon and range are strictly
        // within their bounds. The cosine literal, like the floor
        // literal, is not representable in f32 and rounds to exactly
        // 1.0, so that metric sits ON its (inclusive) bound here.
        let v1_observed = ReproducibilityStudyResult {
            per_step_drift_max: 9.2e-4,            // ≤ 1.0e-3
            random_walk_epsilon: 2.74e-4,          // ≤ 3.0e-4
            cosine_sim_worst: 0.999_999_999_7_f32, // ≥ 0.999_999_99 (both are 1.0 in f32)
            final_loss_range: 1.341e-3,            // ≤ 2.0e-3
        };
        assert_eq!(
            verdict_from_reproducibility_study(&v1_observed),
            Gputrain006Verdict::Pass,
            "v1 empirical study must Pass — these are the proof points",
        );

        // Section 3: each bound, broken individually. Any mutation that
        // accidentally flips one comparison direction, or weakens one
        // bound, must fail to Pass at least one of these four cases.

        // 3a. Per-step drift overshoot.
        let mut drift_high = v1_observed;
        drift_high.per_step_drift_max = AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR + 1e-6;
        assert_eq!(
            verdict_from_reproducibility_study(&drift_high),
            Gputrain006Verdict::Fail,
            "per_step_drift_max above floor must Fail",
        );

        // 3b. Random-walk ε overshoot.
        let mut eps_high = v1_observed;
        eps_high.random_walk_epsilon = AC_GPUTRAIN_006_RANDOM_WALK_EPSILON + 1e-6;
        assert_eq!(
            verdict_from_reproducibility_study(&eps_high),
            Gputrain006Verdict::Fail,
            "random_walk_epsilon above ceiling must Fail",
        );

        // 3c. Cosine similarity below floor. Subtract 1e-6 (well above
        // FP32 ULP at magnitude ~1.0, which is ~1.19e-7) so the
        // arithmetic actually moves the value below the floor.
        let mut cos_low = v1_observed;
        cos_low.cosine_sim_worst = AC_GPUTRAIN_006_COSINE_SIM_FLOOR - 1e-6;
        assert!(
            cos_low.cosine_sim_worst < AC_GPUTRAIN_006_COSINE_SIM_FLOOR,
            "test sanity: cos_low should actually be below floor in FP32"
        );
        assert_eq!(
            verdict_from_reproducibility_study(&cos_low),
            Gputrain006Verdict::Fail,
            "cosine_sim_worst below floor must Fail",
        );

        // 3d. Final loss range overshoot.
        let mut range_high = v1_observed;
        range_high.final_loss_range = AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR + 1e-6;
        assert_eq!(
            verdict_from_reproducibility_study(&range_high),
            Gputrain006Verdict::Fail,
            "final_loss_range above floor must Fail",
        );

        // Section 4: non-finite metrics — every field independently.
        // A NaN or ±∞ in any of the four fields must short-circuit to
        // Fail before the bound checks run, catching the harness bug
        // where a metric was computed from a degenerate input. Each
        // poisoned study is built with struct-update syntax so there is
        // no index-to-field dispatch that could fall out of sync with
        // the struct.
        for non_finite in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
            let poisoned = [
                (
                    "per_step_drift_max",
                    ReproducibilityStudyResult { per_step_drift_max: non_finite, ..v1_observed },
                ),
                (
                    "random_walk_epsilon",
                    ReproducibilityStudyResult { random_walk_epsilon: non_finite, ..v1_observed },
                ),
                (
                    "cosine_sim_worst",
                    ReproducibilityStudyResult { cosine_sim_worst: non_finite, ..v1_observed },
                ),
                (
                    "final_loss_range",
                    ReproducibilityStudyResult { final_loss_range: non_finite, ..v1_observed },
                ),
            ];
            for (field_name, s) in poisoned {
                assert_eq!(
                    verdict_from_reproducibility_study(&s),
                    Gputrain006Verdict::Fail,
                    "non-finite ({non_finite}) in {field_name} must Fail",
                );
            }
        }

        // Section 5: negative ranges (caller bug — forgot abs()). All
        // three magnitude fields are covered: a negative value would
        // otherwise sail under its upper bound, so dropping any one of
        // the three `< 0.0` guards must be caught here.
        let negatives = [
            (
                "per_step_drift_max",
                ReproducibilityStudyResult { per_step_drift_max: -1e-4, ..v1_observed },
            ),
            (
                "random_walk_epsilon",
                ReproducibilityStudyResult { random_walk_epsilon: -1e-4, ..v1_observed },
            ),
            (
                "final_loss_range",
                ReproducibilityStudyResult { final_loss_range: -1e-4, ..v1_observed },
            ),
        ];
        for (field_name, s) in negatives {
            assert_eq!(
                verdict_from_reproducibility_study(&s),
                Gputrain006Verdict::Fail,
                "negative {field_name} must Fail (raw a-b leaked, not |a-b|)",
            );
        }

        // Section 6: cosine similarity range guard. Reproducible traces
        // give ~1.0; any value outside [0, 1.0001] is a caller bug that
        // must Fail.
        for bad_cos in [-0.5_f32, -1.0_f32, 1.5_f32, 100.0_f32] {
            let mut s = v1_observed;
            s.cosine_sim_worst = bad_cos;
            assert_eq!(
                verdict_from_reproducibility_study(&s),
                Gputrain006Verdict::Fail,
                "cosine_sim_worst out-of-range ({bad_cos}) must Fail",
            );
        }

        // Section 7: cosine similarity at exactly 1.0 (identical traces)
        // must Pass. ULP overshoot above 1.0 (FP32 inner product on
        // identical vectors) must also Pass — the verdict allows up to
        // 1.0001 for that exact reason.
        let identical = ReproducibilityStudyResult {
            per_step_drift_max: 0.0,
            random_walk_epsilon: 0.0,
            cosine_sim_worst: 1.0,
            final_loss_range: 0.0,
        };
        assert_eq!(
            verdict_from_reproducibility_study(&identical),
            Gputrain006Verdict::Pass,
            "perfect identity (cos=1.0, all drift=0) must Pass",
        );
        let identity_ulp =
            ReproducibilityStudyResult { cosine_sim_worst: 1.000_000_1, ..identical };
        assert_eq!(
            verdict_from_reproducibility_study(&identity_ulp),
            Gputrain006Verdict::Pass,
            "FP32 cos_sim ULP overshoot above 1.0 (identity reduction) must Pass",
        );

        // Section 8: provenance pin — the 4 constants are load-bearing
        // and lockstep with the YAML contract rule and the empirical
        // evidence file.  Any future ratchet (tighten after better
        // determinism lands) or relaxation (a hardware regression) must
        // move ALL of: the constant, the YAML rule, and the v2 evidence
        // file together. Triple-pinned to prevent silent drift.
        assert!(
            (AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR - 1.0e-3).abs() < 1e-9,
            "AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR is 1.0e-3 \
             (provenance: evidence/task-132/gputrain-006-empirical-v1.json)",
        );
        assert!(
            (AC_GPUTRAIN_006_RANDOM_WALK_EPSILON - 3.0e-4).abs() < 1e-9,
            "AC_GPUTRAIN_006_RANDOM_WALK_EPSILON is 3.0e-4",
        );
        // Both sides of this subtraction are the same f32 literal (which
        // rounds to 1.0), so the pin detects an edit to the constant but
        // cannot distinguish 0.999_999_99 from 1.0.
        assert!(
            (AC_GPUTRAIN_006_COSINE_SIM_FLOOR - 0.999_999_99_f32).abs() < 1e-12,
            "AC_GPUTRAIN_006_COSINE_SIM_FLOOR is 0.999_999_99",
        );
        assert!(
            (AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR - 2.0e-3).abs() < 1e-9,
            "AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR is 2.0e-3",
        );
    }
}