/// Loss-delta tolerance (`1e-5`) for the GPUTRAIN-006 seed check.
///
/// The tests in this file pass it as the `tolerance` argument of
/// `verdict_from_loss_delta` and `verdict_from_loss_trajectories`, where it
/// acts as an inclusive ceiling on the absolute loss difference.
pub const AC_GPUTRAIN_006_MAX_SEED_LOSS_DELTA: f32 = 1e-5;
/// Binary outcome produced by the GPUTRAIN-006 verdict functions in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Gputrain006Verdict {
/// Every input was valid and every checked bound was satisfied.
Pass,
/// At least one input was invalid (e.g. non-finite or negative) or a bound
/// was violated.
Fail,
}
/// Judges a single absolute loss delta against an inclusive tolerance.
///
/// Returns [`Gputrain006Verdict::Pass`] only when all of the following hold:
///
/// * `delta_abs` and `tolerance` are both finite (no NaN, no ±infinity),
/// * neither of them is negative,
/// * `delta_abs <= tolerance`.
///
/// Any other combination yields [`Gputrain006Verdict::Fail`].
#[must_use]
pub const fn verdict_from_loss_delta(delta_abs: f32, tolerance: f32) -> Gputrain006Verdict {
    let both_finite = delta_abs.is_finite() && tolerance.is_finite();
    // NaN compares false against everything, so a NaN input can never satisfy
    // this predicate either; for finite inputs `>= 0.0` is exactly "not negative".
    let both_non_negative = delta_abs >= 0.0 && tolerance >= 0.0;

    if both_finite && both_non_negative && delta_abs <= tolerance {
        Gputrain006Verdict::Pass
    } else {
        Gputrain006Verdict::Fail
    }
}
/// Compares two loss trajectories element by element against an inclusive
/// tolerance.
///
/// Returns [`Gputrain006Verdict::Pass`] only when:
///
/// * both slices are non-empty and have the same length,
/// * `tolerance` is finite and non-negative,
/// * every paired value is finite, and
/// * `|run_a[k] - run_b[k]| <= tolerance` for every index `k`.
///
/// Otherwise the result is [`Gputrain006Verdict::Fail`].
#[must_use]
pub fn verdict_from_loss_trajectories(
    run_a: &[f32],
    run_b: &[f32],
    tolerance: f32,
) -> Gputrain006Verdict {
    // Equal lengths plus a non-empty `run_a` implies `run_b` is non-empty too.
    let comparable = !run_a.is_empty() && run_a.len() == run_b.len();
    let tolerance_usable = tolerance.is_finite() && tolerance >= 0.0;
    if !(comparable && tolerance_usable) {
        return Gputrain006Verdict::Fail;
    }

    // `all` short-circuits on the first offending step.
    let every_step_within = run_a.iter().zip(run_b).all(|(&lhs, &rhs)| {
        lhs.is_finite() && rhs.is_finite() && (lhs - rhs).abs() <= tolerance
    });

    if every_step_within {
        Gputrain006Verdict::Pass
    } else {
        Gputrain006Verdict::Fail
    }
}
/// Inclusive upper bound on `ReproducibilityStudyResult::per_step_drift_max`.
///
/// Despite the `_FLOOR` suffix, `verdict_from_reproducibility_study` fails a
/// study whose value is *above* this constant.
pub const AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR: f32 = 1.0e-3;
/// Inclusive upper bound on `ReproducibilityStudyResult::random_walk_epsilon`.
pub const AC_GPUTRAIN_006_RANDOM_WALK_EPSILON: f32 = 3.0e-4;
/// Inclusive lower bound on `ReproducibilityStudyResult::cosine_sim_worst`.
///
/// NOTE(review): the literal `0.999_999_99` is not representable in `f32`.
/// The closest `f32` values are `1.0` and `1.0 - 2^-24` (about `0.999_999_94`),
/// and the literal rounds to `1.0`. The effective floor is therefore exactly
/// `1.0_f32`: any cosine similarity that is below `1.0` after `f32` rounding
/// fails. Confirm this is the intended strictness.
pub const AC_GPUTRAIN_006_COSINE_SIM_FLOOR: f32 = 0.999_999_99;
/// Inclusive upper bound on `ReproducibilityStudyResult::final_loss_range`.
///
/// Despite the `_FLOOR` suffix, `verdict_from_reproducibility_study` fails a
/// study whose value is *above* this constant.
pub const AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR: f32 = 2.0e-3;
/// Summary metrics of a reproducibility study, judged by
/// `verdict_from_reproducibility_study`.
///
/// NOTE(review): the per-field notes describe only how the verdict function
/// bounds each value; the exact way each metric is measured is not visible in
/// this file and should be confirmed against the study producer.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ReproducibilityStudyResult {
/// Must be finite, non-negative and at most
/// `AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR` for the study to pass.
pub per_step_drift_max: f32,
/// Must be finite, non-negative and at most
/// `AC_GPUTRAIN_006_RANDOM_WALK_EPSILON` for the study to pass.
pub random_walk_epsilon: f32,
/// Must be finite, lie within `0.0..=1.000_1`, and be at least
/// `AC_GPUTRAIN_006_COSINE_SIM_FLOOR` for the study to pass.
pub cosine_sim_worst: f32,
/// Must be finite, non-negative and at most
/// `AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR` for the study to pass.
pub final_loss_range: f32,
}
#[must_use]
pub fn verdict_from_reproducibility_study(
study: &ReproducibilityStudyResult,
) -> Gputrain006Verdict {
if !study.per_step_drift_max.is_finite()
|| !study.random_walk_epsilon.is_finite()
|| !study.cosine_sim_worst.is_finite()
|| !study.final_loss_range.is_finite()
{
return Gputrain006Verdict::Fail;
}
if study.per_step_drift_max < 0.0
|| study.random_walk_epsilon < 0.0
|| study.final_loss_range < 0.0
{
return Gputrain006Verdict::Fail;
}
if !(0.0..=1.000_1).contains(&study.cosine_sim_worst) {
return Gputrain006Verdict::Fail;
}
if study.per_step_drift_max > AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR {
return Gputrain006Verdict::Fail;
}
if study.random_walk_epsilon > AC_GPUTRAIN_006_RANDOM_WALK_EPSILON {
return Gputrain006Verdict::Fail;
}
if study.cosine_sim_worst < AC_GPUTRAIN_006_COSINE_SIM_FLOOR {
return Gputrain006Verdict::Fail;
}
if study.final_loss_range > AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR {
return Gputrain006Verdict::Fail;
}
Gputrain006Verdict::Pass
}
/// Falsification tests for the GPUTRAIN-006 verdict helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // Short aliases so the assertion tables below stay readable.
    const PASS: Gputrain006Verdict = Gputrain006Verdict::Pass;
    const FAIL: Gputrain006Verdict = Gputrain006Verdict::Fail;

    /// Threshold, shape and invalid-input behaviour of the scalar and
    /// trajectory loss-delta verdicts.
    #[test]
    fn falsify_gputrain_006_seed_reproducibility_threshold_logic() {
        let tol = AC_GPUTRAIN_006_MAX_SEED_LOSS_DELTA;

        // Scalar verdict: the ceiling is inclusive, anything above it fails.
        assert_eq!(
            verdict_from_loss_delta(tol, tol),
            PASS,
            "delta == tolerance (1e-5) must Pass per inclusive ceiling",
        );
        let one_ulp_above = f32::from_bits(tol.to_bits() + 1);
        assert!(one_ulp_above > tol);
        for (delta, why) in [
            (one_ulp_above, "one ULP above tolerance must Fail"),
            (1e-3, "100× tolerance must Fail (visible seed plumbing regression)"),
        ] {
            assert_eq!(verdict_from_loss_delta(delta, tol), FAIL, "{why}");
        }

        // Trajectory verdict: a single out-of-tolerance step is enough to fail.
        let baseline = vec![1.0f32; 100];
        let mut perturbed = baseline.clone();
        perturbed[42] = 1.0 + 1e-3;
        assert_eq!(
            verdict_from_loss_trajectories(&baseline, &perturbed, tol),
            FAIL,
            "single-step trajectory violation at k=42 must Fail",
        );
        perturbed[42] = 1.0 + (tol / 2.0);
        assert_eq!(
            verdict_from_loss_trajectories(&baseline, &perturbed, tol),
            PASS,
            "all-within-tolerance trajectory must Pass",
        );

        // A ramp with a constant small offset on every step stays in tolerance.
        let ramp_a: Vec<f32> = (0..100usize)
            .map(|step| 2.0 + (step as f32) * 1e-3)
            .collect();
        let ramp_b: Vec<f32> = ramp_a.iter().map(|loss| loss + (tol / 10.0)).collect();
        assert_eq!(
            verdict_from_loss_trajectories(&ramp_a, &ramp_b, tol),
            PASS,
            "uniform within-tolerance drift across 100 steps must Pass",
        );

        // Shape errors: mismatched or empty trajectories never pass.
        let short = vec![1.0f32; 50];
        let long = vec![1.0f32; 100];
        let empty: Vec<f32> = Vec::new();
        let one = vec![1.0f32];
        let shape_cases: [(&[f32], &[f32], &str); 4] = [
            (&short[..], &long[..], "length mismatch (50 vs 100) must Fail"),
            (&long[..], &short[..], "reverse length mismatch must also Fail"),
            (
                &empty[..],
                &empty[..],
                "both-empty trajectories must Fail (no steps compared)",
            ),
            (&empty[..], &one[..], "one-empty one-nonempty must Fail"),
        ];
        for (lhs, rhs, why) in shape_cases {
            assert_eq!(verdict_from_loss_trajectories(lhs, rhs, tol), FAIL, "{why}");
        }

        // Non-finite samples inside otherwise identical trajectories.
        let clean = vec![1.0f32; 10];
        let mut nan_a = clean.clone();
        nan_a[3] = f32::NAN;
        assert_eq!(
            verdict_from_loss_trajectories(&nan_a, &clean, tol),
            FAIL,
            "NaN in run_a must Fail",
        );
        let mut inf_b = clean.clone();
        inf_b[7] = f32::INFINITY;
        assert_eq!(
            verdict_from_loss_trajectories(&clean, &inf_b, tol),
            FAIL,
            "+inf in run_b must Fail",
        );

        // Scalar verdict: invalid arguments are rejected outright.
        for (delta, tolerance, why) in [
            (f32::NAN, tol, "NaN delta must Fail"),
            (
                1e-6,
                f32::INFINITY,
                "infinite tolerance must Fail (no rubber-stamp Pass)",
            ),
            (
                -1e-6,
                tol,
                "negative delta must Fail (caller passed raw a-b, not |a-b|)",
            ),
            (
                1e-6,
                -1e-5,
                "negative tolerance must Fail (nonsense threshold)",
            ),
        ] {
            assert_eq!(verdict_from_loss_delta(delta, tolerance), FAIL, "{why}");
        }

        // Pin the constant so an accidental edit is caught here.
        assert!(
            (AC_GPUTRAIN_006_MAX_SEED_LOSS_DELTA - 1e-5).abs() < 1e-9,
            "INV-GPUTRAIN-006 tolerance is 1e-5 \
             (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-006)",
        );
    }

    /// Bound, range and invalid-input behaviour of the reproducibility-study
    /// verdict.
    #[test]
    fn falsify_gputrain_006_empirical_reproducibility_bounds() {
        // Every metric sitting exactly on its bound is still accepted.
        let at_bound = ReproducibilityStudyResult {
            per_step_drift_max: AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR,
            random_walk_epsilon: AC_GPUTRAIN_006_RANDOM_WALK_EPSILON,
            cosine_sim_worst: AC_GPUTRAIN_006_COSINE_SIM_FLOOR,
            final_loss_range: AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR,
        };
        assert_eq!(
            verdict_from_reproducibility_study(&at_bound),
            PASS,
            "every metric exactly at bound must Pass per inclusive ceiling",
        );

        let v1_observed = ReproducibilityStudyResult {
            per_step_drift_max: 9.2e-4,
            random_walk_epsilon: 2.74e-4,
            cosine_sim_worst: 0.999_999_999_7_f32,
            final_loss_range: 1.341e-3,
        };
        assert_eq!(
            verdict_from_reproducibility_study(&v1_observed),
            PASS,
            "v1 empirical study must Pass — these are the proof points",
        );

        // One metric at a time pushed just past its bound.
        let drift_high = ReproducibilityStudyResult {
            per_step_drift_max: AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR + 1e-6,
            ..v1_observed
        };
        assert_eq!(
            verdict_from_reproducibility_study(&drift_high),
            FAIL,
            "per_step_drift_max above floor must Fail",
        );

        let eps_high = ReproducibilityStudyResult {
            random_walk_epsilon: AC_GPUTRAIN_006_RANDOM_WALK_EPSILON + 1e-6,
            ..v1_observed
        };
        assert_eq!(
            verdict_from_reproducibility_study(&eps_high),
            FAIL,
            "random_walk_epsilon above ceiling must Fail",
        );

        let cos_low = ReproducibilityStudyResult {
            cosine_sim_worst: AC_GPUTRAIN_006_COSINE_SIM_FLOOR - 1e-6,
            ..v1_observed
        };
        assert!(
            cos_low.cosine_sim_worst < AC_GPUTRAIN_006_COSINE_SIM_FLOOR,
            "test sanity: cos_low should actually be below floor in FP32"
        );
        assert_eq!(
            verdict_from_reproducibility_study(&cos_low),
            FAIL,
            "cosine_sim_worst below floor must Fail",
        );

        let range_high = ReproducibilityStudyResult {
            final_loss_range: AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR + 1e-6,
            ..v1_observed
        };
        assert_eq!(
            verdict_from_reproducibility_study(&range_high),
            FAIL,
            "final_loss_range above floor must Fail",
        );

        // Each field in turn is overwritten with NaN / +inf / -inf.
        type Overwrite = fn(ReproducibilityStudyResult, f32) -> ReproducibilityStudyResult;
        let overwrites: [(&str, Overwrite); 4] = [
            (
                "per_step_drift_max",
                |base: ReproducibilityStudyResult, value: f32| ReproducibilityStudyResult {
                    per_step_drift_max: value,
                    ..base
                },
            ),
            (
                "random_walk_epsilon",
                |base: ReproducibilityStudyResult, value: f32| ReproducibilityStudyResult {
                    random_walk_epsilon: value,
                    ..base
                },
            ),
            (
                "cosine_sim_worst",
                |base: ReproducibilityStudyResult, value: f32| ReproducibilityStudyResult {
                    cosine_sim_worst: value,
                    ..base
                },
            ),
            (
                "final_loss_range",
                |base: ReproducibilityStudyResult, value: f32| ReproducibilityStudyResult {
                    final_loss_range: value,
                    ..base
                },
            ),
        ];
        for (field_name, overwrite) in overwrites {
            for non_finite in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
                let s = overwrite(v1_observed, non_finite);
                assert_eq!(
                    verdict_from_reproducibility_study(&s),
                    FAIL,
                    "non-finite ({non_finite}) in {field_name} must Fail",
                );
            }
        }

        let neg = ReproducibilityStudyResult {
            per_step_drift_max: -1e-4,
            ..v1_observed
        };
        assert_eq!(
            verdict_from_reproducibility_study(&neg),
            FAIL,
            "negative per_step_drift_max must Fail (raw a-b leaked, not |a-b|)",
        );

        for bad_cos in [-0.5_f32, -1.0_f32, 1.5_f32, 100.0_f32] {
            let s = ReproducibilityStudyResult {
                cosine_sim_worst: bad_cos,
                ..v1_observed
            };
            assert_eq!(
                verdict_from_reproducibility_study(&s),
                FAIL,
                "cosine_sim_worst out-of-range ({bad_cos}) must Fail",
            );
        }

        // Perfect agreement, and the same with a tiny overshoot above 1.0.
        let identical = ReproducibilityStudyResult {
            per_step_drift_max: 0.0,
            random_walk_epsilon: 0.0,
            cosine_sim_worst: 1.0,
            final_loss_range: 0.0,
        };
        assert_eq!(
            verdict_from_reproducibility_study(&identical),
            PASS,
            "perfect identity (cos=1.0, all drift=0) must Pass",
        );
        let identity_ulp = ReproducibilityStudyResult {
            cosine_sim_worst: 1.000_000_1,
            ..identical
        };
        assert_eq!(
            verdict_from_reproducibility_study(&identity_ulp),
            PASS,
            "FP32 cos_sim ULP overshoot above 1.0 (identity reduction) must Pass",
        );

        // Pin the threshold constants so an accidental edit is caught here.
        assert!(
            (AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR - 1.0e-3).abs() < 1e-9,
            "AC_GPUTRAIN_006_PER_STEP_DRIFT_FLOOR is 1.0e-3 \
             (provenance: evidence/task-132/gputrain-006-empirical-v1.json)",
        );
        assert!(
            (AC_GPUTRAIN_006_RANDOM_WALK_EPSILON - 3.0e-4).abs() < 1e-9,
            "AC_GPUTRAIN_006_RANDOM_WALK_EPSILON is 3.0e-4",
        );
        assert!(
            (AC_GPUTRAIN_006_COSINE_SIM_FLOOR - 0.999_999_99_f32).abs() < 1e-12,
            "AC_GPUTRAIN_006_COSINE_SIM_FLOOR is 0.999_999_99",
        );
        assert!(
            (AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR - 2.0e-3).abs() < 1e-9,
            "AC_GPUTRAIN_006_FINAL_LOSS_RANGE_FLOOR is 2.0e-3",
        );
    }
}