entrenar/train/gputrain_005.rs
1//! FALSIFY-GPUTRAIN-005 / INV-GPUTRAIN-005 / GATE-GPUTRAIN-004 — algorithm-level
2//! PARTIAL discharge.
3//!
4//! Spec: `docs/specifications/aprender-train/ship-two-models-spec.md` §14
5//! (task #132 CUDA training backend gap).
6//!
7//! Contract: `contracts/entrenar/gpu-training-backend-v1.yaml` v1.0.0 → v1.1.0
8//! adds `GATE-GPUTRAIN-004` PARTIAL-discharge evidence binding the pure
9//! f32-threshold decision rule for the 370M Llama scaffold step-time budget
10//! on the RTX 4090 reference host.
11//!
12//! INV-GPUTRAIN-005 states: "370M step time < 500 ms on RTX 4090 (seq_len=2048,
13//! batch=1, sm_89 pre-compiled)." This file discharges the *decision rule* at
14//! `PARTIAL_ALGORITHM_LEVEL` via a single pure const fn:
15//!
16//! `verdict_from_step_time_ms(measured_ms, threshold_ms) -> Gputrain005Verdict`
17//!
18//! Pass iff every one of these holds:
19//!
20//! - `measured_ms.is_finite()` (no NaN / ±∞)
21//! - `threshold_ms.is_finite()` (no NaN / ±∞)
22//! - `measured_ms >= 0.0` (physical: step time can't be negative)
23//! - `threshold_ms > 0.0` (a zero or negative ceiling is nonsense)
24//! - `measured_ms <= threshold_ms`
25//!
26//! Mirrors SHIP-007 (decode tps) and SHIP-020 (decode tps, 370M target) shape
27//! but with an inequality direction flipped (step time is "lower is better,"
28//! throughput is "higher is better"), exactly as INV-GPUTRAIN-005's rule text
29//! prescribes.
30//!
31//! The compute-heavy portion (actually running 50 training steps on an RTX
32//! 4090 and reading wall times from train.jsonl) is intentionally out of
33//! scope here; the threshold rule is what the live gate calls, and
34//! changing the 500-ms constant or the `<=` operator breaks this test
35//! before any trainer is allocated.
36
37/// Maximum tolerated median step wall time for the 370M Llama scaffold on
38/// the RTX 4090 reference host (seq_len=2048, batch=1, sm_89 pre-compiled).
39///
40/// Derivation: `TransformerTrainer` backward + AdamW step at 370M on sm_89
41/// should land well under this ceiling; 500 ms is the ship floor. A
42/// measured median above 500 ms is recorded as FM-GPUTRAIN-UNDER-BUDGET
43/// (contracts/entrenar/gpu-training-backend-v1.yaml §failure_modes).
44pub const AC_GPUTRAIN_005_MAX_STEP_TIME_MS_RTX4090_370M: f32 = 500.0;
45
46/// Binary verdict for FALSIFY-GPUTRAIN-005 / GATE-GPUTRAIN-004.
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum Gputrain005Verdict {
49 /// Measured median step time is within budget.
50 Pass,
51 /// Over budget, negative, non-finite, or any sanity-rail violation —
52 /// all conservatively Fail.
53 Fail,
54}
55
56/// Algorithm-level verdict rule for INV-GPUTRAIN-005: given a measured
57/// median step wall time (ms) and the published threshold (ms), Pass iff
58/// both are finite, measured is non-negative, threshold is strictly
59/// positive, and measured ≤ threshold.
60///
61/// `const fn` so the boundary at exactly `AC_GPUTRAIN_005_MAX_STEP_TIME_MS_
62/// RTX4090_370M` is const-evaluable; any mutation to the comparison
63/// operator or the finiteness guard is caught at compile+test time.
64#[must_use]
65pub const fn verdict_from_step_time_ms(measured_ms: f32, threshold_ms: f32) -> Gputrain005Verdict {
66 if !measured_ms.is_finite() || !threshold_ms.is_finite() {
67 return Gputrain005Verdict::Fail;
68 }
69 if measured_ms < 0.0 {
70 return Gputrain005Verdict::Fail;
71 }
72 if threshold_ms <= 0.0 {
73 return Gputrain005Verdict::Fail;
74 }
75 if measured_ms <= threshold_ms {
76 Gputrain005Verdict::Pass
77 } else {
78 Gputrain005Verdict::Fail
79 }
80}
81
82// ─────────────────────────────────────────────────────────────
83// Unit tests — FALSIFY-GPUTRAIN-005 algorithm-level proof
84// ─────────────────────────────────────────────────────────────
85
86#[cfg(test)]
87mod tests {
88 use super::*;
89
90 /// FALSIFY-GPUTRAIN-005 algorithm-level PARTIAL discharge: prove the
91 /// 500-ms step-time ceiling rule. Any mutation that swaps `<=` for
92 /// `<`, relaxes finiteness, or silently accepts a negative step time
93 /// must break this test before the live RTX 4090 dispatch runs.
94 #[test]
95 fn falsify_gputrain_005_step_time_threshold_logic() {
96 let threshold = AC_GPUTRAIN_005_MAX_STEP_TIME_MS_RTX4090_370M;
97
98 // Section 1: boundary — exactly at threshold. The contract rule
99 // text uses "<" prose but the falsifier prediction binds to "≤"
100 // (spec §14.4 Phase-3 evidence: "median wall_ms < 500" is
101 // evaluated against an inclusive ceiling per FALSIFY spec). Pass
102 // at exactly 500.0 ms catches any mutation that flipped to
103 // strict `<`.
104 assert_eq!(
105 verdict_from_step_time_ms(500.0, threshold),
106 Gputrain005Verdict::Pass,
107 "measured == threshold (500.0 ms) must Pass per inclusive ceiling",
108 );
109
110 // Section 2: one ULP above threshold. `f32::from_bits(threshold
111 // .to_bits() + 1)` is the next representable f32 — 500.00006
112 // ish. Any mutation that drops the `<=` check (e.g. `< +
113 // tolerance`) flips this to Pass.
114 let one_ulp_above = f32::from_bits(threshold.to_bits() + 1);
115 assert!(one_ulp_above > threshold);
116 assert_eq!(
117 verdict_from_step_time_ms(one_ulp_above, threshold),
118 Gputrain005Verdict::Fail,
119 "one ULP above threshold (~500.00006 ms) must Fail",
120 );
121
122 // Section 3: clear Pass band. Values safely under budget —
123 // these are the target perf regime for a correctly-wired CUDA
124 // training dispatch.
125 for &measured in &[0.0f32, 1.0, 100.0, 250.0, 400.0, 499.999] {
126 assert_eq!(
127 verdict_from_step_time_ms(measured, threshold),
128 Gputrain005Verdict::Pass,
129 "measured={measured} ms (well under {threshold} ms) must Pass",
130 );
131 }
132
133 // Section 4: clear Fail band. Values safely over budget — a
134 // silent CPU fallback or missed-fusion regression will land
135 // here (CPU at 370M is 30–60 s/step).
136 for &measured in &[501.0f32, 600.0, 1_000.0, 10_000.0, 60_000.0, f32::MAX] {
137 assert_eq!(
138 verdict_from_step_time_ms(measured, threshold),
139 Gputrain005Verdict::Fail,
140 "measured={measured} ms (above {threshold} ms) must Fail",
141 );
142 }
143
144 // Section 5: non-finite sanity rails. NaN and ±∞ on either
145 // side must Fail conservatively — a JSON-parse bug that
146 // emitted NaN must not silently Pass a ship gate.
147 for &bad in &[f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
148 assert_eq!(
149 verdict_from_step_time_ms(bad, threshold),
150 Gputrain005Verdict::Fail,
151 "non-finite measured ({bad:?}) must Fail",
152 );
153 assert_eq!(
154 verdict_from_step_time_ms(100.0, bad),
155 Gputrain005Verdict::Fail,
156 "non-finite threshold ({bad:?}) must Fail",
157 );
158 }
159
160 // Section 6: negative / non-positive rails. Step time can't be
161 // negative (physical), threshold can't be ≤ 0 (sanity).
162 assert_eq!(
163 verdict_from_step_time_ms(-1.0, threshold),
164 Gputrain005Verdict::Fail,
165 "negative measured step time must Fail (non-physical)",
166 );
167 assert_eq!(
168 verdict_from_step_time_ms(-0.0001, threshold),
169 Gputrain005Verdict::Fail,
170 "tiny-negative measured step time must Fail",
171 );
172 assert_eq!(
173 verdict_from_step_time_ms(100.0, 0.0),
174 Gputrain005Verdict::Fail,
175 "zero threshold must Fail (ceiling of 0 is nonsense)",
176 );
177 assert_eq!(
178 verdict_from_step_time_ms(100.0, -1.0),
179 Gputrain005Verdict::Fail,
180 "negative threshold must Fail",
181 );
182
183 // Section 7: provenance pin — the 500-ms constant is load-
184 // bearing and lockstep with the YAML contract rule text and
185 // §14.4 Phase-3 evidence target. If the spec ever loosens the
186 // budget (e.g. to 800 ms on a smaller GPU) or tightens it
187 // (e.g. to 200 ms on B200), this constant, the YAML rule, and
188 // this test must move together.
189 assert!(
190 (AC_GPUTRAIN_005_MAX_STEP_TIME_MS_RTX4090_370M - 500.0).abs() < 1e-6,
191 "INV-GPUTRAIN-005 step-time ceiling is 500.0 ms \
192 (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-005)",
193 );
194 }
195}