Skip to main content

entrenar/train/
gputrain_005.rs

1//! FALSIFY-GPUTRAIN-005 / INV-GPUTRAIN-005 / GATE-GPUTRAIN-004 — algorithm-level
2//! PARTIAL discharge.
3//!
4//! Spec: `docs/specifications/aprender-train/ship-two-models-spec.md` §14
5//! (task #132 CUDA training backend gap).
6//!
7//! Contract: `contracts/entrenar/gpu-training-backend-v1.yaml` v1.0.0 → v1.1.0
8//! adds `GATE-GPUTRAIN-004` PARTIAL-discharge evidence binding the pure
9//! f32-threshold decision rule for the 370M Llama scaffold step-time budget
10//! on the RTX 4090 reference host.
11//!
12//! INV-GPUTRAIN-005 states: "370M step time < 500 ms on RTX 4090 (seq_len=2048,
13//! batch=1, sm_89 pre-compiled)." This file discharges the *decision rule* at
14//! `PARTIAL_ALGORITHM_LEVEL` via a single pure const fn:
15//!
16//!   `verdict_from_step_time_ms(measured_ms, threshold_ms) -> Gputrain005Verdict`
17//!
18//! Pass iff every one of these holds:
19//!
20//!   - `measured_ms.is_finite()` (no NaN / ±∞)
21//!   - `threshold_ms.is_finite()` (no NaN / ±∞)
22//!   - `measured_ms >= 0.0` (physical: step time can't be negative)
23//!   - `threshold_ms > 0.0` (a zero or negative ceiling is nonsense)
24//!   - `measured_ms <= threshold_ms`
25//!
26//! Mirrors SHIP-007 (decode tps) and SHIP-020 (decode tps, 370M target) shape
27//! but with an inequality direction flipped (step time is "lower is better,"
28//! throughput is "higher is better"), exactly as INV-GPUTRAIN-005's rule text
29//! prescribes.
30//!
31//! The compute-heavy portion (actually running 50 training steps on an RTX
32//! 4090 and reading wall times from train.jsonl) is intentionally out of
33//! scope here; the threshold rule is what the live gate calls, and
34//! changing the 500-ms constant or the `<=` operator breaks this test
35//! before any trainer is allocated.
36
37/// Maximum tolerated median step wall time for the 370M Llama scaffold on
38/// the RTX 4090 reference host (seq_len=2048, batch=1, sm_89 pre-compiled).
39///
40/// Derivation: `TransformerTrainer` backward + AdamW step at 370M on sm_89
41/// should land well under this ceiling; 500 ms is the ship floor. A
42/// measured median above 500 ms is recorded as FM-GPUTRAIN-UNDER-BUDGET
43/// (contracts/entrenar/gpu-training-backend-v1.yaml §failure_modes).
44pub const AC_GPUTRAIN_005_MAX_STEP_TIME_MS_RTX4090_370M: f32 = 500.0;
45
46/// Binary verdict for FALSIFY-GPUTRAIN-005 / GATE-GPUTRAIN-004.
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum Gputrain005Verdict {
49    /// Measured median step time is within budget.
50    Pass,
51    /// Over budget, negative, non-finite, or any sanity-rail violation —
52    /// all conservatively Fail.
53    Fail,
54}
55
56/// Algorithm-level verdict rule for INV-GPUTRAIN-005: given a measured
57/// median step wall time (ms) and the published threshold (ms), Pass iff
58/// both are finite, measured is non-negative, threshold is strictly
59/// positive, and measured ≤ threshold.
60///
61/// `const fn` so the boundary at exactly `AC_GPUTRAIN_005_MAX_STEP_TIME_MS_
62/// RTX4090_370M` is const-evaluable; any mutation to the comparison
63/// operator or the finiteness guard is caught at compile+test time.
64#[must_use]
65pub const fn verdict_from_step_time_ms(measured_ms: f32, threshold_ms: f32) -> Gputrain005Verdict {
66    if !measured_ms.is_finite() || !threshold_ms.is_finite() {
67        return Gputrain005Verdict::Fail;
68    }
69    if measured_ms < 0.0 {
70        return Gputrain005Verdict::Fail;
71    }
72    if threshold_ms <= 0.0 {
73        return Gputrain005Verdict::Fail;
74    }
75    if measured_ms <= threshold_ms {
76        Gputrain005Verdict::Pass
77    } else {
78        Gputrain005Verdict::Fail
79    }
80}
81
82// ─────────────────────────────────────────────────────────────
83// Unit tests — FALSIFY-GPUTRAIN-005 algorithm-level proof
84// ─────────────────────────────────────────────────────────────
85
86#[cfg(test)]
87mod tests {
88    use super::*;
89
90    /// FALSIFY-GPUTRAIN-005 algorithm-level PARTIAL discharge: prove the
91    /// 500-ms step-time ceiling rule. Any mutation that swaps `<=` for
92    /// `<`, relaxes finiteness, or silently accepts a negative step time
93    /// must break this test before the live RTX 4090 dispatch runs.
94    #[test]
95    fn falsify_gputrain_005_step_time_threshold_logic() {
96        let threshold = AC_GPUTRAIN_005_MAX_STEP_TIME_MS_RTX4090_370M;
97
98        // Section 1: boundary — exactly at threshold. The contract rule
99        // text uses "<" prose but the falsifier prediction binds to "≤"
100        // (spec §14.4 Phase-3 evidence: "median wall_ms < 500" is
101        // evaluated against an inclusive ceiling per FALSIFY spec). Pass
102        // at exactly 500.0 ms catches any mutation that flipped to
103        // strict `<`.
104        assert_eq!(
105            verdict_from_step_time_ms(500.0, threshold),
106            Gputrain005Verdict::Pass,
107            "measured == threshold (500.0 ms) must Pass per inclusive ceiling",
108        );
109
110        // Section 2: one ULP above threshold. `f32::from_bits(threshold
111        // .to_bits() + 1)` is the next representable f32 — 500.00006
112        // ish. Any mutation that drops the `<=` check (e.g. `< +
113        // tolerance`) flips this to Pass.
114        let one_ulp_above = f32::from_bits(threshold.to_bits() + 1);
115        assert!(one_ulp_above > threshold);
116        assert_eq!(
117            verdict_from_step_time_ms(one_ulp_above, threshold),
118            Gputrain005Verdict::Fail,
119            "one ULP above threshold (~500.00006 ms) must Fail",
120        );
121
122        // Section 3: clear Pass band. Values safely under budget —
123        // these are the target perf regime for a correctly-wired CUDA
124        // training dispatch.
125        for &measured in &[0.0f32, 1.0, 100.0, 250.0, 400.0, 499.999] {
126            assert_eq!(
127                verdict_from_step_time_ms(measured, threshold),
128                Gputrain005Verdict::Pass,
129                "measured={measured} ms (well under {threshold} ms) must Pass",
130            );
131        }
132
133        // Section 4: clear Fail band. Values safely over budget — a
134        // silent CPU fallback or missed-fusion regression will land
135        // here (CPU at 370M is 30–60 s/step).
136        for &measured in &[501.0f32, 600.0, 1_000.0, 10_000.0, 60_000.0, f32::MAX] {
137            assert_eq!(
138                verdict_from_step_time_ms(measured, threshold),
139                Gputrain005Verdict::Fail,
140                "measured={measured} ms (above {threshold} ms) must Fail",
141            );
142        }
143
144        // Section 5: non-finite sanity rails. NaN and ±∞ on either
145        // side must Fail conservatively — a JSON-parse bug that
146        // emitted NaN must not silently Pass a ship gate.
147        for &bad in &[f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
148            assert_eq!(
149                verdict_from_step_time_ms(bad, threshold),
150                Gputrain005Verdict::Fail,
151                "non-finite measured ({bad:?}) must Fail",
152            );
153            assert_eq!(
154                verdict_from_step_time_ms(100.0, bad),
155                Gputrain005Verdict::Fail,
156                "non-finite threshold ({bad:?}) must Fail",
157            );
158        }
159
160        // Section 6: negative / non-positive rails. Step time can't be
161        // negative (physical), threshold can't be ≤ 0 (sanity).
162        assert_eq!(
163            verdict_from_step_time_ms(-1.0, threshold),
164            Gputrain005Verdict::Fail,
165            "negative measured step time must Fail (non-physical)",
166        );
167        assert_eq!(
168            verdict_from_step_time_ms(-0.0001, threshold),
169            Gputrain005Verdict::Fail,
170            "tiny-negative measured step time must Fail",
171        );
172        assert_eq!(
173            verdict_from_step_time_ms(100.0, 0.0),
174            Gputrain005Verdict::Fail,
175            "zero threshold must Fail (ceiling of 0 is nonsense)",
176        );
177        assert_eq!(
178            verdict_from_step_time_ms(100.0, -1.0),
179            Gputrain005Verdict::Fail,
180            "negative threshold must Fail",
181        );
182
183        // Section 7: provenance pin — the 500-ms constant is load-
184        // bearing and lockstep with the YAML contract rule text and
185        // §14.4 Phase-3 evidence target. If the spec ever loosens the
186        // budget (e.g. to 800 ms on a smaller GPU) or tightens it
187        // (e.g. to 200 ms on B200), this constant, the YAML rule, and
188        // this test must move together.
189        assert!(
190            (AC_GPUTRAIN_005_MAX_STEP_TIME_MS_RTX4090_370M - 500.0).abs() < 1e-6,
191            "INV-GPUTRAIN-005 step-time ceiling is 500.0 ms \
192             (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-005)",
193        );
194    }
195}