Skip to main content

entrenar/train/
gputrain_004.rs

1//! FALSIFY-GPUTRAIN-004 / INV-GPUTRAIN-004 / GATE-GPUTRAIN-005 — algorithm-level
2//! PARTIAL discharge.
3//!
4//! Spec: `docs/specifications/aprender-train/ship-two-models-spec.md` §14
5//! (task #132 CUDA training backend gap).
6//!
7//! Contract: `contracts/entrenar/gpu-training-backend-v1.yaml` v1.0.0 → v1.1.0
8//! adds `GATE-GPUTRAIN-005` PARTIAL-discharge evidence binding the pure
9//! dispatch invariant: choosing `Device::Cpu` never invokes CUDA code, and
10//! choosing `Device::Cuda(_)` never invokes CPU-only code.
11//!
12//! INV-GPUTRAIN-004 states: "CPU fallback path remains fully functional (peer
13//! GATE-TRAIN-001..010 still PASS)." The symmetric dual is also in scope:
14//! the silent-CPU-fallback-when-CUDA-was-requested class (task #126's defect)
15//! must be a ship blocker. Together they bound the `requested → dispatched`
16//! relation:
17//!
18//!   - `Device::Cpu` requested ⇒ `Device::Cpu` dispatched. Full stop.
19//!   - `Device::Cuda(_)` requested ⇒ `Device::Cuda(_)` dispatched.
20//!   - Any cross-assignment is a silent-promotion / silent-fallback bug.
21//!   - Any unknown label is conservatively Fail (no implicit defaults).
22//!
23//! The compute-heavy portion (actually running a training step through either
24//! `TransformerTrainer` or `CudaTransformerTrainer`) is intentionally out of
25//! scope here; the decision rule is what the live dispatch wrapper calls
26//! *before* instantiating any trainer, and changing either dispatch set or
27//! the equality rule breaks this test before any trainer is allocated.
28
29/// Set of device-tag labels that count as "the CPU training path."
30/// Pinned as a slice of byte-literals so a mutation that merged the CPU
31/// and CUDA sets (e.g. a typo in device dispatch switch) is caught
32/// immediately by the provenance pin in the mutation survey.
33pub const AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS: &[&str] = &["Cpu"];
34
35/// Set of device-tag labels that count as "the CUDA training path."
36/// Mirrors the `Device::Cuda` enum variant name. If the enum gets a
37/// third variant (e.g. `Rocm`, `Metal`), the survey's provenance pin
38/// forces a contract amendment in lockstep.
39pub const AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS: &[&str] = &["Cuda"];
40
41/// Binary verdict for FALSIFY-GPUTRAIN-004 / GATE-GPUTRAIN-005.
42#[derive(Debug, Clone, Copy, PartialEq, Eq)]
43pub enum Gputrain004Verdict {
44    /// The requested device matches the dispatched device's equivalence
45    /// class (both CPU or both CUDA).
46    Pass,
47    /// Requested vs dispatched are in different classes, or either is
48    /// unknown / empty — both are ship-blocking bugs.
49    Fail,
50}
51
52/// Algorithm-level verdict rule for INV-GPUTRAIN-004: given the operator-
53/// requested device class and the class the dispatch wrapper actually
54/// selected, Pass iff both labels live in the same set
55/// (CPU ↔ CPU, CUDA ↔ CUDA). Every other combination — including any
56/// unknown label on either side — is a conservative Fail because silent
57/// promotion (cpu→cuda) and silent fallback (cuda→cpu) are both recorded
58/// defect classes (task #126 is the silent-fallback proof-of-existence).
59#[must_use]
60pub fn verdict_from_dispatch_label(requested: &str, dispatched: &str) -> Gputrain004Verdict {
61    let req_cpu = AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS.contains(&requested);
62    let req_cuda = AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS.contains(&requested);
63    let dis_cpu = AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS.contains(&dispatched);
64    let dis_cuda = AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS.contains(&dispatched);
65
66    match (req_cpu, req_cuda, dis_cpu, dis_cuda) {
67        (true, false, true, false) => Gputrain004Verdict::Pass, // cpu → cpu
68        (false, true, false, true) => Gputrain004Verdict::Pass, // cuda → cuda
69        _ => Gputrain004Verdict::Fail,
70    }
71}
72
73// ─────────────────────────────────────────────────────────────
74// Unit tests — FALSIFY-GPUTRAIN-004 algorithm-level proof
75// ─────────────────────────────────────────────────────────────
76
77#[cfg(test)]
78mod tests {
79    use super::*;
80
81    /// FALSIFY-GPUTRAIN-004 algorithm-level PARTIAL discharge: prove the
82    /// CPU-fallback-preserved + no-silent-promotion dispatch invariant.
83    /// Any mutation that swaps the equality rule, quietly accepts a
84    /// cross-class dispatch, or defaults unknown labels to Pass must
85    /// break this test before the live trainer is instantiated.
86    #[test]
87    fn falsify_gputrain_004_cpu_fallback_preserved_logic() {
88        // Section 1: cpu → cpu — baseline Pass. The CPU-only host path
89        // (INV-GPUTRAIN-004's primary rule) must round-trip.
90        assert_eq!(
91            verdict_from_dispatch_label("Cpu", "Cpu"),
92            Gputrain004Verdict::Pass,
93            "cpu → cpu must Pass (primary INV-GPUTRAIN-004 case)",
94        );
95
96        // Section 2: cuda → cuda — baseline Pass on the CUDA side. Any
97        // mutation that flipped the Pass case to Fail here would break
98        // every GPU training dispatch, so catching it is free.
99        assert_eq!(
100            verdict_from_dispatch_label("Cuda", "Cuda"),
101            Gputrain004Verdict::Pass,
102            "cuda → cuda must Pass (symmetric CUDA path preservation)",
103        );
104
105        // Section 3: cpu → cuda — silent GPU promotion bug. Catches a
106        // refactor that accidentally upgrades a `--device cpu` request
107        // to GPU because a helper preferred CUDA when it was "available."
108        // Must Fail — operators who asked for CPU parity runs MUST get
109        // CPU dispatch (reproducibility, debug isolation).
110        assert_eq!(
111            verdict_from_dispatch_label("Cpu", "Cuda"),
112            Gputrain004Verdict::Fail,
113            "cpu → cuda must Fail (silent GPU promotion bug)",
114        );
115
116        // Section 4: cuda → cpu — silent CPU fallback bug. THIS IS THE
117        // EXACT DEFECT TASK #126 HIT. 14 minutes of compute at 0 MiB GPU
118        // memory because the CLI accepted `--device cuda` and ran CPU.
119        // Must Fail — that's the whole point of INV-GPUTRAIN-002.
120        assert_eq!(
121            verdict_from_dispatch_label("Cuda", "Cpu"),
122            Gputrain004Verdict::Fail,
123            "cuda → cpu must Fail (this is the task #126 silent-CPU-fallback bug)",
124        );
125
126        // Section 5: unknown label on either side. Catches a refactor
127        // that added a third device (e.g. "Metal", "Rocm", "Xpu")
128        // without an explicit contract amendment. Conservative Fail
129        // because default-behaviour on unknown labels is what made
130        // task #126 possible in the first place.
131        assert_eq!(
132            verdict_from_dispatch_label("xpu", "Cpu"),
133            Gputrain004Verdict::Fail,
134            "unknown requested label must Fail (no implicit default)",
135        );
136        assert_eq!(
137            verdict_from_dispatch_label("Cpu", "metal"),
138            Gputrain004Verdict::Fail,
139            "unknown dispatched label must Fail (no implicit default)",
140        );
141        assert_eq!(
142            verdict_from_dispatch_label("rocm", "rocm"),
143            Gputrain004Verdict::Fail,
144            "unknown label on both sides must Fail even if matched \
145             (only the two recorded sets are allowed)",
146        );
147
148        // Section 6: empty string on either side. A stringly-typed
149        // dispatch wrapper is easy to break on uninitialized default
150        // values; an empty label must never Pass.
151        assert_eq!(
152            verdict_from_dispatch_label("", "Cpu"),
153            Gputrain004Verdict::Fail,
154            "empty requested label must Fail",
155        );
156        assert_eq!(
157            verdict_from_dispatch_label("Cpu", ""),
158            Gputrain004Verdict::Fail,
159            "empty dispatched label must Fail",
160        );
161        assert_eq!(
162            verdict_from_dispatch_label("", ""),
163            Gputrain004Verdict::Fail,
164            "both-empty must Fail (degenerate input, no implicit default)",
165        );
166
167        // Section 7: case-sensitivity and whitespace. A mutation that
168        // relaxed to case-insensitive compare (e.g. `.eq_ignore_ascii_
169        // case()`) would silently accept "CPU" vs "Cpu" which are NOT
170        // in the pinned slice. Fail-closed is required.
171        assert_eq!(
172            verdict_from_dispatch_label("CPU", "Cpu"),
173            Gputrain004Verdict::Fail,
174            "uppercase `CPU` is not in the pinned slice — must Fail",
175        );
176        assert_eq!(
177            verdict_from_dispatch_label("cpu", "Cpu"),
178            Gputrain004Verdict::Fail,
179            "lowercase `cpu` is not in the pinned slice — must Fail \
180             (slice contents are the authoritative grammar)",
181        );
182        assert_eq!(
183            verdict_from_dispatch_label("Cpu ", "Cpu"),
184            Gputrain004Verdict::Fail,
185            "trailing whitespace must Fail (no implicit trim)",
186        );
187
188        // Provenance pin on slice contents. Load-bearing: if a new
189        // Device variant is added (e.g. Rocm, Metal), both the enum
190        // side and this slice must move together. The byte-literal
191        // shape prevents accidental merger into a single set.
192        assert_eq!(
193            AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS,
194            &["Cpu"],
195            "CPU dispatch set must be exactly [\"Cpu\"] \
196             (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-004)",
197        );
198        assert_eq!(
199            AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS,
200            &["Cuda"],
201            "CUDA dispatch set must be exactly [\"Cuda\"] \
202             (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-004)",
203        );
204        // Explicitly prove the two sets are disjoint. Any mutation that
205        // merged them (e.g. both contained "Cpu") would trivially make
206        // every verdict Pass.
207        for cpu in AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS {
208            assert!(
209                !AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS.contains(cpu),
210                "CPU/CUDA dispatch sets must be disjoint",
211            );
212        }
213    }
214}