entrenar/train/gputrain_004.rs
1//! FALSIFY-GPUTRAIN-004 / INV-GPUTRAIN-004 / GATE-GPUTRAIN-005 — algorithm-level
2//! PARTIAL discharge.
3//!
4//! Spec: `docs/specifications/aprender-train/ship-two-models-spec.md` §14
5//! (task #132 CUDA training backend gap).
6//!
7//! Contract: `contracts/entrenar/gpu-training-backend-v1.yaml` v1.0.0 → v1.1.0
8//! adds `GATE-GPUTRAIN-005` PARTIAL-discharge evidence binding the pure
9//! dispatch invariant: choosing `Device::Cpu` never invokes CUDA code, and
10//! choosing `Device::Cuda(_)` never invokes CPU-only code.
11//!
12//! INV-GPUTRAIN-004 states: "CPU fallback path remains fully functional (peer
13//! GATE-TRAIN-001..010 still PASS)." The symmetric dual is also in scope:
14//! the silent-CPU-fallback-when-CUDA-was-requested class (task #126's defect)
15//! must be a ship blocker. Together they bound the `requested → dispatched`
16//! relation:
17//!
18//! - `Device::Cpu` requested ⇒ `Device::Cpu` dispatched. Full stop.
19//! - `Device::Cuda(_)` requested ⇒ `Device::Cuda(_)` dispatched.
20//! - Any cross-assignment is a silent-promotion / silent-fallback bug.
21//! - Any unknown label is conservatively Fail (no implicit defaults).
22//!
23//! The compute-heavy portion (actually running a training step through either
24//! `TransformerTrainer` or `CudaTransformerTrainer`) is intentionally out of
25//! scope here; the decision rule is what the live dispatch wrapper calls
26//! *before* instantiating any trainer, and changing either dispatch set or
27//! the equality rule breaks this test before any trainer is allocated.
28
29/// Set of device-tag labels that count as "the CPU training path."
30/// Pinned as a slice of byte-literals so a mutation that merged the CPU
31/// and CUDA sets (e.g. a typo in device dispatch switch) is caught
32/// immediately by the provenance pin in the mutation survey.
33pub const AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS: &[&str] = &["Cpu"];
34
35/// Set of device-tag labels that count as "the CUDA training path."
36/// Mirrors the `Device::Cuda` enum variant name. If the enum gets a
37/// third variant (e.g. `Rocm`, `Metal`), the survey's provenance pin
38/// forces a contract amendment in lockstep.
39pub const AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS: &[&str] = &["Cuda"];
40
41/// Binary verdict for FALSIFY-GPUTRAIN-004 / GATE-GPUTRAIN-005.
42#[derive(Debug, Clone, Copy, PartialEq, Eq)]
43pub enum Gputrain004Verdict {
44 /// The requested device matches the dispatched device's equivalence
45 /// class (both CPU or both CUDA).
46 Pass,
47 /// Requested vs dispatched are in different classes, or either is
48 /// unknown / empty — both are ship-blocking bugs.
49 Fail,
50}
51
52/// Algorithm-level verdict rule for INV-GPUTRAIN-004: given the operator-
53/// requested device class and the class the dispatch wrapper actually
54/// selected, Pass iff both labels live in the same set
55/// (CPU ↔ CPU, CUDA ↔ CUDA). Every other combination — including any
56/// unknown label on either side — is a conservative Fail because silent
57/// promotion (cpu→cuda) and silent fallback (cuda→cpu) are both recorded
58/// defect classes (task #126 is the silent-fallback proof-of-existence).
59#[must_use]
60pub fn verdict_from_dispatch_label(requested: &str, dispatched: &str) -> Gputrain004Verdict {
61 let req_cpu = AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS.contains(&requested);
62 let req_cuda = AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS.contains(&requested);
63 let dis_cpu = AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS.contains(&dispatched);
64 let dis_cuda = AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS.contains(&dispatched);
65
66 match (req_cpu, req_cuda, dis_cpu, dis_cuda) {
67 (true, false, true, false) => Gputrain004Verdict::Pass, // cpu → cpu
68 (false, true, false, true) => Gputrain004Verdict::Pass, // cuda → cuda
69 _ => Gputrain004Verdict::Fail,
70 }
71}
72
73// ─────────────────────────────────────────────────────────────
74// Unit tests — FALSIFY-GPUTRAIN-004 algorithm-level proof
75// ─────────────────────────────────────────────────────────────
76
77#[cfg(test)]
78mod tests {
79 use super::*;
80
81 /// FALSIFY-GPUTRAIN-004 algorithm-level PARTIAL discharge: prove the
82 /// CPU-fallback-preserved + no-silent-promotion dispatch invariant.
83 /// Any mutation that swaps the equality rule, quietly accepts a
84 /// cross-class dispatch, or defaults unknown labels to Pass must
85 /// break this test before the live trainer is instantiated.
86 #[test]
87 fn falsify_gputrain_004_cpu_fallback_preserved_logic() {
88 // Section 1: cpu → cpu — baseline Pass. The CPU-only host path
89 // (INV-GPUTRAIN-004's primary rule) must round-trip.
90 assert_eq!(
91 verdict_from_dispatch_label("Cpu", "Cpu"),
92 Gputrain004Verdict::Pass,
93 "cpu → cpu must Pass (primary INV-GPUTRAIN-004 case)",
94 );
95
96 // Section 2: cuda → cuda — baseline Pass on the CUDA side. Any
97 // mutation that flipped the Pass case to Fail here would break
98 // every GPU training dispatch, so catching it is free.
99 assert_eq!(
100 verdict_from_dispatch_label("Cuda", "Cuda"),
101 Gputrain004Verdict::Pass,
102 "cuda → cuda must Pass (symmetric CUDA path preservation)",
103 );
104
105 // Section 3: cpu → cuda — silent GPU promotion bug. Catches a
106 // refactor that accidentally upgrades a `--device cpu` request
107 // to GPU because a helper preferred CUDA when it was "available."
108 // Must Fail — operators who asked for CPU parity runs MUST get
109 // CPU dispatch (reproducibility, debug isolation).
110 assert_eq!(
111 verdict_from_dispatch_label("Cpu", "Cuda"),
112 Gputrain004Verdict::Fail,
113 "cpu → cuda must Fail (silent GPU promotion bug)",
114 );
115
116 // Section 4: cuda → cpu — silent CPU fallback bug. THIS IS THE
117 // EXACT DEFECT TASK #126 HIT. 14 minutes of compute at 0 MiB GPU
118 // memory because the CLI accepted `--device cuda` and ran CPU.
119 // Must Fail — that's the whole point of INV-GPUTRAIN-002.
120 assert_eq!(
121 verdict_from_dispatch_label("Cuda", "Cpu"),
122 Gputrain004Verdict::Fail,
123 "cuda → cpu must Fail (this is the task #126 silent-CPU-fallback bug)",
124 );
125
126 // Section 5: unknown label on either side. Catches a refactor
127 // that added a third device (e.g. "Metal", "Rocm", "Xpu")
128 // without an explicit contract amendment. Conservative Fail
129 // because default-behaviour on unknown labels is what made
130 // task #126 possible in the first place.
131 assert_eq!(
132 verdict_from_dispatch_label("xpu", "Cpu"),
133 Gputrain004Verdict::Fail,
134 "unknown requested label must Fail (no implicit default)",
135 );
136 assert_eq!(
137 verdict_from_dispatch_label("Cpu", "metal"),
138 Gputrain004Verdict::Fail,
139 "unknown dispatched label must Fail (no implicit default)",
140 );
141 assert_eq!(
142 verdict_from_dispatch_label("rocm", "rocm"),
143 Gputrain004Verdict::Fail,
144 "unknown label on both sides must Fail even if matched \
145 (only the two recorded sets are allowed)",
146 );
147
148 // Section 6: empty string on either side. A stringly-typed
149 // dispatch wrapper is easy to break on uninitialized default
150 // values; an empty label must never Pass.
151 assert_eq!(
152 verdict_from_dispatch_label("", "Cpu"),
153 Gputrain004Verdict::Fail,
154 "empty requested label must Fail",
155 );
156 assert_eq!(
157 verdict_from_dispatch_label("Cpu", ""),
158 Gputrain004Verdict::Fail,
159 "empty dispatched label must Fail",
160 );
161 assert_eq!(
162 verdict_from_dispatch_label("", ""),
163 Gputrain004Verdict::Fail,
164 "both-empty must Fail (degenerate input, no implicit default)",
165 );
166
167 // Section 7: case-sensitivity and whitespace. A mutation that
168 // relaxed to case-insensitive compare (e.g. `.eq_ignore_ascii_
169 // case()`) would silently accept "CPU" vs "Cpu" which are NOT
170 // in the pinned slice. Fail-closed is required.
171 assert_eq!(
172 verdict_from_dispatch_label("CPU", "Cpu"),
173 Gputrain004Verdict::Fail,
174 "uppercase `CPU` is not in the pinned slice — must Fail",
175 );
176 assert_eq!(
177 verdict_from_dispatch_label("cpu", "Cpu"),
178 Gputrain004Verdict::Fail,
179 "lowercase `cpu` is not in the pinned slice — must Fail \
180 (slice contents are the authoritative grammar)",
181 );
182 assert_eq!(
183 verdict_from_dispatch_label("Cpu ", "Cpu"),
184 Gputrain004Verdict::Fail,
185 "trailing whitespace must Fail (no implicit trim)",
186 );
187
188 // Provenance pin on slice contents. Load-bearing: if a new
189 // Device variant is added (e.g. Rocm, Metal), both the enum
190 // side and this slice must move together. The byte-literal
191 // shape prevents accidental merger into a single set.
192 assert_eq!(
193 AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS,
194 &["Cpu"],
195 "CPU dispatch set must be exactly [\"Cpu\"] \
196 (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-004)",
197 );
198 assert_eq!(
199 AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS,
200 &["Cuda"],
201 "CUDA dispatch set must be exactly [\"Cuda\"] \
202 (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-004)",
203 );
204 // Explicitly prove the two sets are disjoint. Any mutation that
205 // merged them (e.g. both contained "Cpu") would trivially make
206 // every verdict Pass.
207 for cpu in AC_GPUTRAIN_004_CPU_DISPATCH_VARIANTS {
208 assert!(
209 !AC_GPUTRAIN_004_CUDA_DISPATCH_VARIANTS.contains(cpu),
210 "CPU/CUDA dispatch sets must be disjoint",
211 );
212 }
213 }
214}