vyre_driver/speculation_substrate.rs
1//! N2 substrate (foundation half): per-rewrite speculation-as-substrate
2//! decision policy.
3//!
4//! Generalizes I2's trace-JIT speculation to ANY "probably profitable"
5//! rewrite (vec_pack, shared_promote, async_load_promote, ...). For each
6//! candidate rewrite the runtime keeps two compiled variants - a
7//! conservative baseline and a speculative variant - and races them
8//! against the autotune DB's recorded winner.
9//!
//! This module owns the pure *decision*: given the speculative variant's
//! observed cost vs the baseline (recorded by I3 [`crate::autotune_store`]),
//! return [`SpeculationVerdict::Adopt`] (replace baseline with speculative
//! in the cache), [`SpeculationVerdict::Reject`] (drop speculative, stop
//! racing), or [`SpeculationVerdict::KeepRacing`] (no verdict yet, keep
//! racing both variants). Pure arithmetic; no I/O, no allocation.
15//!
16//! The runtime side (compiling both variants on a side pipeline cache
17//! key, dispatching them in alternation, recording observations to
18//! [`crate::autotune_store`]) lives in `runtime_megakernel` and is
19//! Codex's lane. This module is the half that's safe to land before
20//! that wiring exists - every consumer reads the same decision contract.
21
/// Measurements gathered for one shape while the baseline and speculative
/// variants race; the sole input to [`decide_speculation`].
///
/// All latencies and costs are expressed in nanoseconds.
#[derive(Debug, Clone, Copy)]
pub struct SpeculationObservation {
    /// How many dispatches of the baseline variant back `baseline_mean_ns`.
    /// A verdict is withheld while this is below
    /// [`MIN_DISPATCHES_FOR_VERDICT`].
    pub baseline_dispatches: u32,
    /// Mean wall-clock latency of a baseline dispatch, in nanoseconds.
    /// A value of zero is treated as degenerate and yields no verdict.
    pub baseline_mean_ns: u64,
    /// How many dispatches of the speculative variant back
    /// `speculative_mean_ns`. Also the divisor used to amortize
    /// `side_compile_cost_ns`.
    pub speculative_dispatches: u32,
    /// Mean wall-clock latency of a speculative dispatch, in nanoseconds.
    pub speculative_mean_ns: u64,
    /// One-time cost of compiling the speculative variant on the side.
    /// It is spread over `speculative_dispatches` and added to the
    /// speculative latency, so the variant has to pay it back before it
    /// can win.
    pub side_compile_cost_ns: u64,
}
39
/// Outcome of [`decide_speculation`] for a single shape.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpeculationVerdict {
    /// The speculative variant beat the baseline by at least
    /// [`MIN_ADOPT_SAVINGS_BPS`]: swap it in for the baseline in the cache
    /// so future dispatches use it directly.
    Adopt,
    /// The speculative variant, once its amortized side-compile cost is
    /// included, is no cheaper than the baseline: drop it from the cache
    /// and stop racing on this shape.
    Reject,
    /// No verdict yet - too few samples, a degenerate baseline, or a win
    /// that is still below the adoption threshold. Keep racing.
    KeepRacing,
}
52
/// Sample floor for a verdict: each variant must have been dispatched at
/// least this many times. With fewer samples the means are dominated by
/// variance, so the decision is deferred and both variants keep racing.
pub const MIN_DISPATCHES_FOR_VERDICT: u32 = 8;
57
/// Adoption threshold, in basis points (1 bp = 0.01%) of the baseline
/// latency. The speculative variant is adopted only when its savings,
/// measured after amortizing the side-compile cost, reach this value.
/// 1500 bps = 15% - deliberately conservative, so adoption is rare but
/// high-confidence.
pub const MIN_ADOPT_SAVINGS_BPS: u64 = 1_500;
63
64/// Decide whether to adopt the speculative variant, reject it, or keep
65/// racing. Pure arithmetic; widened throughout so adversarial inputs cannot
66/// panic or silently clamp a release-path adoption decision.
67#[must_use]
68pub fn decide_speculation(obs: SpeculationObservation) -> SpeculationVerdict {
69 if obs.baseline_dispatches < MIN_DISPATCHES_FOR_VERDICT
70 || obs.speculative_dispatches < MIN_DISPATCHES_FOR_VERDICT
71 {
72 return SpeculationVerdict::KeepRacing;
73 }
74 if obs.baseline_mean_ns == 0 {
75 // Degenerate baseline - keep racing rather than divide-by-zero.
76 return SpeculationVerdict::KeepRacing;
77 }
78
79 // Amortized speculative cost: per-dispatch latency plus
80 // side-compile-cost / dispatches-so-far. The further we go, the
81 // less the side-compile bites.
82 let amortized_overhead_ns = obs
83 .side_compile_cost_ns
84 .checked_div(u64::from(obs.speculative_dispatches.max(1)))
85 .unwrap_or(u64::MAX);
86 let effective_speculative_ns =
87 u128::from(obs.speculative_mean_ns) + u128::from(amortized_overhead_ns);
88 let baseline_mean_ns = u128::from(obs.baseline_mean_ns);
89
90 if effective_speculative_ns >= baseline_mean_ns {
91 return SpeculationVerdict::Reject;
92 }
93 let savings_ns = u64::try_from(baseline_mean_ns - effective_speculative_ns).unwrap_or(u64::MAX);
94 let savings_bps = crate::numeric::ratio_basis_points_u64_wide(
95 savings_ns,
96 obs.baseline_mean_ns,
97 0,
98 "speculation savings",
99 "driver",
100 );
101 if savings_bps >= MIN_ADOPT_SAVINGS_BPS {
102 SpeculationVerdict::Adopt
103 } else {
104 // Speculative wins but by less than the threshold - keep
105 // racing in case the gap widens with more samples.
106 SpeculationVerdict::KeepRacing
107 }
108}
109
#[cfg(test)]
mod tests {
    //! Unit tests for [`decide_speculation`]: one case per verdict path,
    //! extreme-value inputs, and a source-level guard on the arithmetic
    //! style used by the policy code.

    use super::*;

    /// Shorthand constructor: baseline (count, mean ns), speculative
    /// (count, mean ns), side-compile cost ns.
    fn obs(b_n: u32, b_ns: u64, s_n: u32, s_ns: u64, sc_ns: u64) -> SpeculationObservation {
        SpeculationObservation {
            baseline_dispatches: b_n,
            baseline_mean_ns: b_ns,
            speculative_dispatches: s_n,
            speculative_mean_ns: s_ns,
            side_compile_cost_ns: sc_ns,
        }
    }

    #[test]
    fn under_threshold_keeps_racing() {
        // baseline only sampled 3 times - too few to verdict.
        let v = decide_speculation(obs(3, 100_000, 100, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn speculative_under_threshold_keeps_racing() {
        // Mirror case: the speculative side is the under-sampled one.
        let v = decide_speculation(obs(100, 100_000, 3, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn speculative_clearly_faster_adopts() {
        // baseline 100us, speculative 50us, no side-compile cost.
        // savings = 50%, well over 15% threshold.
        let v = decide_speculation(obs(50, 100_000, 50, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::Adopt);
    }

    #[test]
    fn speculative_slower_rejects() {
        let v = decide_speculation(obs(50, 50_000, 50, 100_000, 0));
        assert_eq!(v, SpeculationVerdict::Reject);
    }

    #[test]
    fn speculative_equal_cost_rejects() {
        // A tie is not a win: effective cost == baseline must reject.
        let v = decide_speculation(obs(50, 100_000, 50, 100_000, 0));
        assert_eq!(v, SpeculationVerdict::Reject);
    }

    #[test]
    fn speculative_marginally_faster_keeps_racing() {
        // baseline 100us, speculative 95us -> 5% savings, under 15%.
        let v = decide_speculation(obs(50, 100_000, 50, 95_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn side_compile_cost_amortizes_into_decision() {
        // baseline 100us, speculative 50us, but side-compile = 1ms.
        // After 50 dispatches, amortized overhead = 1ms/50 = 20us.
        // Effective speculative = 50us + 20us = 70us -> 30% savings.
        let v = decide_speculation(obs(50, 100_000, 50, 50_000, 1_000_000));
        assert_eq!(v, SpeculationVerdict::Adopt);
    }

    #[test]
    fn side_compile_cost_can_dominate_early() {
        // Same shape but only 8 speculative dispatches.
        // Amortized overhead = 1ms/8 = 125us. Effective = 50us + 125us = 175us
        // > baseline 100us -> reject.
        let v = decide_speculation(obs(50, 100_000, 8, 50_000, 1_000_000));
        assert_eq!(v, SpeculationVerdict::Reject);
    }

    #[test]
    fn zero_baseline_keeps_racing_rather_than_dividing_by_zero() {
        let v = decide_speculation(obs(50, 0, 50, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn extreme_inputs_do_not_panic() {
        assert_eq!(
            decide_speculation(obs(u32::MAX, u64::MAX, u32::MAX, u64::MAX, u64::MAX)),
            SpeculationVerdict::Reject
        );
        assert_eq!(
            decide_speculation(obs(u32::MAX, 1, u32::MAX, u64::MAX, 0)),
            SpeculationVerdict::Reject
        );
    }

    #[test]
    fn huge_savings_use_widened_arithmetic_not_saturation() {
        assert_eq!(
            decide_speculation(obs(u32::MAX, u64::MAX, u32::MAX, 1, 0)),
            SpeculationVerdict::Adopt
        );
    }

    #[test]
    fn speculation_policy_source_uses_exact_widened_arithmetic() {
        let source = include_str!("speculation_substrate.rs");
        // The positive checks below must look at the policy code only.
        // This test module is part of the included file and spells out
        // every needle as a string literal, so searching the whole file
        // would pass even if the policy stopped using widened arithmetic.
        // The marker is assembled with `concat!` so that its only verbatim
        // occurrence in the file is the attribute that opens this module.
        let policy_source = source
            .split(concat!("#[cfg", "(test)]"))
            .next()
            .unwrap_or(source);

        assert!(
            !source.contains(concat!("saturating", "_add"))
                && !source.contains(concat!("saturating", "_mul")),
            "Fix: speculation adoption policy must use widened exact arithmetic, not saturating math that can hide release-path cost corruption."
        );
        assert!(
            policy_source.contains("u128::from(obs.speculative_mean_ns)")
                && policy_source.contains("u128::from(obs.baseline_mean_ns)")
                && policy_source.contains("crate::numeric::ratio_basis_points_u64_wide"),
            "Fix: speculation adoption policy must compute effective cost and savings in widened integer space."
        );
    }
}