Skip to main content

vyre_driver/
speculation_substrate.rs

//! N2 substrate (foundation half): per-rewrite speculation-as-substrate
//! decision policy.
//!
//! Generalizes I2's trace-JIT speculation to ANY "probably profitable"
//! rewrite (vec_pack, shared_promote, async_load_promote, ...). For each
//! candidate rewrite the runtime keeps two compiled variants — a
//! conservative baseline and a speculative variant — and races them
//! against the autotune DB's recorded winner.
//!
//! This module owns the pure *decision*: given the speculative variant's
//! observed cost vs the baseline (recorded by I3 [`crate::autotune_store`]),
//! return [`SpeculationVerdict::Adopt`] (replace baseline with speculative
//! in the cache), [`SpeculationVerdict::Reject`] (drop speculative,
//! stop racing), or [`SpeculationVerdict::KeepRacing`] (no verdict yet,
//! keep sampling both variants). Pure arithmetic; no I/O, no allocation.
//!
//! The runtime side (compiling both variants on a side pipeline cache
//! key, dispatching them in alternation, recording observations to
//! [`crate::autotune_store`]) lives in `runtime_megakernel` and is
//! Codex's lane. This module is the half that's safe to land before
//! that wiring exists — every consumer reads the same decision contract.
21
/// Timing summary for one shape, consumed by the speculation decision.
///
/// Holds the sample count and mean latency of both raced variants, plus
/// the one-time cost of compiling the speculative variant on the side.
#[derive(Clone, Copy, Debug)]
pub struct SpeculationObservation {
    /// How many dispatches of the baseline variant back
    /// `baseline_mean_ns`; too few and no verdict is rendered.
    pub baseline_dispatches: u32,
    /// Mean wall-clock latency, in nanoseconds, of one baseline
    /// dispatch.
    pub baseline_mean_ns: u64,
    /// How many dispatches of the speculative variant were observed.
    /// Also the divisor used to amortize `side_compile_cost_ns`.
    pub speculative_dispatches: u32,
    /// Mean wall-clock latency of one speculative dispatch, in the same
    /// unit as `baseline_mean_ns`.
    pub speculative_mean_ns: u64,
    /// One-time side-compile cost. It is spread across the speculative
    /// dispatches and charged against the speculative variant, which
    /// has to earn it back before it can win.
    pub side_compile_cost_ns: u64,
}
39
/// Outcome of [`decide_speculation`] for one observed shape.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpeculationVerdict {
    /// The speculative variant beats the baseline by at least
    /// [`MIN_ADOPT_SAVINGS_BPS`] — it replaces the baseline in the
    /// cache and serves future dispatches directly.
    Adopt,
    /// The speculative variant, once its side-compile cost is
    /// amortized, is no cheaper than the baseline — drop it from the
    /// cache and stop racing on this shape.
    Reject,
    /// No verdict yet — too few samples, a zero baseline mean, or a
    /// win that is still below the adoption threshold. Keep racing.
    KeepRacing,
}
52
/// Sample floor for a verdict: each variant must have been dispatched
/// at least this many times. With fewer samples variance dominates the
/// means, so the runtime keeps racing both variants instead of deciding.
pub const MIN_DISPATCHES_FOR_VERDICT: u32 = 8;
57
/// Adoption threshold in basis points (1 bp = 0.01%): the relative
/// saving over the baseline that the speculative variant must reach,
/// with its amortized side-compile cost already charged, before it is
/// adopted. 1500 bps = 15% — deliberately conservative, so adoption is
/// rare but high-confidence.
pub const MIN_ADOPT_SAVINGS_BPS: u64 = 1_500;
63
64/// Decide whether to adopt the speculative variant, reject it, or keep
65/// racing. Pure arithmetic; widened throughout so adversarial inputs cannot
66/// panic or silently clamp a release-path adoption decision.
67#[must_use]
68pub fn decide_speculation(obs: SpeculationObservation) -> SpeculationVerdict {
69    if obs.baseline_dispatches < MIN_DISPATCHES_FOR_VERDICT
70        || obs.speculative_dispatches < MIN_DISPATCHES_FOR_VERDICT
71    {
72        return SpeculationVerdict::KeepRacing;
73    }
74    if obs.baseline_mean_ns == 0 {
75        // Degenerate baseline  -  keep racing rather than divide-by-zero.
76        return SpeculationVerdict::KeepRacing;
77    }
78
79    // Amortized speculative cost: per-dispatch latency plus
80    // side-compile-cost / dispatches-so-far. The further we go, the
81    // less the side-compile bites.
82    let amortized_overhead_ns = obs
83        .side_compile_cost_ns
84        .checked_div(u64::from(obs.speculative_dispatches.max(1)))
85        .unwrap_or(u64::MAX);
86    let effective_speculative_ns =
87        u128::from(obs.speculative_mean_ns) + u128::from(amortized_overhead_ns);
88    let baseline_mean_ns = u128::from(obs.baseline_mean_ns);
89
90    if effective_speculative_ns >= baseline_mean_ns {
91        return SpeculationVerdict::Reject;
92    }
93    let savings_ns = u64::try_from(baseline_mean_ns - effective_speculative_ns).unwrap_or(u64::MAX);
94    let savings_bps = crate::numeric::ratio_basis_points_u64_wide(
95        savings_ns,
96        obs.baseline_mean_ns,
97        0,
98        "speculation savings",
99        "driver",
100    );
101    if savings_bps >= MIN_ADOPT_SAVINGS_BPS {
102        SpeculationVerdict::Adopt
103    } else {
104        // Speculative wins but by less than the threshold  -  keep
105        // racing in case the gap widens with more samples.
106        SpeculationVerdict::KeepRacing
107    }
108}
109
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand constructor. Arguments, in order: baseline dispatch
    /// count, baseline mean (ns), speculative dispatch count,
    /// speculative mean (ns), side-compile cost (ns).
    fn obs(b_n: u32, b_ns: u64, s_n: u32, s_ns: u64, sc_ns: u64) -> SpeculationObservation {
        SpeculationObservation {
            baseline_dispatches: b_n,
            baseline_mean_ns: b_ns,
            speculative_dispatches: s_n,
            speculative_mean_ns: s_ns,
            side_compile_cost_ns: sc_ns,
        }
    }

    #[test]
    fn under_threshold_keeps_racing() {
        // baseline only sampled 3 times — too few to verdict.
        let v = decide_speculation(obs(3, 100_000, 100, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn speculative_under_threshold_keeps_racing() {
        // Mirror case: the speculative side is the undersampled one.
        let v = decide_speculation(obs(100, 100_000, 3, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn speculative_clearly_faster_adopts() {
        // baseline 100us, speculative 50us, no side-compile cost.
        // savings = 50%, well over 15% threshold.
        let v = decide_speculation(obs(50, 100_000, 50, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::Adopt);
    }

    #[test]
    fn speculative_slower_rejects() {
        let v = decide_speculation(obs(50, 50_000, 50, 100_000, 0));
        assert_eq!(v, SpeculationVerdict::Reject);
    }

    #[test]
    fn speculative_tie_rejects() {
        // Equal effective cost is not a win: the comparison is `>=`.
        let v = decide_speculation(obs(50, 100_000, 50, 100_000, 0));
        assert_eq!(v, SpeculationVerdict::Reject);
    }

    #[test]
    fn speculative_marginally_faster_keeps_racing() {
        // baseline 100us, speculative 95us → 5% savings, under 15%.
        let v = decide_speculation(obs(50, 100_000, 50, 95_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn side_compile_cost_amortizes_into_decision() {
        // baseline 100us, speculative 50us, but side-compile = 1ms.
        // After 50 dispatches, amortized overhead = 1ms/50 = 20us.
        // Effective speculative = 50us + 20us = 70us → 30% savings.
        let v = decide_speculation(obs(50, 100_000, 50, 50_000, 1_000_000));
        assert_eq!(v, SpeculationVerdict::Adopt);
    }

    #[test]
    fn side_compile_cost_can_dominate_early() {
        // Same shape but only 8 speculative dispatches.
        // Amortized overhead = 1ms/8 = 125us. Effective = 50us + 125us = 175us
        // > baseline 100us → reject.
        let v = decide_speculation(obs(50, 100_000, 8, 50_000, 1_000_000));
        assert_eq!(v, SpeculationVerdict::Reject);
    }

    #[test]
    fn zero_baseline_keeps_racing_rather_than_dividing_by_zero() {
        let v = decide_speculation(obs(50, 0, 50, 50_000, 0));
        assert_eq!(v, SpeculationVerdict::KeepRacing);
    }

    #[test]
    fn extreme_inputs_do_not_panic() {
        assert_eq!(
            decide_speculation(obs(u32::MAX, u64::MAX, u32::MAX, u64::MAX, u64::MAX)),
            SpeculationVerdict::Reject
        );
        assert_eq!(
            decide_speculation(obs(u32::MAX, 1, u32::MAX, u64::MAX, 0)),
            SpeculationVerdict::Reject
        );
    }

    #[test]
    fn huge_savings_use_widened_arithmetic_not_saturation() {
        assert_eq!(
            decide_speculation(obs(u32::MAX, u64::MAX, u32::MAX, 1, 0)),
            SpeculationVerdict::Adopt
        );
    }

    #[test]
    fn speculation_policy_source_uses_exact_widened_arithmetic() {
        // `include_str!` loads the whole file, this test module included.
        // Every needle is therefore assembled with `concat!`, so the text
        // of this test can never satisfy (or trip) a `contains` check by
        // itself; only the production code above can.
        let source = include_str!("speculation_substrate.rs");

        let forbidden_add = concat!("saturating", "_add");
        let forbidden_mul = concat!("saturating", "_mul");
        assert!(
            !source.contains(forbidden_add) && !source.contains(forbidden_mul),
            "Fix: speculation adoption policy must use widened exact arithmetic, not saturating math that can hide release-path cost corruption."
        );

        let widened_speculative = concat!("u128::from(obs.", "speculative_mean_ns)");
        let widened_baseline = concat!("u128::from(obs.", "baseline_mean_ns)");
        let wide_ratio_helper = concat!("crate::numeric::", "ratio_basis_points_u64_wide");
        assert!(
            source.contains(widened_speculative)
                && source.contains(widened_baseline)
                && source.contains(wide_ratio_helper),
            "Fix: speculation adoption policy must compute effective cost and savings in widened integer space."
        );
    }
}