Skip to main content

vyre_driver/
dispatch_policy.rs

1//! Bundled D-series + I2 policy invocation.
2//!
3//! The runtime dispatcher needs all six decisions for every batch
4//! (D1 persistent kernel, D2 arm independence, D3 async copy
5//! overlap, D4 command reuse, D9 bindless, I2 trace-JIT
6//! speculation). Calling six functions and threading six verdicts
7//! through the dispatcher is boilerplate. This module owns the
8//! one-shot bundle: pass `DispatchPolicyInputs`, get back a
9//! `DispatchPolicyVerdict` with every sub-decision already made.
10//!
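//! Mostly composition: sequential calls into the per-substrate
//! decide_* functions, plus audit-event recording for the D1, D4, D9,
//! and I2 decisions and a resolver that picks one primary launch
//! strategy from the D1 and D4 verdicts. Each verdict carries the
//! sub-substrate's typed result so callers can match exhaustively.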
14
15use crate::arm_independence::{
16    can_dispatch_concurrently, ArmBindingSummary, ArmIndependenceVerdict,
17};
18use crate::async_copy_overlap::{can_overlap_copy_with_kernel, CopyOverlapDecision};
19use crate::bindless_policy::{decide_bindless, BindlessDecision, BindlessInputs};
20use crate::command_reuse_policy::{decide_command_reuse, CommandReuseDecision, CommandReuseInputs};
21use crate::observability::{record_substrate_audit_event, SubstrateAuditEvent};
22use crate::persistent_kernel_policy::{
23    decide_persistent_kernel, PersistentKernelDecision, PersistentKernelInputs,
24};
25use crate::trace_jit_policy::{decide_trace_jit_speculation, TraceJitDecision, TraceJitInputs};
26
27/// Input bundle for a single dispatch-policy invocation.
28///
29/// Two arms (`arm_a`, `arm_b`) are needed for D2 / D3 even when
30/// only one is real  -  pass an empty `ArmBindingSummary::default()`
31/// for the absent slot. The `copy_dst_slot` is `None` when no H2D
32/// copy is queued for this batch.
33#[derive(Debug, Clone)]
34pub struct DispatchPolicyInputs {
35    /// D1 persistent-kernel inputs.
36    pub persistent: PersistentKernelInputs,
37    /// First arm of the D2 pair (also the kernel side of the D3 copy).
38    pub arm_a: ArmBindingSummary,
39    /// Second arm of the D2 pair.
40    pub arm_b: ArmBindingSummary,
41    /// D3 copy destination slot, or `None` when no H2D copy is queued.
42    pub copy_dst_slot: Option<u32>,
43    /// D4 command-reuse inputs.
44    pub graph: CommandReuseInputs,
45    /// D9 bindless inputs.
46    pub bindless: BindlessInputs,
47    /// I2 trace-JIT speculation inputs.
48    pub trace_jit: TraceJitInputs,
49}
50
51/// Result bundle from a single dispatch-policy invocation. Every
52/// sub-substrate verdict appears in its typed form.
53#[derive(Debug, Clone)]
54pub struct DispatchPolicyVerdict {
55    /// D1 persistent-kernel verdict.
56    pub persistent: PersistentKernelDecision,
57    /// D2 arm-independence verdict for the (arm_a, arm_b) pair.
58    pub arm_independence: ArmIndependenceVerdict,
59    /// `None` when the inputs had no `copy_dst_slot`; otherwise
60    /// the D3 substrate's verdict for that copy.
61    pub copy_overlap: Option<CopyOverlapDecision>,
62    /// D4 command-reuse verdict.
63    pub command_reuse: CommandReuseDecision,
64    /// D9 bindless verdict.
65    pub bindless: BindlessDecision,
66    /// I2 trace-JIT speculation verdict.
67    pub trace_jit: TraceJitDecision,
68}
69
/// The single launch strategy chosen for a batch; the variants are
/// mutually exclusive.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DispatchExecutionMode {
    /// Neither alternative beats plain launches for this batch.
    PlainLaunches,
    /// Run the batch in persistent kernel mode.
    PersistentKernel {
        /// Nanoseconds predicted to be saved relative to plain launches.
        savings_ns: u128,
    },
    /// Run the batch through native command record/replay.
    CommandReuse {
        /// Nanoseconds predicted to be saved relative to plain launches.
        savings_ns: u128,
    },
}
86
87impl DispatchPolicyVerdict {
88    /// Return the mutually exclusive primary launch strategy.
89    ///
90    /// D1 persistent kernels and D4 command reuse can both be profitable on
91    /// paper. A concrete dispatcher cannot run both for the same launch group,
92    /// so this resolver chooses the higher predicted savings. Equal savings
93    /// prefer command reuse because it avoids persistent queue residency.
94    #[must_use]
95    pub fn primary_execution_mode(&self) -> DispatchExecutionMode {
96        select_primary_execution_mode(self.persistent, self.command_reuse)
97    }
98}
99
100/// One-shot evaluation of every dispatch-side policy substrate.
101#[must_use]
102pub fn evaluate_dispatch_policy(inputs: &DispatchPolicyInputs) -> DispatchPolicyVerdict {
103    let persistent = decide_persistent_kernel(inputs.persistent);
104    let arm_independence = can_dispatch_concurrently(&inputs.arm_a, &inputs.arm_b);
105    let copy_overlap = inputs
106        .copy_dst_slot
107        .map(|slot| can_overlap_copy_with_kernel(slot, &inputs.arm_a));
108    let command_reuse = decide_command_reuse(inputs.graph);
109    let bindless = decide_bindless(inputs.bindless);
110    let trace_jit = decide_trace_jit_speculation(inputs.trace_jit);
111    record_policy_audit_events(persistent, command_reuse, bindless, trace_jit);
112    DispatchPolicyVerdict {
113        persistent,
114        arm_independence,
115        copy_overlap,
116        command_reuse,
117        bindless,
118        trace_jit,
119    }
120}
121
122/// Select a single primary launch strategy from D1 and D4 decisions.
123#[must_use]
124pub fn select_primary_execution_mode(
125    persistent: PersistentKernelDecision,
126    command_reuse: CommandReuseDecision,
127) -> DispatchExecutionMode {
128    match (persistent, command_reuse) {
129        (
130            PersistentKernelDecision::PersistentKernel {
131                savings_ns: persistent_savings,
132            },
133            CommandReuseDecision::RecordAndReplay {
134                savings_ns: command_savings,
135            },
136        ) => {
137            if persistent_savings > command_savings {
138                DispatchExecutionMode::PersistentKernel {
139                    savings_ns: persistent_savings,
140                }
141            } else {
142                DispatchExecutionMode::CommandReuse {
143                    savings_ns: command_savings,
144                }
145            }
146        }
147        (
148            PersistentKernelDecision::PersistentKernel { savings_ns },
149            CommandReuseDecision::PlainLaunches,
150        ) => DispatchExecutionMode::PersistentKernel { savings_ns },
151        (
152            PersistentKernelDecision::StandardLaunches,
153            CommandReuseDecision::RecordAndReplay { savings_ns },
154        ) => DispatchExecutionMode::CommandReuse { savings_ns },
155        (PersistentKernelDecision::StandardLaunches, CommandReuseDecision::PlainLaunches) => {
156            DispatchExecutionMode::PlainLaunches
157        }
158    }
159}
160
161fn record_policy_audit_events(
162    persistent: PersistentKernelDecision,
163    command_reuse: CommandReuseDecision,
164    bindless: BindlessDecision,
165    trace_jit: TraceJitDecision,
166) {
167    record_policy_audit_events_with(
168        persistent,
169        command_reuse,
170        bindless,
171        trace_jit,
172        record_substrate_audit_event,
173    );
174}
175
176fn record_policy_audit_events_with(
177    persistent: PersistentKernelDecision,
178    command_reuse: CommandReuseDecision,
179    bindless: BindlessDecision,
180    trace_jit: TraceJitDecision,
181    mut record: impl FnMut(SubstrateAuditEvent),
182) {
183    if let PersistentKernelDecision::PersistentKernel { savings_ns } = persistent {
184        record(SubstrateAuditEvent {
185            substrate: "persistent_kernel",
186            action: "queue_batch",
187            saved_ns: savings_ns,
188            detail: "launch_overhead",
189        });
190    }
191    if let CommandReuseDecision::RecordAndReplay { savings_ns } = command_reuse {
192        record(SubstrateAuditEvent {
193            substrate: "command_reuse",
194            action: "record_and_replay",
195            saved_ns: savings_ns,
196            detail: "repeat_shape",
197        });
198    }
199    if bindless == BindlessDecision::Bindless {
200        record(SubstrateAuditEvent {
201            substrate: "bindless",
202            action: "descriptor_array",
203            saved_ns: 0,
204            detail: "resource_count_threshold",
205        });
206    }
207    if let TraceJitDecision::Speculate {
208        expected_savings_ns,
209    } = trace_jit
210    {
211        record(SubstrateAuditEvent {
212            substrate: "trace_jit",
213            action: "speculate",
214            saved_ns: expected_savings_ns,
215            detail: "predicted_shape",
216        });
217    }
218}
219
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bindless_policy::BindlessSupport;

    /// Build an arm summary whose reads and writes are the given entries.
    fn arm_summary(read_slots: &[u32], write_slots: &[u32]) -> ArmBindingSummary {
        let reads = read_slots.iter().copied().collect();
        let writes = write_slots.iter().copied().collect();
        ArmBindingSummary { reads, writes }
    }

    /// Inputs expected to push every substrate onto its aggressive path.
    fn aggressive_inputs() -> DispatchPolicyInputs {
        let persistent = PersistentKernelInputs {
            batch_size: 500,
            per_launch_overhead_ns: 5_000,
            per_item_kernel_ns: 1_000,
            persistent_setup_overhead_ns: 50_000,
        };
        let graph = CommandReuseInputs {
            repeat_count: 500,
            per_launch_overhead_ns: 5_000,
            record_overhead_ns: 25_000,
            replay_overhead_ns: 500,
        };
        let bindless = BindlessInputs {
            resource_count: 40,
            support: BindlessSupport::Full,
            dynamic_indexing: true,
        };
        let trace_jit = TraceJitInputs {
            shader_hit_count: 100,
            prediction_confidence_bps: 9_000,
            speculative_spec_cost_ns: 10_000,
            miss_cost_ns: 100_000,
        };
        DispatchPolicyInputs {
            persistent,
            // The two arms touch disjoint entries, and the copy
            // destination (7) appears in neither.
            arm_a: arm_summary(&[0, 1], &[2]),
            arm_b: arm_summary(&[3, 4], &[5]),
            copy_dst_slot: Some(7),
            graph,
            bindless,
            trace_jit,
        }
    }

    /// Inputs expected to keep every substrate on its conservative path.
    ///
    /// Written as a delta from `aggressive_inputs`: only the fields that
    /// differ are spelled out, the cost constants are shared.
    fn conservative_inputs() -> DispatchPolicyInputs {
        let hot = aggressive_inputs();
        DispatchPolicyInputs {
            persistent: PersistentKernelInputs {
                batch_size: 1,
                ..hot.persistent
            },
            // Entry 5 is read by arm_a, written by arm_b, and is also the
            // copy destination.
            arm_a: arm_summary(&[5], &[1]),
            arm_b: arm_summary(&[0], &[5]),
            copy_dst_slot: Some(5),
            graph: CommandReuseInputs {
                repeat_count: 1,
                ..hot.graph
            },
            bindless: BindlessInputs {
                resource_count: 4,
                dynamic_indexing: false,
                ..hot.bindless
            },
            trace_jit: TraceJitInputs {
                shader_hit_count: 2,
                ..hot.trace_jit
            },
        }
    }

    #[test]
    fn aggressive_workload_routes_through_every_aggressive_path() {
        use crate::observability as audit;

        // Hold the shared audit-log lock for the whole test and start
        // from an empty log.
        let _guard = audit::audit_events_test_lock();
        audit::clear_substrate_audit_events_for_test();

        let verdict = evaluate_dispatch_policy(&aggressive_inputs());

        assert!(matches!(
            verdict.persistent,
            PersistentKernelDecision::PersistentKernel { .. }
        ));
        assert_eq!(
            verdict.arm_independence,
            ArmIndependenceVerdict::Independent
        );
        assert_eq!(verdict.copy_overlap, Some(CopyOverlapDecision::Overlap));
        assert!(matches!(
            verdict.command_reuse,
            CommandReuseDecision::RecordAndReplay { .. }
        ));
        assert_eq!(verdict.bindless, BindlessDecision::Bindless);
        assert!(matches!(
            verdict.trace_jit,
            TraceJitDecision::Speculate { .. }
        ));

        // NOTE(review): 2_450_000 equals 500 * 5_000 - 50_000 for these
        // inputs; the actual formula lives in persistent_kernel_policy.
        let expected_mode = DispatchExecutionMode::PersistentKernel {
            savings_ns: 2_450_000,
        };
        assert_eq!(verdict.primary_execution_mode(), expected_mode);

        // Replay the same decisions through the test recorder so the
        // snapshot below can observe them.
        record_policy_audit_events_with(
            verdict.persistent,
            verdict.command_reuse,
            verdict.bindless,
            verdict.trace_jit,
            audit::record_substrate_audit_event_for_test,
        );
        let audit_log = audit::snapshot_for_test().to_audit_log();
        for fragment in [
            "persistent_kernel queue_batch",
            "command_reuse record_and_replay",
            "bindless descriptor_array",
            "trace_jit speculate",
        ] {
            assert!(
                audit_log.contains(fragment),
                "audit log is missing {:?}",
                fragment
            );
        }
        audit::clear_substrate_audit_events_for_test();
    }

    #[test]
    fn conservative_workload_routes_through_every_conservative_path() {
        let verdict = evaluate_dispatch_policy(&conservative_inputs());

        assert_eq!(
            verdict.persistent,
            PersistentKernelDecision::StandardLaunches
        );
        assert!(matches!(
            verdict.arm_independence,
            ArmIndependenceVerdict::SerializeRequired { .. }
        ));
        assert_eq!(verdict.copy_overlap, Some(CopyOverlapDecision::Serialize));
        assert_eq!(verdict.command_reuse, CommandReuseDecision::PlainLaunches);
        assert_eq!(verdict.bindless, BindlessDecision::TraditionalBindings);
        assert_eq!(verdict.trace_jit, TraceJitDecision::HoldSteady);
        assert_eq!(
            verdict.primary_execution_mode(),
            DispatchExecutionMode::PlainLaunches
        );
    }

    #[test]
    fn missing_copy_slot_reports_none_for_overlap() {
        // With no H2D copy queued, the bundle must report `None` for
        // copy_overlap rather than invent a verdict.
        let inputs = DispatchPolicyInputs {
            copy_dst_slot: None,
            ..aggressive_inputs()
        };
        let verdict = evaluate_dispatch_policy(&inputs);
        assert_eq!(verdict.copy_overlap, None);
    }

    #[test]
    fn primary_execution_mode_prefers_command_reuse_on_equal_savings() {
        let tied = select_primary_execution_mode(
            PersistentKernelDecision::PersistentKernel { savings_ns: 100 },
            CommandReuseDecision::RecordAndReplay { savings_ns: 100 },
        );
        assert_eq!(
            tied,
            DispatchExecutionMode::CommandReuse { savings_ns: 100 }
        );
    }

    #[test]
    fn primary_execution_mode_selects_only_profitable_substrate() {
        let only_persistent = select_primary_execution_mode(
            PersistentKernelDecision::PersistentKernel { savings_ns: 500 },
            CommandReuseDecision::PlainLaunches,
        );
        assert_eq!(
            only_persistent,
            DispatchExecutionMode::PersistentKernel { savings_ns: 500 }
        );

        let only_reuse = select_primary_execution_mode(
            PersistentKernelDecision::StandardLaunches,
            CommandReuseDecision::RecordAndReplay { savings_ns: 700 },
        );
        assert_eq!(
            only_reuse,
            DispatchExecutionMode::CommandReuse { savings_ns: 700 }
        );
    }
}