1use crate::arm_independence::{
16 can_dispatch_concurrently, ArmBindingSummary, ArmIndependenceVerdict,
17};
18use crate::async_copy_overlap::{can_overlap_copy_with_kernel, CopyOverlapDecision};
19use crate::bindless_policy::{decide_bindless, BindlessDecision, BindlessInputs};
20use crate::command_reuse_policy::{decide_command_reuse, CommandReuseDecision, CommandReuseInputs};
21use crate::observability::{record_substrate_audit_event, SubstrateAuditEvent};
22use crate::persistent_kernel_policy::{
23 decide_persistent_kernel, PersistentKernelDecision, PersistentKernelInputs,
24};
25use crate::trace_jit_policy::{decide_trace_jit_speculation, TraceJitDecision, TraceJitInputs};
26
27#[derive(Debug, Clone)]
34pub struct DispatchPolicyInputs {
35 pub persistent: PersistentKernelInputs,
37 pub arm_a: ArmBindingSummary,
39 pub arm_b: ArmBindingSummary,
41 pub copy_dst_slot: Option<u32>,
43 pub graph: CommandReuseInputs,
45 pub bindless: BindlessInputs,
47 pub trace_jit: TraceJitInputs,
49}
50
51#[derive(Debug, Clone)]
54pub struct DispatchPolicyVerdict {
55 pub persistent: PersistentKernelDecision,
57 pub arm_independence: ArmIndependenceVerdict,
59 pub copy_overlap: Option<CopyOverlapDecision>,
62 pub command_reuse: CommandReuseDecision,
64 pub bindless: BindlessDecision,
66 pub trace_jit: TraceJitDecision,
68}
69
/// The single launch strategy chosen by [`select_primary_execution_mode`]
/// from the persistent-kernel and command-reuse decisions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DispatchExecutionMode {
    /// Neither the persistent-kernel nor the command-reuse policy opted in.
    PlainLaunches,
    /// The persistent-kernel decision was selected.
    PersistentKernel {
        /// Savings figure copied from the persistent-kernel decision
        /// (nanoseconds, going by the field name).
        savings_ns: u128,
    },
    /// The command-reuse (record and replay) decision was selected.
    CommandReuse {
        /// Savings figure copied from the command-reuse decision
        /// (nanoseconds, going by the field name).
        savings_ns: u128,
    },
}
86
87impl DispatchPolicyVerdict {
88 #[must_use]
95 pub fn primary_execution_mode(&self) -> DispatchExecutionMode {
96 select_primary_execution_mode(self.persistent, self.command_reuse)
97 }
98}
99
100#[must_use]
102pub fn evaluate_dispatch_policy(inputs: &DispatchPolicyInputs) -> DispatchPolicyVerdict {
103 let persistent = decide_persistent_kernel(inputs.persistent);
104 let arm_independence = can_dispatch_concurrently(&inputs.arm_a, &inputs.arm_b);
105 let copy_overlap = inputs
106 .copy_dst_slot
107 .map(|slot| can_overlap_copy_with_kernel(slot, &inputs.arm_a));
108 let command_reuse = decide_command_reuse(inputs.graph);
109 let bindless = decide_bindless(inputs.bindless);
110 let trace_jit = decide_trace_jit_speculation(inputs.trace_jit);
111 record_policy_audit_events(persistent, command_reuse, bindless, trace_jit);
112 DispatchPolicyVerdict {
113 persistent,
114 arm_independence,
115 copy_overlap,
116 command_reuse,
117 bindless,
118 trace_jit,
119 }
120}
121
122#[must_use]
124pub fn select_primary_execution_mode(
125 persistent: PersistentKernelDecision,
126 command_reuse: CommandReuseDecision,
127) -> DispatchExecutionMode {
128 match (persistent, command_reuse) {
129 (
130 PersistentKernelDecision::PersistentKernel {
131 savings_ns: persistent_savings,
132 },
133 CommandReuseDecision::RecordAndReplay {
134 savings_ns: command_savings,
135 },
136 ) => {
137 if persistent_savings > command_savings {
138 DispatchExecutionMode::PersistentKernel {
139 savings_ns: persistent_savings,
140 }
141 } else {
142 DispatchExecutionMode::CommandReuse {
143 savings_ns: command_savings,
144 }
145 }
146 }
147 (
148 PersistentKernelDecision::PersistentKernel { savings_ns },
149 CommandReuseDecision::PlainLaunches,
150 ) => DispatchExecutionMode::PersistentKernel { savings_ns },
151 (
152 PersistentKernelDecision::StandardLaunches,
153 CommandReuseDecision::RecordAndReplay { savings_ns },
154 ) => DispatchExecutionMode::CommandReuse { savings_ns },
155 (PersistentKernelDecision::StandardLaunches, CommandReuseDecision::PlainLaunches) => {
156 DispatchExecutionMode::PlainLaunches
157 }
158 }
159}
160
161fn record_policy_audit_events(
162 persistent: PersistentKernelDecision,
163 command_reuse: CommandReuseDecision,
164 bindless: BindlessDecision,
165 trace_jit: TraceJitDecision,
166) {
167 record_policy_audit_events_with(
168 persistent,
169 command_reuse,
170 bindless,
171 trace_jit,
172 record_substrate_audit_event,
173 );
174}
175
176fn record_policy_audit_events_with(
177 persistent: PersistentKernelDecision,
178 command_reuse: CommandReuseDecision,
179 bindless: BindlessDecision,
180 trace_jit: TraceJitDecision,
181 mut record: impl FnMut(SubstrateAuditEvent),
182) {
183 if let PersistentKernelDecision::PersistentKernel { savings_ns } = persistent {
184 record(SubstrateAuditEvent {
185 substrate: "persistent_kernel",
186 action: "queue_batch",
187 saved_ns: savings_ns,
188 detail: "launch_overhead",
189 });
190 }
191 if let CommandReuseDecision::RecordAndReplay { savings_ns } = command_reuse {
192 record(SubstrateAuditEvent {
193 substrate: "command_reuse",
194 action: "record_and_replay",
195 saved_ns: savings_ns,
196 detail: "repeat_shape",
197 });
198 }
199 if bindless == BindlessDecision::Bindless {
200 record(SubstrateAuditEvent {
201 substrate: "bindless",
202 action: "descriptor_array",
203 saved_ns: 0,
204 detail: "resource_count_threshold",
205 });
206 }
207 if let TraceJitDecision::Speculate {
208 expected_savings_ns,
209 } = trace_jit
210 {
211 record(SubstrateAuditEvent {
212 substrate: "trace_jit",
213 action: "speculate",
214 saved_ns: expected_savings_ns,
215 detail: "predicted_shape",
216 });
217 }
218}
219
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bindless_policy::BindlessSupport;
    use crate::observability;

    /// Builds an arm summary from plain slices of read and written slots.
    fn arm(reads: &[u32], writes: &[u32]) -> ArmBindingSummary {
        let reads = reads.iter().copied().collect();
        let writes = writes.iter().copied().collect();
        ArmBindingSummary { reads, writes }
    }

    /// Inputs that the tests below expect to push every policy onto its
    /// aggressive path: large batch and repeat counts, disjoint arms, a copy
    /// slot (7) that neither arm mentions, many bindless resources and a hot,
    /// confidently predicted shader.
    fn aggressive_inputs() -> DispatchPolicyInputs {
        let persistent = PersistentKernelInputs {
            batch_size: 500,
            per_launch_overhead_ns: 5_000,
            per_item_kernel_ns: 1_000,
            persistent_setup_overhead_ns: 50_000,
        };
        let graph = CommandReuseInputs {
            repeat_count: 500,
            per_launch_overhead_ns: 5_000,
            record_overhead_ns: 25_000,
            replay_overhead_ns: 500,
        };
        let bindless = BindlessInputs {
            resource_count: 40,
            support: BindlessSupport::Full,
            dynamic_indexing: true,
        };
        let trace_jit = TraceJitInputs {
            shader_hit_count: 100,
            prediction_confidence_bps: 9_000,
            speculative_spec_cost_ns: 10_000,
            miss_cost_ns: 100_000,
        };
        DispatchPolicyInputs {
            persistent,
            arm_a: arm(&[0, 1], &[2]),
            arm_b: arm(&[3, 4], &[5]),
            copy_dst_slot: Some(7),
            graph,
            bindless,
            trace_jit,
        }
    }

    /// Inputs that the tests below expect to keep every policy on its
    /// conservative path: batch and repeat count of one, slot 5 shared by
    /// both arms and the copy destination, few bindless resources and a
    /// rarely hit shader.
    fn conservative_inputs() -> DispatchPolicyInputs {
        let persistent = PersistentKernelInputs {
            batch_size: 1,
            per_launch_overhead_ns: 5_000,
            per_item_kernel_ns: 1_000,
            persistent_setup_overhead_ns: 50_000,
        };
        let graph = CommandReuseInputs {
            repeat_count: 1,
            per_launch_overhead_ns: 5_000,
            record_overhead_ns: 25_000,
            replay_overhead_ns: 500,
        };
        let bindless = BindlessInputs {
            resource_count: 4,
            support: BindlessSupport::Full,
            dynamic_indexing: false,
        };
        let trace_jit = TraceJitInputs {
            shader_hit_count: 2,
            prediction_confidence_bps: 9_000,
            speculative_spec_cost_ns: 10_000,
            miss_cost_ns: 100_000,
        };
        DispatchPolicyInputs {
            persistent,
            arm_a: arm(&[5], &[1]),
            arm_b: arm(&[0], &[5]),
            copy_dst_slot: Some(5),
            graph,
            bindless,
            trace_jit,
        }
    }

    #[test]
    fn aggressive_workload_routes_through_every_aggressive_path() {
        // Hold the shared audit-log lock and start from an empty log before
        // anything is recorded.
        let _guard = observability::audit_events_test_lock();
        observability::clear_substrate_audit_events_for_test();

        let verdict = evaluate_dispatch_policy(&aggressive_inputs());

        assert!(matches!(
            verdict.persistent,
            PersistentKernelDecision::PersistentKernel { .. }
        ));
        assert_eq!(
            verdict.arm_independence,
            ArmIndependenceVerdict::Independent
        );
        assert_eq!(verdict.copy_overlap, Some(CopyOverlapDecision::Overlap));
        assert!(matches!(
            verdict.command_reuse,
            CommandReuseDecision::RecordAndReplay { .. }
        ));
        assert_eq!(verdict.bindless, BindlessDecision::Bindless);
        assert!(matches!(
            verdict.trace_jit,
            TraceJitDecision::Speculate { .. }
        ));
        // NOTE(review): 2_450_000 matches 500 * 5_000 - 50_000 from the
        // fixture; confirm against the persistent-kernel policy's formula.
        assert_eq!(
            verdict.primary_execution_mode(),
            DispatchExecutionMode::PersistentKernel {
                savings_ns: 2_450_000
            }
        );

        // Replay the same decisions through the test recorder, then check
        // that each substrate left a line in the audit log.
        record_policy_audit_events_with(
            verdict.persistent,
            verdict.command_reuse,
            verdict.bindless,
            verdict.trace_jit,
            observability::record_substrate_audit_event_for_test,
        );
        let log = observability::snapshot_for_test().to_audit_log();
        for needle in [
            "persistent_kernel queue_batch",
            "command_reuse record_and_replay",
            "bindless descriptor_array",
            "trace_jit speculate",
        ] {
            assert!(log.contains(needle), "audit log is missing `{}`", needle);
        }

        // Leave the shared log empty for whichever test takes the lock next.
        observability::clear_substrate_audit_events_for_test();
    }

    #[test]
    fn conservative_workload_routes_through_every_conservative_path() {
        let verdict = evaluate_dispatch_policy(&conservative_inputs());

        assert_eq!(
            verdict.persistent,
            PersistentKernelDecision::StandardLaunches
        );
        assert!(matches!(
            verdict.arm_independence,
            ArmIndependenceVerdict::SerializeRequired { .. }
        ));
        assert_eq!(verdict.copy_overlap, Some(CopyOverlapDecision::Serialize));
        assert_eq!(verdict.command_reuse, CommandReuseDecision::PlainLaunches);
        assert_eq!(verdict.bindless, BindlessDecision::TraditionalBindings);
        assert_eq!(verdict.trace_jit, TraceJitDecision::HoldSteady);
        assert_eq!(
            verdict.primary_execution_mode(),
            DispatchExecutionMode::PlainLaunches
        );
    }

    #[test]
    fn missing_copy_slot_reports_none_for_overlap() {
        // Without a destination slot the overlap policy is never consulted.
        let inputs = DispatchPolicyInputs {
            copy_dst_slot: None,
            ..aggressive_inputs()
        };
        let verdict = evaluate_dispatch_policy(&inputs);
        assert_eq!(verdict.copy_overlap, None);
    }

    #[test]
    fn primary_execution_mode_prefers_command_reuse_on_equal_savings() {
        // The persistent kernel has to win strictly; a tie goes to reuse.
        let persistent = PersistentKernelDecision::PersistentKernel { savings_ns: 100 };
        let command_reuse = CommandReuseDecision::RecordAndReplay { savings_ns: 100 };
        assert_eq!(
            select_primary_execution_mode(persistent, command_reuse),
            DispatchExecutionMode::CommandReuse { savings_ns: 100 }
        );
    }

    #[test]
    fn primary_execution_mode_selects_only_profitable_substrate() {
        let only_persistent = select_primary_execution_mode(
            PersistentKernelDecision::PersistentKernel { savings_ns: 500 },
            CommandReuseDecision::PlainLaunches,
        );
        assert_eq!(
            only_persistent,
            DispatchExecutionMode::PersistentKernel { savings_ns: 500 }
        );

        let only_reuse = select_primary_execution_mode(
            PersistentKernelDecision::StandardLaunches,
            CommandReuseDecision::RecordAndReplay { savings_ns: 700 },
        );
        assert_eq!(
            only_reuse,
            DispatchExecutionMode::CommandReuse { savings_ns: 700 }
        );
    }
}