1const WARP_SPARSE_DENSITY: f64 = 0.03125;
9const SPARSE_DENSITY: f64 = 0.125;
10const DENSE_DENSITY: f64 = 0.70;
11const BLOCK_DENSE_DENSITY: f64 = 0.85;
12const FUSION_PRESSURE: f64 = 0.70;
13const FUSION_PRESSURE_HYSTERESIS: f64 = 0.10;
14const FRONTIER_HYSTERESIS: f64 = 0.025;
15const MEMORY_RED_ZONE_BPS: u32 = 9_000;
16const MEMORY_HYSTERESIS_BPS: u32 = 250;
17const LAUNCH_PRESSURE_BPS: u32 = 1_500;
18const LAUNCH_HYSTERESIS_BPS: u32 = 250;
19const FUSION_READBACK_BYTES: u64 = 4_096;
20const DENSE_AVERAGE_DEGREE_BPS: u64 = 20_000;
21const WARP_SPARSE_AVERAGE_DEGREE_BPS: u64 = 80_000;
22
23#[derive(Clone, Copy, Debug, PartialEq)]
25pub struct MegakernelExecutionSample {
26 pub dispatch_cost_ns: f64,
28 pub frontier_density: f64,
30 pub readback_bytes: u64,
32}
33
34#[derive(Clone, Copy, Debug, Eq, PartialEq)]
36pub enum MegakernelExecutionTopology {
37 WarpSparseFrontier,
40 SparseFrontier,
42 BlockDenseFrontier,
45 DenseFrontier,
47 HybridFrontier,
49 FusedWave,
52}
53
54#[derive(Clone, Copy, Debug, Eq, PartialEq)]
56pub struct MegakernelGraphShape {
57 pub node_count: u64,
59 pub edge_count: u64,
61}
62
63#[derive(Clone, Copy, Debug, Eq, PartialEq)]
65pub struct MegakernelMemoryBudget {
66 pub required_bytes: u64,
68 pub budget_bytes: u64,
70}
71
72#[derive(Clone, Copy, Debug, Eq, PartialEq)]
74pub struct MegakernelMemoryPlan {
75 pub graph_bytes: u64,
77 pub frontier_bytes: u64,
79 pub scratch_bytes: u64,
81 pub output_bytes: u64,
83 pub required_bytes: u64,
85 pub budget_bytes: u64,
87 pub memory_pressure_bps: u32,
89}
90
91#[derive(Clone, Copy, Debug, Eq, PartialEq)]
93pub struct MegakernelExecutionPlan {
94 pub topology: MegakernelExecutionTopology,
96 pub memory: MegakernelMemoryPlan,
98 pub downgraded_to_sparse: bool,
101}
102
103#[derive(Clone, Debug, Eq, PartialEq)]
105pub enum MegakernelMemoryError {
106 ByteCountOverflow {
108 field: &'static str,
110 },
111 OverBudget {
113 topology: MegakernelExecutionTopology,
115 required_bytes: u64,
117 budget_bytes: u64,
119 node_count: u64,
121 edge_count: u64,
123 },
124}
125
126impl std::fmt::Display for MegakernelMemoryError {
127 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
128 match self {
129 Self::ByteCountOverflow { field } => write!(
130 f,
131 "megakernel memory planner overflowed while computing {field}. Fix: shard the graph or lower the candidate topology before planning device residency."
132 ),
133 Self::OverBudget {
134 topology,
135 required_bytes,
136 budget_bytes,
137 node_count,
138 edge_count,
139 } => write!(
140 f,
141 "megakernel {topology:?} plan requires {required_bytes} bytes but budget allows {budget_bytes} bytes for graph nodes={node_count} edges={edge_count}. Fix: choose a sparse topology, reduce fusion pressure, shard the graph, or raise the explicit device-memory budget."
142 ),
143 }
144 }
145}
146
147impl std::error::Error for MegakernelMemoryError {}
148
149#[derive(Clone, Copy, Debug, PartialEq)]
151pub struct MegakernelTopologyDecision {
152 pub topology: MegakernelExecutionTopology,
154 pub memory_pressure_bps: u32,
156 pub average_degree_bps: u64,
158 pub launch_pressure_bps: u32,
160}
161
162impl MegakernelTopologyDecision {
163 #[must_use]
165 pub fn stable_explanation(&self) -> String {
166 format!(
167 "megakernel-topology-v1|topology={:?}|memory_pressure_bps={}|average_degree_bps={}|launch_pressure_bps={}|reason={}",
168 self.topology,
169 self.memory_pressure_bps,
170 self.average_degree_bps,
171 self.launch_pressure_bps,
172 self.reason_code()
173 )
174 }
175
176 fn reason_code(&self) -> &'static str {
177 match self.topology {
178 MegakernelExecutionTopology::WarpSparseFrontier => "ultra_sparse_warp_specialized",
179 MegakernelExecutionTopology::SparseFrontier if self.memory_pressure_bps >= 9_000 => {
180 "memory_pressure_sparse_safety"
181 }
182 MegakernelExecutionTopology::SparseFrontier => "low_density_sparse_queue",
183 MegakernelExecutionTopology::BlockDenseFrontier => "high_density_block_specialized",
184 MegakernelExecutionTopology::DenseFrontier => "dense_coalesced_frontier",
185 MegakernelExecutionTopology::HybridFrontier => "transition_band_hybrid",
186 MegakernelExecutionTopology::FusedWave => "launch_and_readback_pressure_fused",
187 }
188 }
189}
190
191#[must_use]
193pub fn select_megakernel_topology(
194 sample: MegakernelExecutionSample,
195 graph: MegakernelGraphShape,
196 memory: MegakernelMemoryBudget,
197 launch_overhead_ns: f64,
198 fusion_pressure: f64,
199) -> MegakernelTopologyDecision {
200 let memory_pressure_bps = pressure_bps(memory.required_bytes, memory.budget_bytes);
201 let average_degree_bps = pressure_bps_u64(graph.edge_count, graph.node_count);
202 let launch_pressure_bps =
203 if sample.dispatch_cost_ns <= 0.0 || !sample.dispatch_cost_ns.is_finite() {
204 0
205 } else {
206 finite_ratio_bps(
207 launch_overhead_ns.max(0.0),
208 sample.dispatch_cost_ns,
209 "launch overhead pressure",
210 )
211 };
212 let density = finite_unit(sample.frontier_density);
213 let fusion = finite_unit(fusion_pressure);
214 let topology = if memory_pressure_bps >= MEMORY_RED_ZONE_BPS {
215 MegakernelExecutionTopology::SparseFrontier
216 } else if fusion >= FUSION_PRESSURE
217 && launch_pressure_bps >= LAUNCH_PRESSURE_BPS
218 && sample.readback_bytes >= FUSION_READBACK_BYTES
219 && memory_pressure_bps
220 <= checked_bps_sub(MEMORY_RED_ZONE_BPS, 500, "fusion memory red-zone margin")
221 {
222 MegakernelExecutionTopology::FusedWave
223 } else if density <= WARP_SPARSE_DENSITY && average_degree_bps <= WARP_SPARSE_AVERAGE_DEGREE_BPS
224 {
225 MegakernelExecutionTopology::WarpSparseFrontier
226 } else if density <= SPARSE_DENSITY {
227 MegakernelExecutionTopology::SparseFrontier
228 } else if density >= BLOCK_DENSE_DENSITY && average_degree_bps >= DENSE_AVERAGE_DEGREE_BPS {
229 MegakernelExecutionTopology::BlockDenseFrontier
230 } else if density >= DENSE_DENSITY && average_degree_bps >= DENSE_AVERAGE_DEGREE_BPS {
231 MegakernelExecutionTopology::DenseFrontier
232 } else {
233 MegakernelExecutionTopology::HybridFrontier
234 };
235 MegakernelTopologyDecision {
236 topology,
237 memory_pressure_bps,
238 average_degree_bps,
239 launch_pressure_bps,
240 }
241}
242
243#[must_use]
245pub fn select_megakernel_topology_stable(
246 sample: MegakernelExecutionSample,
247 graph: MegakernelGraphShape,
248 memory: MegakernelMemoryBudget,
249 launch_overhead_ns: f64,
250 fusion_pressure: f64,
251 previous_topology: MegakernelExecutionTopology,
252) -> MegakernelTopologyDecision {
253 let mut decision =
254 select_megakernel_topology(sample, graph, memory, launch_overhead_ns, fusion_pressure);
255 decision.topology = stabilize_topology(decision, sample, fusion_pressure, previous_topology);
256 decision
257}
258
259fn stabilize_topology(
260 decision: MegakernelTopologyDecision,
261 sample: MegakernelExecutionSample,
262 fusion_pressure: f64,
263 previous_topology: MegakernelExecutionTopology,
264) -> MegakernelExecutionTopology {
265 if decision.memory_pressure_bps >= MEMORY_RED_ZONE_BPS {
266 return decision.topology;
267 }
268 let density = finite_unit(sample.frontier_density);
269 let fusion = finite_unit(fusion_pressure);
270 if matches!(
271 previous_topology,
272 MegakernelExecutionTopology::SparseFrontier
273 | MegakernelExecutionTopology::WarpSparseFrontier
274 ) && decision.memory_pressure_bps
275 >= checked_bps_sub(
276 MEMORY_RED_ZONE_BPS,
277 MEMORY_HYSTERESIS_BPS,
278 "memory hysteresis floor",
279 )
280 {
281 return MegakernelExecutionTopology::SparseFrontier;
282 }
283
284 match previous_topology {
285 MegakernelExecutionTopology::WarpSparseFrontier
286 if density <= WARP_SPARSE_DENSITY + FRONTIER_HYSTERESIS
287 && decision.average_degree_bps <= WARP_SPARSE_AVERAGE_DEGREE_BPS =>
288 {
289 MegakernelExecutionTopology::WarpSparseFrontier
290 }
291 MegakernelExecutionTopology::SparseFrontier
292 if density <= SPARSE_DENSITY + FRONTIER_HYSTERESIS =>
293 {
294 MegakernelExecutionTopology::SparseFrontier
295 }
296 MegakernelExecutionTopology::HybridFrontier
297 if decision.topology == MegakernelExecutionTopology::SparseFrontier
298 && density >= SPARSE_DENSITY - FRONTIER_HYSTERESIS =>
299 {
300 MegakernelExecutionTopology::HybridFrontier
301 }
302 MegakernelExecutionTopology::HybridFrontier
303 if matches!(
304 decision.topology,
305 MegakernelExecutionTopology::DenseFrontier
306 | MegakernelExecutionTopology::BlockDenseFrontier
307 ) && density <= DENSE_DENSITY + FRONTIER_HYSTERESIS =>
308 {
309 MegakernelExecutionTopology::HybridFrontier
310 }
311 MegakernelExecutionTopology::DenseFrontier
312 if density >= DENSE_DENSITY - FRONTIER_HYSTERESIS
313 && decision.average_degree_bps >= DENSE_AVERAGE_DEGREE_BPS =>
314 {
315 MegakernelExecutionTopology::DenseFrontier
316 }
317 MegakernelExecutionTopology::BlockDenseFrontier
318 if density >= BLOCK_DENSE_DENSITY - FRONTIER_HYSTERESIS
319 && decision.average_degree_bps >= DENSE_AVERAGE_DEGREE_BPS =>
320 {
321 MegakernelExecutionTopology::BlockDenseFrontier
322 }
323 MegakernelExecutionTopology::FusedWave
324 if fusion >= FUSION_PRESSURE - FUSION_PRESSURE_HYSTERESIS
325 && decision.launch_pressure_bps
326 >= checked_bps_sub(
327 LAUNCH_PRESSURE_BPS,
328 LAUNCH_HYSTERESIS_BPS,
329 "launch hysteresis floor",
330 )
331 && sample.readback_bytes >= FUSION_READBACK_BYTES
332 && decision.memory_pressure_bps
333 <= checked_bps_sub(
334 MEMORY_RED_ZONE_BPS,
335 MEMORY_HYSTERESIS_BPS,
336 "memory hysteresis floor",
337 ) =>
338 {
339 MegakernelExecutionTopology::FusedWave
340 }
341 _ => decision.topology,
342 }
343}
344
345pub fn plan_megakernel_memory_budget(
347 topology: MegakernelExecutionTopology,
348 graph: MegakernelGraphShape,
349 bytes_per_node: u64,
350 bytes_per_edge: u64,
351 frontier_bytes: u64,
352 scratch_bytes: u64,
353 output_bytes: u64,
354 budget_bytes: u64,
355) -> Result<MegakernelMemoryPlan, MegakernelMemoryError> {
356 let node_bytes = checked_mul(graph.node_count, bytes_per_node, "node layout bytes")?;
357 let edge_bytes = checked_mul(graph.edge_count, bytes_per_edge, "edge layout bytes")?;
358 let graph_bytes = checked_add(node_bytes, edge_bytes, "graph layout bytes")?;
359 let topology_scratch_bytes = topology_scratch_bytes(topology, scratch_bytes)?;
360 let required_without_output =
361 checked_add(graph_bytes, frontier_bytes, "graph plus frontier bytes")?;
362 let required_without_output = checked_add(
363 required_without_output,
364 topology_scratch_bytes,
365 "scratch bytes",
366 )?;
367 let required_bytes = checked_add(required_without_output, output_bytes, "output bytes")?;
368 if required_bytes > budget_bytes {
369 return Err(MegakernelMemoryError::OverBudget {
370 topology,
371 required_bytes,
372 budget_bytes,
373 node_count: graph.node_count,
374 edge_count: graph.edge_count,
375 });
376 }
377 Ok(MegakernelMemoryPlan {
378 graph_bytes,
379 frontier_bytes,
380 scratch_bytes: topology_scratch_bytes,
381 output_bytes,
382 required_bytes,
383 budget_bytes,
384 memory_pressure_bps: pressure_bps(required_bytes, budget_bytes),
385 })
386}
387
388pub fn plan_megakernel_execution(
390 sample: MegakernelExecutionSample,
391 graph: MegakernelGraphShape,
392 bytes_per_node: u64,
393 bytes_per_edge: u64,
394 frontier_bytes: u64,
395 scratch_bytes: u64,
396 output_bytes: u64,
397 budget_bytes: u64,
398 launch_overhead_ns: f64,
399 fusion_pressure: f64,
400) -> Result<MegakernelExecutionPlan, MegakernelMemoryError> {
401 let sparse_memory = plan_megakernel_memory_budget(
402 MegakernelExecutionTopology::SparseFrontier,
403 graph,
404 bytes_per_node,
405 bytes_per_edge,
406 frontier_bytes,
407 scratch_bytes,
408 output_bytes,
409 budget_bytes,
410 )?;
411 let decision = select_megakernel_topology(
412 sample,
413 graph,
414 MegakernelMemoryBudget {
415 required_bytes: sparse_memory.required_bytes,
416 budget_bytes,
417 },
418 launch_overhead_ns,
419 fusion_pressure,
420 );
421 match plan_megakernel_memory_budget(
422 decision.topology,
423 graph,
424 bytes_per_node,
425 bytes_per_edge,
426 frontier_bytes,
427 scratch_bytes,
428 output_bytes,
429 budget_bytes,
430 ) {
431 Ok(memory) => Ok(MegakernelExecutionPlan {
432 topology: decision.topology,
433 memory,
434 downgraded_to_sparse: false,
435 }),
436 Err(MegakernelMemoryError::OverBudget { .. })
437 if decision.topology != MegakernelExecutionTopology::SparseFrontier =>
438 {
439 Ok(MegakernelExecutionPlan {
440 topology: MegakernelExecutionTopology::SparseFrontier,
441 memory: sparse_memory,
442 downgraded_to_sparse: true,
443 })
444 }
445 Err(error) => Err(error),
446 }
447}
448
449fn finite_unit(value: f64) -> f64 {
450 if value.is_finite() {
451 value.clamp(0.0, 1.0)
452 } else {
453 0.0
454 }
455}
456
457fn pressure_bps(numerator: u64, denominator: u64) -> u32 {
458 let clamped = pressure_bps_u64(numerator, denominator).min(10_000);
459 match u32::try_from(clamped) {
460 Ok(value) => value,
461 Err(error) => {
462 tracing::error!(
463 "megakernel pressure conversion failed after clamping value {clamped}: {error}. Fix: inspect ratio/clamp invariants before topology selection."
464 );
465 10_000
466 }
467 }
468}
469
470fn pressure_bps_u64(numerator: u64, denominator: u64) -> u64 {
471 crate::numeric::ratio_basis_points_u64_wide(
472 numerator,
473 denominator,
474 if numerator == 0 { 0 } else { u64::MAX },
475 "megakernel scheduler pressure",
476 "megakernel execution",
477 )
478}
479
480fn finite_ratio_bps(numerator: f64, denominator: f64, label: &'static str) -> u32 {
481 crate::numeric::finite_f64_ratio_basis_points_round(
482 numerator,
483 denominator,
484 u32::MAX,
485 u32::MAX,
486 label,
487 "megakernel execution",
488 )
489}
490
491fn checked_bps_sub(value: u32, margin: u32, label: &'static str) -> u32 {
492 if let Some(result) = value.checked_sub(margin) {
493 return result;
494 }
495 tracing::error!(
496 "megakernel {label} underflowed basis-point threshold. Fix: configure hysteresis below the threshold."
497 );
498 0
499}
500
501fn topology_scratch_bytes(
502 topology: MegakernelExecutionTopology,
503 base_scratch_bytes: u64,
504) -> Result<u64, MegakernelMemoryError> {
505 match topology {
506 MegakernelExecutionTopology::WarpSparseFrontier => Ok(base_scratch_bytes.max(32)),
507 MegakernelExecutionTopology::SparseFrontier => Ok(base_scratch_bytes),
508 MegakernelExecutionTopology::BlockDenseFrontier => checked_mul(
509 base_scratch_bytes.max(1024),
510 2,
511 "block dense topology scratch bytes",
512 ),
513 MegakernelExecutionTopology::DenseFrontier => {
514 checked_mul(base_scratch_bytes, 2, "dense topology scratch bytes")
515 }
516 MegakernelExecutionTopology::HybridFrontier => {
517 checked_mul(base_scratch_bytes, 3, "hybrid topology scratch bytes")
518 }
519 MegakernelExecutionTopology::FusedWave => {
520 checked_mul(base_scratch_bytes, 4, "fused topology scratch bytes")
521 }
522 }
523}
524
525fn checked_add(lhs: u64, rhs: u64, field: &'static str) -> Result<u64, MegakernelMemoryError> {
526 lhs.checked_add(rhs)
527 .ok_or(MegakernelMemoryError::ByteCountOverflow { field })
528}
529
530fn checked_mul(lhs: u64, rhs: u64, field: &'static str) -> Result<u64, MegakernelMemoryError> {
531 lhs.checked_mul(rhs)
532 .ok_or(MegakernelMemoryError::ByteCountOverflow { field })
533}
534
535#[cfg(test)]
536mod tests {
537 use super::{
538 plan_megakernel_execution, plan_megakernel_memory_budget, select_megakernel_topology,
539 select_megakernel_topology_stable, MegakernelExecutionSample, MegakernelExecutionTopology,
540 MegakernelGraphShape, MegakernelMemoryBudget, MegakernelMemoryError,
541 };
542
543 #[test]
544 fn topology_selector_uses_sparse_dense_hybrid_and_fused_bands() {
545 let graph = MegakernelGraphShape {
546 node_count: 1_000,
547 edge_count: 4_000,
548 };
549 let memory = MegakernelMemoryBudget {
550 required_bytes: 1_000,
551 budget_bytes: 10_000,
552 };
553 let warp_sparse = select_megakernel_topology(
554 MegakernelExecutionSample {
555 dispatch_cost_ns: 1_000.0,
556 frontier_density: 0.01,
557 readback_bytes: 256,
558 },
559 graph,
560 memory,
561 100.0,
562 0.0,
563 );
564 assert_eq!(
565 warp_sparse.topology,
566 MegakernelExecutionTopology::WarpSparseFrontier
567 );
568 assert_eq!(
569 warp_sparse.stable_explanation(),
570 "megakernel-topology-v1|topology=WarpSparseFrontier|memory_pressure_bps=1000|average_degree_bps=40000|launch_pressure_bps=1000|reason=ultra_sparse_warp_specialized"
571 );
572
573 let block_dense = select_megakernel_topology(
574 MegakernelExecutionSample {
575 dispatch_cost_ns: 1_000.0,
576 frontier_density: 0.90,
577 readback_bytes: 512,
578 },
579 graph,
580 memory,
581 100.0,
582 0.0,
583 );
584 assert_eq!(
585 block_dense.topology,
586 MegakernelExecutionTopology::BlockDenseFrontier
587 );
588
589 let hybrid = select_megakernel_topology(
590 MegakernelExecutionSample {
591 dispatch_cost_ns: 1_000.0,
592 frontier_density: 0.35,
593 readback_bytes: 512,
594 },
595 graph,
596 memory,
597 100.0,
598 0.0,
599 );
600 assert_eq!(hybrid.topology, MegakernelExecutionTopology::HybridFrontier);
601
602 let fused = select_megakernel_topology(
603 MegakernelExecutionSample {
604 dispatch_cost_ns: 1_000.0,
605 frontier_density: 0.50,
606 readback_bytes: 1 << 20,
607 },
608 graph,
609 memory,
610 250.0,
611 0.90,
612 );
613 assert_eq!(fused.topology, MegakernelExecutionTopology::FusedWave);
614 assert_eq!(fused.launch_pressure_bps, 2_500);
615 }
616
617 #[test]
618 fn stable_topology_selector_prevents_variant_flapping_near_thresholds() {
619 let graph = MegakernelGraphShape {
620 node_count: 1_000,
621 edge_count: 4_000,
622 };
623 let memory = MegakernelMemoryBudget {
624 required_bytes: 1_000,
625 budget_bytes: 10_000,
626 };
627 let sparse_to_hybrid = select_megakernel_topology_stable(
628 MegakernelExecutionSample {
629 dispatch_cost_ns: 1_000.0,
630 frontier_density: 0.14,
631 readback_bytes: 512,
632 },
633 graph,
634 memory,
635 100.0,
636 0.0,
637 MegakernelExecutionTopology::SparseFrontier,
638 );
639 assert_eq!(
640 sparse_to_hybrid.topology,
641 MegakernelExecutionTopology::SparseFrontier
642 );
643 }
644
645 #[test]
646 fn memory_planner_bounds_peak_bytes_by_topology() {
647 let graph = MegakernelGraphShape {
648 node_count: 1_000,
649 edge_count: 4_000,
650 };
651 let plan = plan_megakernel_memory_budget(
652 MegakernelExecutionTopology::FusedWave,
653 graph,
654 16,
655 8,
656 4_096,
657 2_048,
658 512,
659 128 * 1024,
660 )
661 .expect("Fix: valid fused plan should fit the explicit device-memory budget");
662
663 assert_eq!(plan.graph_bytes, 48_000);
664 assert_eq!(plan.scratch_bytes, 8_192);
665 assert_eq!(plan.required_bytes, 60_800);
666 assert!(plan.memory_pressure_bps > 0);
667 }
668
669 #[test]
670 fn memory_planner_rejects_budget_and_overflow_failures() {
671 let graph = MegakernelGraphShape {
672 node_count: 1_000,
673 edge_count: 4_000,
674 };
675 let err = plan_megakernel_memory_budget(
676 MegakernelExecutionTopology::DenseFrontier,
677 graph,
678 16,
679 8,
680 4_096,
681 2_048,
682 512,
683 32 * 1024,
684 )
685 .expect_err("over-budget dense plan must fail before allocation");
686 assert!(matches!(
687 err,
688 MegakernelMemoryError::OverBudget {
689 topology: MegakernelExecutionTopology::DenseFrontier,
690 ..
691 }
692 ));
693 assert!(err.to_string().contains("Fix: choose a sparse topology"));
694
695 let overflow = plan_megakernel_memory_budget(
696 MegakernelExecutionTopology::SparseFrontier,
697 MegakernelGraphShape {
698 node_count: u64::MAX,
699 edge_count: 0,
700 },
701 2,
702 0,
703 0,
704 0,
705 0,
706 u64::MAX,
707 )
708 .expect_err("overflowing graph byte count must be rejected");
709 assert!(matches!(
710 overflow,
711 MegakernelMemoryError::ByteCountOverflow {
712 field: "node layout bytes"
713 }
714 ));
715 }
716
717 #[test]
718 fn generated_execution_plans_never_exceed_budget_or_hide_overflow() {
719 let mut state = 0x4d59_5df4_d0f3_3173_u64;
720 for case_index in 0..1024usize {
721 let node_count = 1 + next_u64(&mut state) % 8_192;
722 let edge_count = node_count + next_u64(&mut state) % 65_536;
723 let bytes_per_node = 1 + next_u64(&mut state) % 64;
724 let bytes_per_edge = 1 + next_u64(&mut state) % 32;
725 let frontier_bytes = next_u64(&mut state) % 65_536;
726 let scratch_bytes = next_u64(&mut state) % 16_384;
727 let output_bytes = next_u64(&mut state) % 8_192;
728 let budget_bytes = 64 * 1024 + next_u64(&mut state) % (4 * 1024 * 1024);
729 let sample = MegakernelExecutionSample {
730 dispatch_cost_ns: 100.0 + (next_u64(&mut state) % 10_000) as f64,
731 frontier_density: (next_u64(&mut state) % 10_001) as f64 / 10_000.0,
732 readback_bytes: next_u64(&mut state) % (1 << 20),
733 };
734
735 let result = plan_megakernel_execution(
736 sample,
737 MegakernelGraphShape {
738 node_count,
739 edge_count,
740 },
741 bytes_per_node,
742 bytes_per_edge,
743 frontier_bytes,
744 scratch_bytes,
745 output_bytes,
746 budget_bytes,
747 250.0,
748 0.85,
749 );
750 match result {
751 Ok(plan) => {
752 assert!(
753 plan.memory.required_bytes <= plan.memory.budget_bytes,
754 "case {case_index}"
755 );
756 assert!(plan.memory.memory_pressure_bps <= 10_000);
757 }
758 Err(MegakernelMemoryError::OverBudget {
759 required_bytes,
760 budget_bytes,
761 ..
762 }) => assert!(required_bytes > budget_bytes, "case {case_index}"),
763 Err(MegakernelMemoryError::ByteCountOverflow { .. }) => {}
764 }
765 }
766 }
767
768 fn next_u64(state: &mut u64) -> u64 {
769 *state = state
770 .wrapping_mul(6_364_136_223_846_793_005)
771 .wrapping_add(1_442_695_040_888_963_407);
772 *state
773 }
774}