Skip to main content

vyre_runtime/megakernel/planner/
caps.rs

1//! Megakernel backend capability and report types.
2
3use std::time::Duration;
4
5use super::super::policy::{
6    MegakernelDispatchTopology, MegakernelExecutionMode, MegakernelQueuePressure,
7};
8
/// Megakernel capabilities advertised by a megakernel-aware backend.
///
/// A small `Copy` value: `supported` states whether a megakernel path exists
/// at all, and `max_worker_count` carries the worker ceiling the backend
/// accepts. Build it with [`MegakernelCaps::unsupported`] or
/// [`MegakernelCaps::supported`].
#[derive(Debug, Clone, Copy)]
pub struct MegakernelCaps {
    /// Whether the backend implements a megakernel path.
    pub supported: bool,
    /// Maximum worker-count ceiling the backend accepts.
    pub max_worker_count: u32,
}

impl MegakernelCaps {
    /// Capabilities for a backend without a megakernel path.
    ///
    /// `supported` is `false` and the worker ceiling is `0`. Per the original
    /// contract note, such a backend is expected to answer every megakernel
    /// method with an explicit error; that enforcement lives in the backend,
    /// not in this type.
    #[must_use]
    pub const fn unsupported() -> Self {
        Self {
            supported: false,
            max_worker_count: 0,
        }
    }

    /// Capabilities for a backend that implements the megakernel path.
    ///
    /// `max_worker_count` is stored as given; no validation or clamping is
    /// applied here.
    #[must_use]
    pub const fn supported(max_worker_count: u32) -> Self {
        Self {
            supported: true,
            max_worker_count,
        }
    }
}
37
38/// One work-queue item the megakernel worker consumes.
39#[repr(C)]
40#[derive(Debug, Clone, Copy, PartialEq, Eq, bytemuck::Pod, bytemuck::Zeroable)]
41pub struct MegakernelWorkItem {
42    /// Stable op id index into the dialect registry.
43    pub op_handle: u32,
44    /// Input-buffer handle.
45    pub input_handle: u32,
46    /// Output-buffer handle.
47    pub output_handle: u32,
48    /// Optional per-item parameter word.
49    pub param: u32,
50}
51
52/// Production counters from one megakernel dispatch.
53#[derive(Debug, Clone, Copy, PartialEq, Eq)]
54pub struct MegakernelTelemetry {
55    /// Bytes uploaded across control, ring, debug, and IO inputs.
56    pub bytes_uploaded: u64,
57    /// Bytes read back across all megakernel output buffers.
58    pub bytes_read_back: u64,
59    /// Total host/device transfer bytes attributable to this dispatch.
60    pub bytes_moved: u64,
61    /// Resident input allocations performed before dispatch.
62    pub resident_allocations: u32,
63    /// Kernel launches issued for this logical dispatch.
64    pub kernel_launches: u32,
65    /// Host-visible synchronization/readback wait points.
66    pub sync_points: u32,
67    /// Approximate lane occupancy in basis points, capped at 10000.
68    pub occupancy_proxy_bps: u16,
69    /// Active queue/frontier density in basis points, capped at 10000.
70    pub frontier_density_bps: u16,
71    /// Number of output buffers read back from the backend.
72    pub readback_buffers: u32,
73    /// True when the direct dispatch reused a compiled megakernel pipeline.
74    pub compiled_pipeline_cache_hit: bool,
75    /// True when the direct dispatch reused resident input resources.
76    pub resident_input_cache_hit: bool,
77    /// Scale-aware topology selected by the launch policy.
78    pub topology: MegakernelDispatchTopology,
79    /// Queue pressure classification selected by the launch policy.
80    pub pressure: MegakernelQueuePressure,
81    /// Interpreter or JIT route selected by launch policy telemetry.
82    pub execution_mode: MegakernelExecutionMode,
83    /// Sparse-hit capacity selected by the launch policy.
84    pub hit_capacity: u32,
85    /// Estimated peak device bytes for the selected launch plan.
86    pub estimated_peak_device_bytes: u64,
87    /// Hard device-memory budget applied to the launch. Zero means unbounded.
88    pub device_memory_budget_bytes: u64,
89}
90
91impl Default for MegakernelTelemetry {
92    fn default() -> Self {
93        Self {
94            bytes_uploaded: 0,
95            bytes_read_back: 0,
96            bytes_moved: 0,
97            resident_allocations: 0,
98            kernel_launches: 0,
99            sync_points: 0,
100            occupancy_proxy_bps: 0,
101            frontier_density_bps: 0,
102            readback_buffers: 0,
103            compiled_pipeline_cache_hit: false,
104            resident_input_cache_hit: false,
105            topology: MegakernelDispatchTopology::Empty,
106            pressure: MegakernelQueuePressure::Empty,
107            execution_mode: MegakernelExecutionMode::Interpreter,
108            hit_capacity: 0,
109            estimated_peak_device_bytes: 0,
110            device_memory_budget_bytes: 0,
111        }
112    }
113}
114
115/// Summary stats from one megakernel run.
116#[derive(Debug, Clone, Default)]
117pub struct MegakernelReport {
118    /// Items the workers processed before exiting.
119    pub items_processed: u64,
120    /// Items still queued when `max_wall_time` fired.
121    pub items_remaining: u64,
122    /// Wall-clock time spent.
123    pub wall_time: Duration,
124    /// Host-side time spent shaping the queue before publication:
125    /// dedupe, fusion planning, and launch-geometry preparation.
126    pub queue_plan_ns: u64,
127    /// Host-side time spent encoding protocol buffers and publishing
128    /// queued work into ring slots.
129    pub queue_publish_ns: u64,
130    /// Host-observed backend dispatch latency after queue publication.
131    pub backend_dispatch_ns: u64,
132    /// Host-observed time spent computing optional region lineage after
133    /// dispatch. Zero when lineage tracking is skipped.
134    pub lineage_ns: u64,
135    /// Logical work items removed by queue dedupe before publication.
136    pub deduped_items: u64,
137    /// Work items actually published into megakernel ring slots.
138    pub published_items: u64,
139    /// Number of work items included in region lineage tracking.
140    pub lineage_items: u64,
141    /// Production counters for performance gates and launch tuning.
142    pub telemetry: MegakernelTelemetry,
143    /// Per-output provenance lineage bitsets, one entry per fused
144    /// region in dispatch order. `lineage[i]` is a 32-bit set of
145    /// source-rule IDs that contributed to fused-region `i`'s output,
146    /// computed via the substrate
147    /// `vyre_self_substrate::scallop_provenance` Datalog
148    /// closure on the rule-derivation graph. Empty `Vec` when
149    /// provenance tracking was disabled for the dispatch.
150    ///
151    /// Lets observability collectors (Tempo, Honeycomb, Prometheus)
152    /// attribute every megakernel output back to the source rules
153    /// that derived it  -  without this, fused-region outputs lose
154    /// their lineage.
155    pub region_lineage: Vec<u32>,
156}