// vyre_runtime/megakernel/protocol.rs

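//! Ring-buffer protocol constants — slot layout, control words, opcodes, debug log.
//!
//! Primarily a data module: every constant has a doc-comment that says what
//! the GPU kernel does with it. Besides the constants, this file defines
//! [`DebugRecord`], [`ProtocolError`], and the load-miss slot encode/decode
//! helpers, and re-exports the codec functions from the private `codec`
//! submodule.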
5
/// One PRINTF event decoded out of the debug-log buffer.
///
/// The record carries only raw words: a format-string id and the
/// argument words that accompany it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct DebugRecord {
    /// Id of the format string; the host resolves it against the
    /// format table it registered.
    pub fmt_id: u32,
    /// The three argument words, in the order the kernel wrote them.
    pub args: [u32; 3],
}
15
16/// Megakernel host-protocol encoding and decoding error.
17#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
18#[non_exhaustive]
19pub enum ProtocolError {
20    /// A requested buffer length overflowed host address space.
21    #[error("{buffer} byte length overflow. Fix: {fix}")]
22    ByteLengthOverflow {
23        /// Protocol buffer being sized.
24        buffer: &'static str,
25        /// Actionable remediation.
26        fix: &'static str,
27    },
28    /// A byte slice is not aligned to full u32 protocol words.
29    #[error("{buffer} has {byte_len} bytes, not a whole number of u32 words. Fix: {fix}")]
30    MisalignedByteLength {
31        /// Protocol buffer being decoded.
32        buffer: &'static str,
33        /// Byte length received by the decoder.
34        byte_len: usize,
35        /// Actionable remediation.
36        fix: &'static str,
37    },
38    /// A requested protocol word is outside the supplied byte slice.
39    #[error("{buffer} is missing word {word_idx} in {byte_len} bytes. Fix: {fix}")]
40    MissingWord {
41        /// Protocol buffer being decoded.
42        buffer: &'static str,
43        /// Word index requested.
44        word_idx: usize,
45        /// Byte length received by the decoder.
46        byte_len: usize,
47        /// Actionable remediation.
48        fix: &'static str,
49    },
50}
51
/// Size of one ring-buffer slot, in u32 words. 16 words = 64 B: a cache
/// line on x86_64, and the slot size NVMe Submission Queue Entries will
/// use when the `uring-cmd-nvme` extension lands.
pub const SLOT_WORDS: u32 = 16;

/// Slot word holding the status header (the CAS target).
pub const STATUS_WORD: u32 = 0;

/// Slot word holding the opcode (dispatched via If-tree).
pub const OPCODE_WORD: u32 = 1;

/// Slot word holding the tenant id.
pub const TENANT_WORD: u32 = 2;

/// Slot word holding the priority level.
pub const PRIORITY_WORD: u32 = 3;

/// Slot word of the first argument. Opcodes read their args from
/// `ring_buffer[slot_base + ARG0_WORD .. slot_base + SLOT_WORDS]`.
pub const ARG0_WORD: u32 = 4;

/// How many u32 argument words a slot offers: everything from
/// [`ARG0_WORD`] up to the end of the slot (12).
pub const ARGS_PER_SLOT: u32 = SLOT_WORDS - ARG0_WORD;
75
76/// Control-buffer slot layout helpers.
77pub mod control;
78/// Debug helpers for inspecting megakernel slot/opcode state at runtime.
79pub mod debug;
80/// Opcode constants and decoding utilities for megakernel slots.
81pub mod opcode;
82/// Slot layout helpers (per-slot offsets, ARG0 helpers).
83pub mod slot;
84
85/// Minimum control-buffer words required by the compiled megakernel ABI.
86///
87/// This covers shutdown, done count, tenant masks, metrics, epoch, priority
88/// offsets, and the statically declared read/write buffer count in the IR.
89pub const CONTROL_MIN_WORDS: u32 = 160;
90/// Maximum host-observable words whose control-buffer byte length is
91/// representable by the u32 wire ABI.
92pub const MAX_OBSERVABLE_SLOTS: u32 = u32::MAX - control::OBSERVABLE_BASE;
93/// Maximum host-observable words the allocating encoder will materialize.
94pub const MAX_ENCODED_OBSERVABLE_SLOTS: u32 = 1_048_576;
95/// Maximum ring slots whose byte length is representable by the u32 wire ABI.
96pub const MAX_RING_SLOTS: u32 = u32::MAX / SLOT_WORDS;
97/// Maximum ring slots the allocating encoder will materialize.
98pub const MAX_ENCODED_RING_SLOTS: u32 = 1_048_576;
99/// Maximum debug-log records whose byte length is representable by the u32 wire ABI.
100pub const MAX_DEBUG_RECORDS: u32 = u32::MAX / debug::RECORD_WORDS;
101/// Maximum debug-log records the allocating encoder will materialize.
102pub const MAX_ENCODED_DEBUG_RECORDS: u32 = 1_048_576;
103
104mod codec;
105
106pub use codec::{
107    control_byte_len, count_done_ring_slots, debug_log_byte_len, encode_control,
108    encode_empty_debug_log, encode_empty_ring, read_debug_log, read_debug_log_into,
109    read_done_count, read_epoch, read_metrics, read_metrics_into, read_observable, ring_byte_len,
110    try_count_done_ring_slots, try_encode_control, try_encode_control_into,
111    try_encode_empty_debug_log, try_encode_empty_debug_log_into, try_encode_empty_ring,
112    try_encode_empty_ring_into, try_read_debug_log, try_read_debug_log_into, try_read_done_count,
113    try_read_epoch, try_read_metrics, try_read_metrics_into, try_read_observable,
114};
115
116/// Encode a single ring-buffer slot for a load-miss request.
117///
118/// Returns a 64-byte `Vec<u8>` containing the slot words:
119/// - status = [`slot::PUBLISHED`]
120/// - opcode = [`opcode::LOAD_MISS`]
121/// - tenant = 0
122/// - priority = 0
123/// - arg0 = resource_id (opaque to vyre; consumer-defined)
124/// - arg1 = prefetch as u32
125#[must_use]
126pub fn encode_load_miss(resource_id: u32, prefetch: bool) -> Vec<u8> {
127    let mut bytes = vec![0u8; slot_byte_len_or_panic()];
128    codec::write_word(
129        &mut bytes,
130        word_index_or_panic(STATUS_WORD),
131        slot::PUBLISHED,
132    );
133    codec::write_word(
134        &mut bytes,
135        word_index_or_panic(OPCODE_WORD),
136        opcode::LOAD_MISS,
137    );
138    codec::write_word(&mut bytes, word_index_or_panic(TENANT_WORD), 0);
139    codec::write_word(&mut bytes, word_index_or_panic(PRIORITY_WORD), 0);
140    codec::write_word(&mut bytes, word_index_or_panic(ARG0_WORD), resource_id);
141    codec::write_word(
142        &mut bytes,
143        word_index_or_panic(ARG0_WORD.checked_add(1).unwrap_or_else(|| {
144            panic!("megakernel load-miss arg word overflowed u32. Fix: keep ARG0_WORD within SLOT_WORDS.")
145        })),
146        u32::from(prefetch),
147    );
148    bytes
149}
150
151/// Decode a load-miss slot from ring-buffer bytes.
152///
153/// Returns `Some((resource_id, prefetch))` if the slot contains the
154/// [`opcode::LOAD_MISS`] opcode. Returns `None` if the byte slice is
155/// too short or the opcode does not match.
156#[must_use]
157pub fn decode_load_miss(ring_bytes: &[u8], slot: u32) -> Option<(u32, bool)> {
158    let slot_base = slot_word_base(slot)?;
159    let opcode_word = codec::read_word(ring_bytes, checked_slot_word(slot_base, OPCODE_WORD)?)?;
160    if opcode_word != opcode::LOAD_MISS {
161        return None;
162    }
163    let resource_id = codec::read_word(ring_bytes, checked_slot_word(slot_base, ARG0_WORD)?)?;
164    let prefetch = codec::read_word(
165        ring_bytes,
166        checked_slot_word(slot_base, ARG0_WORD.checked_add(1)?)?,
167    )? != 0;
168    Some((resource_id, prefetch))
169}
170
171fn slot_byte_len_or_panic() -> usize {
172    usize::try_from(SLOT_WORDS)
173        .unwrap_or_else(|error| {
174            panic!("SLOT_WORDS cannot fit usize: {error}. Fix: keep SLOT_WORDS within the host index ABI.")
175        })
176        .checked_mul(4)
177        .unwrap_or_else(|| {
178            panic!("megakernel slot byte length overflowed usize. Fix: keep SLOT_WORDS within the host index ABI.")
179        })
180}
181
/// Convert a u32 protocol word index into a host `usize` index.
///
/// Panics if the value does not fit `usize`.
fn word_index_or_panic(word: u32) -> usize {
    match usize::try_from(word) {
        Ok(index) => index,
        Err(error) => {
            panic!("megakernel protocol word index cannot fit usize: {error}. Fix: keep protocol word constants within the host index ABI.")
        }
    }
}
187
188fn slot_word_base(slot: u32) -> Option<usize> {
189    let base_words = slot.checked_mul(SLOT_WORDS)?;
190    usize::try_from(base_words).ok()
191}
192
/// Absolute word index of `word` within the slot that starts at `slot_base`.
///
/// Returns `None` if `word` does not fit `usize` or the sum overflows.
fn checked_slot_word(slot_base: usize, word: u32) -> Option<usize> {
    let offset = usize::try_from(word).ok()?;
    slot_base.checked_add(offset)
}
196
197/// Deprecated alias for [`encode_load_miss`]. The old MoE-specific
198/// parameter name was a boundary violation  -  vyre is a generic GPU
199/// substrate. New code must use [`encode_load_miss`]; this shim will
200/// be removed once consumers have migrated.
201#[deprecated(since = "0.5.0", note = "use `encode_load_miss`")]
202#[must_use]
203pub fn encode_expert_miss(resource_id: u32, prefetch: bool) -> Vec<u8> {
204    encode_load_miss(resource_id, prefetch)
205}
206
207/// Deprecated alias for [`decode_load_miss`]; see [`encode_expert_miss`].
208#[deprecated(since = "0.5.0", note = "use `decode_load_miss`")]
209#[must_use]
210pub fn decode_expert_miss(ring_bytes: &[u8], slot: u32) -> Option<(u32, bool)> {
211    decode_load_miss(ring_bytes, slot)
212}
213
214#[cfg(test)]
215mod tests;