Skip to main content

vyre_runtime/megakernel/
protocol.rs

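//! Ring-buffer protocol constants - slot layout, control words, opcodes, debug log.
//!
//! Mostly a data module: protocol constants, the [`ProtocolError`] type, and
//! small host-side helpers for slot word indexing and load-miss slot
//! encoding/decoding. Buffer-level codecs live in the private `codec`
//! submodule and are re-exported from here. Every constant
//! has a doc-comment that says what the GPU kernel does with it.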
5
/// A single PRINTF event decoded out of the debug-log buffer.
///
/// Plain `Copy` data made of `u32` words only: one format id followed by
/// three argument words.
// NOTE(review): four words in total; presumably this matches
// `debug::RECORD_WORDS` - confirm against the `debug` submodule.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DebugRecord {
    /// Format-string id - resolved by the host against its
    /// registered format table.
    pub fmt_id: u32,
    /// Three argument words in the order the kernel wrote them.
    pub args: [u32; 3],
}
15
/// Megakernel host-protocol encoding and decoding error.
///
/// Every variant carries a static `buffer` label naming the protocol buffer
/// involved and a static `fix` string with remediation advice; both are
/// interpolated into the `Display` message.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[non_exhaustive]
pub enum ProtocolError {
    /// A requested buffer length overflowed host address space.
    ///
    /// The slot-index helpers in this module also return this variant when a
    /// slot-local word index is outside `SLOT_WORDS` or when slot index
    /// arithmetic overflows.
    #[error("{buffer} byte length overflow. Fix: {fix}")]
    ByteLengthOverflow {
        /// Protocol buffer being sized.
        buffer: &'static str,
        /// Actionable remediation.
        fix: &'static str,
    },
    /// A byte slice is not aligned to full u32 protocol words.
    #[error("{buffer} has {byte_len} bytes, not a whole number of u32 words. Fix: {fix}")]
    MisalignedByteLength {
        /// Protocol buffer being decoded.
        buffer: &'static str,
        /// Byte length received by the decoder.
        byte_len: usize,
        /// Actionable remediation.
        fix: &'static str,
    },
    /// A requested protocol word is outside the supplied byte slice.
    #[error("{buffer} is missing word {word_idx} in {byte_len} bytes. Fix: {fix}")]
    MissingWord {
        /// Protocol buffer being decoded.
        buffer: &'static str,
        /// Word index requested.
        word_idx: usize,
        /// Byte length received by the decoder.
        byte_len: usize,
        /// Actionable remediation.
        fix: &'static str,
    },
}
51
/// Number of u32 words each ring-buffer slot occupies. 16 words = 64 B,
/// a cache line on x86_64 and the slot size NVMe Submission Queue
/// Entries will use when the `uring-cmd-nvme` extension lands.
///
/// Slot layout by word index, as fixed by the constants below:
/// 0 = status, 1 = opcode, 2 = tenant, 3 = priority, 4..=15 = arguments.
pub const SLOT_WORDS: u32 = 16;

/// Word index of the slot status header (the CAS target).
pub const STATUS_WORD: u32 = 0;

/// Word index of the slot opcode (dispatched via If-tree).
pub const OPCODE_WORD: u32 = 1;

/// Word index of the slot tenant id.
pub const TENANT_WORD: u32 = 2;

/// Word index of the slot priority level.
pub const PRIORITY_WORD: u32 = 3;

/// First argument word. Opcodes read args at
/// `ring_buffer[slot_base + ARG0_WORD .. slot_base + SLOT_WORDS]`.
pub const ARG0_WORD: u32 = 4;

/// Number of u32 argument words available per slot
/// (`SLOT_WORDS - ARG0_WORD` = 16 - 4 = 12).
pub const ARGS_PER_SLOT: u32 = SLOT_WORDS - ARG0_WORD;
75
76/// Control-buffer slot layout helpers.
77pub mod control;
78/// Debug helpers for inspecting megakernel slot/opcode state at runtime.
79pub mod debug;
80/// Opcode constants and decoding utilities for megakernel slots.
81pub mod opcode;
82/// Slot layout helpers (per-slot offsets, ARG0 helpers).
83pub mod slot;
84
/// Minimum control-buffer words required by the compiled megakernel ABI.
///
/// This covers shutdown, done count, tenant masks, metrics, epoch, priority
/// offsets, and the statically declared read/write buffer count in the IR.
pub const CONTROL_MIN_WORDS: u32 = 160;
/// Maximum number of host-observable words such that
/// `control::OBSERVABLE_BASE + count` still fits in a u32.
// NOTE(review): this bound is computed in words; the earlier wording said the
// control-buffer *byte length* is representable by the u32 wire ABI, but the
// byte length is four times the word count. Confirm which unit is intended.
pub const MAX_OBSERVABLE_SLOTS: u32 = u32::MAX - control::OBSERVABLE_BASE;
/// Maximum host-observable words the allocating encoder will materialize
/// (1_048_576 = 2^20).
pub const MAX_ENCODED_OBSERVABLE_SLOTS: u32 = 1_048_576;
/// Maximum ring slots such that `slots * SLOT_WORDS` still fits in a u32.
///
/// [`try_slot_word_base`] rejects larger slot indices through its checked
/// multiplication by [`SLOT_WORDS`].
// NOTE(review): the bound is in words, not bytes; the earlier wording spoke of
// the ring's byte length being representable by the u32 wire ABI - confirm.
pub const MAX_RING_SLOTS: u32 = u32::MAX / SLOT_WORDS;
/// Maximum ring slots the allocating encoder will materialize
/// (1_048_576 = 2^20).
pub const MAX_ENCODED_RING_SLOTS: u32 = 1_048_576;
/// Maximum debug-log records such that `records * debug::RECORD_WORDS` still
/// fits in a u32.
// NOTE(review): word-count bound, same unit question as `MAX_RING_SLOTS`.
pub const MAX_DEBUG_RECORDS: u32 = u32::MAX / debug::RECORD_WORDS;
/// Maximum debug-log records the allocating encoder will materialize
/// (1_048_576 = 2^20).
pub const MAX_ENCODED_DEBUG_RECORDS: u32 = 1_048_576;
103
104mod codec;
105
106pub use codec::{
107    control_byte_len, count_done_ring_slots, debug_log_byte_len, encode_control,
108    encode_empty_debug_log, encode_empty_ring, read_debug_log, read_debug_log_into,
109    read_done_count, read_epoch, read_metrics, read_metrics_into, read_observable, ring_byte_len,
110    try_count_done_ring_slots, try_encode_control, try_encode_control_into,
111    try_encode_empty_debug_log, try_encode_empty_debug_log_into, try_encode_empty_ring,
112    try_encode_empty_ring_into, try_read_debug_log, try_read_debug_log_into, try_read_done_count,
113    try_read_epoch, try_read_metrics, try_read_metrics_into, try_read_observable,
114};
115
116/// Encode a single ring-buffer slot for a load-miss request.
117///
118/// Returns a 64-byte `Vec<u8>` containing the slot words:
119/// - status = [`slot::PUBLISHED`]
120/// - opcode = [`opcode::LOAD_MISS`]
121/// - tenant = 0
122/// - priority = 0
123/// - arg0 = resource_id (opaque to vyre; consumer-defined)
124/// - arg1 = prefetch as u32
125#[must_use]
126pub fn encode_load_miss(resource_id: u32, prefetch: bool) -> Vec<u8> {
127    try_encode_load_miss(resource_id, prefetch).unwrap_or_default()
128}
129
130/// Strictly encode a single ring-buffer slot for a load-miss request.
131///
132/// # Errors
133///
134/// Returns [`ProtocolError`] when slot sizing, word indexing, or host staging
135/// reservation fails.
136pub fn try_encode_load_miss(resource_id: u32, prefetch: bool) -> Result<Vec<u8>, ProtocolError> {
137    let total_bytes = try_slot_byte_len()?;
138    let mut bytes = Vec::new();
139    codec::try_reserve_protocol_capacity(
140        &mut bytes,
141        total_bytes,
142        "slot",
143        "load-miss slot encode could not reserve host staging bytes; reuse a preallocated slot buffer",
144    )?;
145    try_encode_load_miss_into(resource_id, prefetch, &mut bytes)?;
146    Ok(bytes)
147}
148
149/// Strictly encode a load-miss slot into caller-owned storage.
150///
151/// Clears and resizes `dst` to exactly one protocol slot, reusing allocation.
152///
153/// # Errors
154///
155/// Returns [`ProtocolError`] when slot sizing, word indexing, or host staging
156/// reservation fails.
157pub fn try_encode_load_miss_into(
158    resource_id: u32,
159    prefetch: bool,
160    dst: &mut Vec<u8>,
161) -> Result<(), ProtocolError> {
162    let total_bytes = try_slot_byte_len()?;
163    dst.clear();
164    codec::try_reserve_protocol_capacity(
165        dst,
166        total_bytes,
167        "slot",
168        "load-miss slot encode could not reserve caller-owned staging bytes; reuse a larger slot buffer",
169    )?;
170    dst.resize(total_bytes, 0);
171
172    codec::write_word(dst, try_slot_word_index(0, STATUS_WORD)?, slot::PUBLISHED);
173    codec::write_word(dst, try_slot_word_index(0, OPCODE_WORD)?, opcode::LOAD_MISS);
174    codec::write_word(dst, try_slot_word_index(0, TENANT_WORD)?, 0);
175    codec::write_word(dst, try_slot_word_index(0, PRIORITY_WORD)?, 0);
176    codec::write_word(dst, try_slot_word_index(0, ARG0_WORD)?, resource_id);
177    let prefetch_word = ARG0_WORD
178        .checked_add(1)
179        .ok_or(ProtocolError::ByteLengthOverflow {
180            buffer: "slot",
181            fix: "load-miss argument word overflows u32; keep ARG0_WORD within SLOT_WORDS",
182        })?;
183    codec::write_word(
184        dst,
185        try_slot_word_index(0, prefetch_word)?,
186        u32::from(prefetch),
187    );
188    Ok(())
189}
190
191/// Decode a load-miss slot from ring-buffer bytes.
192///
193/// Returns `Some((resource_id, prefetch))` if the slot contains the
194/// [`opcode::LOAD_MISS`] opcode. Returns `None` if the byte slice is
195/// too short or the opcode does not match.
196#[must_use]
197pub fn decode_load_miss(ring_bytes: &[u8], slot: u32) -> Option<(u32, bool)> {
198    let opcode_word = codec::read_word(ring_bytes, try_slot_word_index(slot, OPCODE_WORD).ok()?)?;
199    if opcode_word != opcode::LOAD_MISS {
200        return None;
201    }
202    let resource_id = codec::read_word(ring_bytes, try_slot_word_index(slot, ARG0_WORD).ok()?)?;
203    let prefetch_word = ARG0_WORD.checked_add(1)?;
204    let prefetch =
205        codec::read_word(ring_bytes, try_slot_word_index(slot, prefetch_word).ok()?)? != 0;
206    Some((resource_id, prefetch))
207}
208
209/// Return the byte length of one ring-buffer slot.
210///
211/// # Errors
212///
213/// Returns [`ProtocolError`] when slot sizing overflows host address space.
214pub fn try_slot_byte_len() -> Result<usize, ProtocolError> {
215    codec::words_to_bytes(SLOT_WORDS).ok_or(ProtocolError::ByteLengthOverflow {
216        buffer: "slot",
217        fix: "slot byte length overflows host address space; keep SLOT_WORDS within the host index ABI",
218    })
219}
220
221/// Convert a protocol-local word index into a host index.
222///
223/// # Errors
224///
225/// Returns [`ProtocolError`] when the word index cannot fit host address space.
226pub fn try_word_index(word: u32) -> Result<usize, ProtocolError> {
227    usize::try_from(word).map_err(|_| ProtocolError::ByteLengthOverflow {
228        buffer: "slot",
229        fix: "protocol word index cannot fit host usize; keep protocol word constants within the host index ABI",
230    })
231}
232
233/// Return the first word index for `slot`.
234///
235/// # Errors
236///
237/// Returns [`ProtocolError`] when slot multiplication overflows the u32 wire
238/// ABI or host address space.
239pub fn try_slot_word_base(slot: u32) -> Result<usize, ProtocolError> {
240    let base_words = slot
241        .checked_mul(SLOT_WORDS)
242        .ok_or(ProtocolError::ByteLengthOverflow {
243            buffer: "ring",
244            fix: "slot word base overflows the u32 protocol word ABI; shard the megakernel ring before host access",
245        })?;
246    try_word_index(base_words)
247}
248
249/// Return the absolute ring word index for a slot-local word.
250///
251/// # Errors
252///
253/// Returns [`ProtocolError`] when `word` is outside one slot or when addition
254/// overflows host address space.
255pub fn try_slot_word_index(slot: u32, word: u32) -> Result<usize, ProtocolError> {
256    if word >= SLOT_WORDS {
257        return Err(ProtocolError::ByteLengthOverflow {
258            buffer: "slot",
259            fix: "slot-local word is outside SLOT_WORDS; keep protocol constants within the slot layout",
260        });
261    }
262    try_slot_word_base(slot)?
263        .checked_add(try_word_index(word)?)
264        .ok_or(ProtocolError::ByteLengthOverflow {
265            buffer: "ring",
266            fix: "slot word index overflows host address space; shard the megakernel ring before host access",
267        })
268}
269
/// Deprecated alias for [`encode_load_miss`]. The old MoE-specific
/// parameter name was a boundary violation - vyre is a generic GPU
/// substrate. New code must use [`encode_load_miss`]; this shim will
/// be removed once consumers have migrated.
///
/// Forwards both arguments unchanged and returns exactly what
/// [`encode_load_miss`] returns.
#[deprecated(since = "0.5.0", note = "use `encode_load_miss`")]
#[must_use]
pub fn encode_expert_miss(resource_id: u32, prefetch: bool) -> Vec<u8> {
    encode_load_miss(resource_id, prefetch)
}
279
/// Deprecated alias for [`decode_load_miss`]; see [`encode_expert_miss`].
///
/// Forwards `ring_bytes` and `slot` unchanged and returns exactly what
/// [`decode_load_miss`] returns.
#[deprecated(since = "0.5.0", note = "use `decode_load_miss`")]
#[must_use]
pub fn decode_expert_miss(ring_bytes: &[u8], slot: u32) -> Option<(u32, bool)> {
    decode_load_miss(ring_bytes, slot)
}
286
287#[cfg(test)]
288mod tests;