Skip to main content

vyre_runtime/megakernel/io/
mod.rs

1//! IO subsystem  -  GPU↔runtime DMA request queue for the persistent megakernel.
2//!
3//! Module ownership:
4//!  - `mod.rs`: doc + constants + IoRequest/IoCompletion + word/op/status modules
5//!  - `queue.rs`: [`MegakernelIoQueue`] + view
6//!  - `poll.rs`: poll/claim/peek surface
7//!  - `complete.rs`: completion-write surface
8//!  - `encode.rs`: bytes <-> validated queue helpers
9//!  - `helpers.rs`: low-level queue-word + validation utilities + IR builders
10//!  - `tests.rs`: full test suite
11//!
12//! ## Protocol
13//!
14//! Each IO slot is 8 × u32 words:
15//! ```text
16//! [op_type, src_handle, dst_handle, offset_lo, offset_hi, byte_count, status, tag]
17//! ```
18//!
19//! The GPU CAS-claims slots like the work ring, but uses the io_queue
20//! buffer. The host polls `status` for REQUEST and services the DMA.
21
22mod complete;
23mod encode;
24mod helpers;
25mod poll;
26mod queue;
27
28#[cfg(test)]
29mod tests;
30
31pub use complete::{
32    complete_io_request, complete_io_requests_batch, try_complete_io_request,
33    try_complete_io_requests_batch,
34};
35pub(crate) use encode::empty_io_queue_byte_len;
36pub use encode::{
37    encode_empty_io_queue, try_encode_empty_io_queue, try_encode_empty_io_queue_into,
38    validate_io_queue_bytes,
39};
40pub use helpers::io_completion_poll_body;
41pub use poll::{
42    claim_io_requests_into, poll_io_requests, try_claim_io_requests_into, try_poll_io_requests,
43    try_poll_io_requests_into,
44};
45pub use queue::MegakernelIoQueue;
46
/// Width of one IO queue slot, measured in `u32` words.
pub const IO_SLOT_WORDS: u32 = 8;

/// Slot count used when an IO queue is built with the default size.
pub const IO_SLOT_COUNT: u32 = 64;

/// Name of the resource table through which IO source handles are resolved.
pub const IO_SOURCE_CAPABILITY_TABLE: &str = "io_source_capability_table";

/// Name of the resource table through which IO destination handles are resolved.
pub const IO_DESTINATION_CAPABILITY_TABLE: &str = "io_destination_capability_table";

/// Async stream tag carried by megakernel IO DMA requests.
pub const IO_QUEUE_DMA_TAG: &str = "io_queue_dma";
61
/// Index of each field inside a single IO slot, counted in `u32` words.
///
/// The order matches the slot layout
/// `[op_type, src_handle, dst_handle, offset_lo, offset_hi, byte_count, status, tag]`.
pub mod io_word {
    /// Kind of DMA operation requested (see the [`super::io_op`] codes).
    pub const OP_TYPE: u32 = 0;
    /// Handle id of the source buffer.
    pub const SRC_HANDLE: u32 = 1;
    /// Handle id of the destination buffer.
    pub const DST_HANDLE: u32 = 2;
    /// Low 32 bits of the byte offset into the source.
    pub const OFFSET_LO: u32 = 3;
    /// High 32 bits of the byte offset into the source; needed once the
    /// offset reaches 4 GiB (the byte count itself is a single `u32` word).
    pub const OFFSET_HI: u32 = 4;
    /// How many bytes the operation transfers.
    pub const BYTE_COUNT: u32 = 5;
    /// Slot status, with the same semantics as the work ring
    /// (EMPTY/PUBLISHED/CLAIMED/DONE).
    pub const STATUS: u32 = 6;
    /// Tag chosen by the caller so completions can be matched to requests.
    pub const TAG: u32 = 7;
}
81
/// Operation codes an IO request can carry.
pub mod io_op {
    /// Storage → GPU buffer read.
    pub const READ: u32 = 0x01;
    /// GPU buffer → storage write.
    pub const WRITE: u32 = 0x02;
    /// Memory fence: makes every earlier IO operation visible.
    pub const FENCE: u32 = 0x03;
}
91
/// Completion status codes that the host pump writes back.
pub mod io_status {
    /// The operation finished without error.
    pub const OK: u32 = 0x10;
    /// The operation failed; the tag word holds the error code.
    pub const ERROR: u32 = 0x11;
}
99
/// An IO request as seen by the host after decoding it from the io_queue buffer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct IoRequest {
    /// Index of the io_queue slot this request occupies.
    pub slot_idx: u32,
    /// Requested operation type.
    pub op_type: u32,
    /// Handle of the source buffer.
    pub src_handle: u32,
    /// Handle of the destination buffer.
    pub dst_handle: u32,
    /// Byte offset into the source, as a full 64-bit value.
    pub offset: u64,
    /// Number of bytes to transfer.
    pub byte_count: u32,
    /// Tag supplied by the caller.
    pub tag: u32,
}
118
/// Completion record the host publishes into `io_queue` to describe a mapped
/// ingest slot that the GPU can consume.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct IoCompletion {
    /// Index of the queue slot.
    pub slot_idx: u32,
    /// Id of the mapped ingest slot (also the destination handle).
    pub mapped_slot: u32,
    /// Count of bytes that are now valid in the mapped slot.
    pub byte_count: u32,
    /// Completion tag defined by the caller.
    pub tag: u32,
}
127    /// Number of bytes now valid in the mapped slot.
128    pub byte_count: u32,
129    /// Caller-defined completion tag.
130    pub tag: u32,
131}