vyre_runtime/megakernel/io/mod.rs
//! IO subsystem - GPU↔runtime DMA request queue for the persistent megakernel.
//!
//! Module ownership:
//! - `mod.rs`: doc + constants + `IoRequest`/`IoCompletion` + word/op/status modules
//! - `queue.rs`: [`MegakernelIoQueue`] + view
//! - `poll.rs`: poll/claim/peek surface
//! - `complete.rs`: completion-write surface
//! - `encode.rs`: bytes <-> validated queue helpers
//! - `helpers.rs`: low-level queue-word + validation utilities + IR builders
//! - `tests.rs`: full test suite
//!
//! ## Protocol
//!
//! Each IO slot is 8 × u32 words:
//! ```text
//! [op_type, src_handle, dst_handle, offset_lo, offset_hi, byte_count, status, tag]
//! ```
//!
//! The GPU CAS-claims slots like the work ring, but uses the io_queue
//! buffer. The host polls `status` for REQUEST and services the DMA.
21
22mod complete;
23mod encode;
24mod helpers;
25mod poll;
26mod queue;
27
28#[cfg(test)]
29mod tests;
30
31pub use complete::{
32 complete_io_request, complete_io_requests_batch, try_complete_io_request,
33 try_complete_io_requests_batch,
34};
35pub(crate) use encode::empty_io_queue_byte_len;
36pub use encode::{
37 encode_empty_io_queue, try_encode_empty_io_queue, try_encode_empty_io_queue_into,
38 validate_io_queue_bytes,
39};
40pub use helpers::io_completion_poll_body;
41pub use poll::{
42 claim_io_requests_into, poll_io_requests, try_claim_io_requests_into, try_poll_io_requests,
43 try_poll_io_requests_into,
44};
45pub use queue::MegakernelIoQueue;
46
/// Number of `u32` words that make up one IO queue slot.
///
/// Matches the eight-field slot layout: op type, source handle, destination
/// handle, offset low/high, byte count, status and tag.
pub const IO_SLOT_WORDS: u32 = 8;

/// Default number of slots in the IO queue.
pub const IO_SLOT_COUNT: u32 = 64;
52
/// Name of the resource table consulted when resolving IO source handles.
pub const IO_SOURCE_CAPABILITY_TABLE: &str = "io_source_capability_table";

/// Name of the resource table consulted when resolving IO destination handles.
pub const IO_DESTINATION_CAPABILITY_TABLE: &str = "io_destination_capability_table";

/// Async stream tag attached to megakernel IO DMA requests.
pub const IO_QUEUE_DMA_TAG: &str = "io_queue_dma";
61
/// Word offsets within a single IO slot.
///
/// Each constant is the index of one `u32` field inside the eight-word slot,
/// in the order
/// `[op_type, src_handle, dst_handle, offset_lo, offset_hi, byte_count, status, tag]`.
pub mod io_word {
    /// DMA operation type (one of the codes in [`super::io_op`]).
    pub const OP_TYPE: u32 = 0;
    /// Source buffer handle id.
    pub const SRC_HANDLE: u32 = 1;
    /// Destination buffer handle id.
    pub const DST_HANDLE: u32 = 2;
    /// Byte offset into the source (low 32 bits).
    pub const OFFSET_LO: u32 = 3;
    /// Byte offset into the source (high 32 bits).
    ///
    /// Together with [`OFFSET_LO`] this forms a 64-bit offset, so sources can
    /// be addressed beyond 4 GiB. The transfer length itself is a single
    /// 32-bit word ([`BYTE_COUNT`]).
    pub const OFFSET_HI: u32 = 4;
    /// Number of bytes to transfer.
    pub const BYTE_COUNT: u32 = 5;
    /// Slot status - same semantics as work ring (EMPTY/PUBLISHED/CLAIMED/DONE).
    pub const STATUS: u32 = 6;
    /// Caller-supplied tag for correlating completions.
    pub const TAG: u32 = 7;
}
81
/// IO operation type codes stored in a slot's op-type word.
pub mod io_op {
    /// Read from storage into a GPU buffer.
    pub const READ: u32 = 0x01;
    /// Write from a GPU buffer to storage.
    pub const WRITE: u32 = 0x02;
    /// Memory fence - ensure all prior IO ops are visible.
    pub const FENCE: u32 = 0x03;
}
91
/// IO completion status codes written by the host pump.
pub mod io_status {
    /// Operation completed successfully.
    pub const OK: u32 = 0x10;
    /// Operation failed - the error code is carried in the slot's tag word.
    pub const ERROR: u32 = 0x11;
}
99
/// Host-side IO request decoded from the io_queue buffer.
///
/// Carries the slot's request fields plus the index of the slot it came
/// from; the slot's two offset words are represented here as one 64-bit
/// [`offset`](Self::offset).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IoRequest {
    /// Slot index in the io_queue.
    pub slot_idx: u32,
    /// Operation type.
    pub op_type: u32,
    /// Source buffer handle.
    pub src_handle: u32,
    /// Destination buffer handle.
    pub dst_handle: u32,
    /// 64-bit byte offset into the source.
    pub offset: u64,
    /// Byte count to transfer.
    pub byte_count: u32,
    /// Caller tag.
    pub tag: u32,
}
118
/// Host-side completion record published into `io_queue` for a mapped
/// ingest slot the GPU can consume.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IoCompletion {
    /// Queue slot index.
    pub slot_idx: u32,
    /// Mapped ingest slot id / destination handle.
    pub mapped_slot: u32,
    /// Number of bytes now valid in the mapped slot.
    pub byte_count: u32,
    /// Caller-defined completion tag.
    pub tag: u32,
}