// rvf_types/wasm_bootstrap.rs

//! WASM_SEG (0x10) types for self-bootstrapping RVF files.
//!
//! Defines the 64-byte `WasmHeader` and associated enums.
//! A WASM_SEG embeds WASM bytecode that enables an RVF file to carry its
//! own execution runtime. When combined with the data segments (VEC_SEG,
//! INDEX_SEG, etc.), this makes the file fully self-bootstrapping:
//!
//! ```text
//! ┌──────────────────────────────────────────────────────────┐
//! │                    .rvf file                             │
//! │                                                         │
//! │  ┌─────────────┐  ┌──────────────┐  ┌───────────────┐  │
//! │  │ WASM_SEG    │  │ WASM_SEG     │  │ VEC_SEG       │  │
//! │  │ role=Interp │  │ role=uKernel │  │ (data)        │  │
//! │  │ ~50 KB      │  │ ~5.5 KB      │  │               │  │
//! │  └──────┬──────┘  └──────┬───────┘  └───────┬───────┘  │
//! │         │                │                   │          │
//! │         │   executes     │    processes      │          │
//! │         └───────────────►└──────────────────►│          │
//! │                                                         │
//! │  Layer 0: Raw bytes                                     │
//! │  Layer 1: Embedded WASM interpreter (native bootstrap)  │
//! │  Layer 2: WASM microkernel (query engine)               │
//! │  Layer 3: RVF data (vectors, indexes, manifests)        │
//! └──────────────────────────────────────────────────────────┘
//! ```
//!
//! The host only needs raw execution capability. RVF becomes
//! self-bootstrapping — "runs anywhere compute exists."

use crate::error::RvfError;

/// Magic number identifying a `WasmHeader`.
///
/// Read as a big-endian `u32`, the four bytes spell ASCII `"RVWM"`.
/// `WasmHeader::to_bytes` writes this field little-endian, so a serialized
/// header begins with the bytes `4D 57 56 52` (`"MWVR"`).
pub const WASM_MAGIC: u32 = 0x5256_574D;

/// Role of the embedded WASM module within the bootstrap chain.
///
/// The discriminant is the single byte stored in `WasmHeader::role`;
/// decode a raw byte with `WasmRole::try_from`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(u8)]
pub enum WasmRole {
    /// RVF microkernel: the query/ingest engine compiled to WASM.
    /// This is the 5.5 KB Cognitum tile runtime with 14+ exports.
    Microkernel = 0x00,
    /// Minimal WASM interpreter: enables self-bootstrapping on hosts
    /// that lack a native WASM runtime. The interpreter runs the
    /// microkernel, which then processes RVF data.
    Interpreter = 0x01,
    /// Combined interpreter + microkernel in a single module.
    /// The interpreter is linked with the microkernel for zero-copy
    /// bootstrap on bare environments.
    Combined = 0x02,
    /// Domain-specific extension module (e.g., custom distance
    /// functions, codon decoder for RVDNA, token scorer for RVText).
    Extension = 0x03,
    /// Control plane module: store management, export, segment
    /// parsing, and file-level operations.
    ControlPlane = 0x04,
}

60impl TryFrom<u8> for WasmRole {
61    type Error = RvfError;
62
63    fn try_from(value: u8) -> Result<Self, Self::Error> {
64        match value {
65            0x00 => Ok(Self::Microkernel),
66            0x01 => Ok(Self::Interpreter),
67            0x02 => Ok(Self::Combined),
68            0x03 => Ok(Self::Extension),
69            0x04 => Ok(Self::ControlPlane),
70            _ => Err(RvfError::InvalidEnumValue {
71                type_name: "WasmRole",
72                value: value as u64,
73            }),
74        }
75    }
76}
77
/// Target platform hint for the WASM module.
///
/// The discriminant is the single byte stored in `WasmHeader::target`;
/// decode a raw byte with `WasmTarget::try_from`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(u8)]
pub enum WasmTarget {
    /// Generic wasm32 (runs on any compliant runtime).
    Wasm32 = 0x00,
    /// WASI Preview 1 (requires WASI syscalls).
    WasiP1 = 0x01,
    /// WASI Preview 2 (component model).
    WasiP2 = 0x02,
    /// Browser-optimized (expects Web APIs via imports).
    Browser = 0x03,
    /// Bare-metal tile (no imports beyond host-tile protocol).
    BareTile = 0x04,
}

95impl TryFrom<u8> for WasmTarget {
96    type Error = RvfError;
97
98    fn try_from(value: u8) -> Result<Self, Self::Error> {
99        match value {
100            0x00 => Ok(Self::Wasm32),
101            0x01 => Ok(Self::WasiP1),
102            0x02 => Ok(Self::WasiP2),
103            0x03 => Ok(Self::Browser),
104            0x04 => Ok(Self::BareTile),
105            _ => Err(RvfError::InvalidEnumValue {
106                type_name: "WasmTarget",
107                value: value as u64,
108            }),
109        }
110    }
111}
112
// WASM module feature requirements: single-bit flags that are OR-ed together
// into `WasmHeader::required_features`.

/// Bit 0: the module requires the WebAssembly SIMD feature.
pub const WASM_FEAT_SIMD: u16 = 1 << 0;
/// Bit 1: the module requires the bulk-memory feature.
pub const WASM_FEAT_BULK_MEMORY: u16 = 1 << 1;
/// Bit 2: the module requires the multi-value feature.
pub const WASM_FEAT_MULTI_VALUE: u16 = 1 << 2;
/// Bit 3: the module requires the reference-types feature.
pub const WASM_FEAT_REFERENCE_TYPES: u16 = 1 << 3;
/// Bit 4: the module requires the threads feature.
pub const WASM_FEAT_THREADS: u16 = 1 << 4;
/// Bit 5: the module requires the tail-call feature.
pub const WASM_FEAT_TAIL_CALL: u16 = 1 << 5;
/// Bit 6: the module requires the GC feature.
pub const WASM_FEAT_GC: u16 = 1 << 6;
/// Bit 7: the module requires the exception-handling feature.
pub const WASM_FEAT_EXCEPTION_HANDLING: u16 = 1 << 7;

/// 64-byte header for WASM_SEG payloads.
///
/// Follows the standard 64-byte `SegmentHeader`. The WASM bytecode
/// follows immediately after this header within the segment payload.
///
/// For self-bootstrapping files, two WASM_SEGs are present:
/// 1. `role = Interpreter` — a minimal WASM interpreter (~50 KB)
/// 2. `role = Microkernel` — the RVF query engine (~5.5 KB)
///
/// The bootstrap sequence is:
/// 1. Host reads file, finds WASM_SEG with `role = Interpreter`
/// 2. Host loads interpreter bytecode into any available execution engine
/// 3. Interpreter instantiates the microkernel WASM_SEG
/// 4. Microkernel processes VEC_SEG, INDEX_SEG, etc.
///
/// Fields are declared in serialized order; the byte offset used by
/// `to_bytes` / `from_bytes` is noted on each field. `role` and `target`
/// are kept as raw bytes and can be decoded with `WasmRole::try_from` and
/// `WasmTarget::try_from`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct WasmHeader {
    /// Offset 0x00. Magic: `WASM_MAGIC` (0x5256574D, "RVWM").
    pub wasm_magic: u32,
    /// Offset 0x04. WasmHeader format version (currently 1).
    pub header_version: u16,
    /// Offset 0x06. Role in the bootstrap chain (see `WasmRole`).
    pub role: u8,
    /// Offset 0x07. Target platform (see `WasmTarget`).
    pub target: u8,
    /// Offset 0x08. Required WASM features bitfield (see `WASM_FEAT_*`).
    pub required_features: u16,
    /// Offset 0x0A. Number of exports in the WASM module.
    pub export_count: u16,
    /// Offset 0x0C. Uncompressed WASM bytecode size (bytes).
    pub bytecode_size: u32,
    /// Offset 0x10. Compressed bytecode size (0 if uncompressed).
    pub compressed_size: u32,
    /// Offset 0x14. Compression algorithm (same enum as SegmentHeader).
    pub compression: u8,
    /// Offset 0x15. Minimum linear memory pages required (64 KB each).
    pub min_memory_pages: u8,
    /// Offset 0x16. Maximum linear memory pages (0 = no limit).
    /// Being a `u8`, an explicit limit can be at most 255 pages.
    pub max_memory_pages: u8,
    /// Offset 0x17. Number of WASM tables.
    pub table_count: u8,
    /// Offset 0x18. SHAKE-256-256 hash of uncompressed bytecode.
    pub bytecode_hash: [u8; 32],
    /// Offset 0x38. Priority order for bootstrap resolution (lower = tried
    /// first). When multiple interpreters are present, the one with the
    /// lowest priority value is used.
    pub bootstrap_priority: u8,
    /// Offset 0x39. If role=Interpreter, this is the interpreter type:
    /// 0x00 = generic stack machine, 0x01 = wasm3-compatible,
    /// 0x02 = wamr-compatible, 0x03 = wasmi-compatible.
    pub interpreter_type: u8,
    /// Offset 0x3A. Reserved (must be zero).
    pub reserved: [u8; 6],
}

177// Compile-time assertion: WasmHeader must be exactly 64 bytes.
178const _: () = assert!(core::mem::size_of::<WasmHeader>() == 64);
179
180impl WasmHeader {
181    /// Serialize the header to a 64-byte little-endian array.
182    pub fn to_bytes(&self) -> [u8; 64] {
183        let mut buf = [0u8; 64];
184        buf[0x00..0x04].copy_from_slice(&self.wasm_magic.to_le_bytes());
185        buf[0x04..0x06].copy_from_slice(&self.header_version.to_le_bytes());
186        buf[0x06] = self.role;
187        buf[0x07] = self.target;
188        buf[0x08..0x0A].copy_from_slice(&self.required_features.to_le_bytes());
189        buf[0x0A..0x0C].copy_from_slice(&self.export_count.to_le_bytes());
190        buf[0x0C..0x10].copy_from_slice(&self.bytecode_size.to_le_bytes());
191        buf[0x10..0x14].copy_from_slice(&self.compressed_size.to_le_bytes());
192        buf[0x14] = self.compression;
193        buf[0x15] = self.min_memory_pages;
194        buf[0x16] = self.max_memory_pages;
195        buf[0x17] = self.table_count;
196        buf[0x18..0x38].copy_from_slice(&self.bytecode_hash);
197        buf[0x38] = self.bootstrap_priority;
198        buf[0x39] = self.interpreter_type;
199        buf[0x3A..0x40].copy_from_slice(&self.reserved);
200        buf
201    }
202
203    /// Deserialize a `WasmHeader` from a 64-byte slice.
204    pub fn from_bytes(data: &[u8; 64]) -> Result<Self, RvfError> {
205        let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
206        if magic != WASM_MAGIC {
207            return Err(RvfError::BadMagic {
208                expected: WASM_MAGIC,
209                got: magic,
210            });
211        }
212
213        Ok(Self {
214            wasm_magic: magic,
215            header_version: u16::from_le_bytes([data[0x04], data[0x05]]),
216            role: data[0x06],
217            target: data[0x07],
218            required_features: u16::from_le_bytes([data[0x08], data[0x09]]),
219            export_count: u16::from_le_bytes([data[0x0A], data[0x0B]]),
220            bytecode_size: u32::from_le_bytes([data[0x0C], data[0x0D], data[0x0E], data[0x0F]]),
221            compressed_size: u32::from_le_bytes([
222                data[0x10], data[0x11], data[0x12], data[0x13],
223            ]),
224            compression: data[0x14],
225            min_memory_pages: data[0x15],
226            max_memory_pages: data[0x16],
227            table_count: data[0x17],
228            bytecode_hash: {
229                let mut h = [0u8; 32];
230                h.copy_from_slice(&data[0x18..0x38]);
231                h
232            },
233            bootstrap_priority: data[0x38],
234            interpreter_type: data[0x39],
235            reserved: {
236                let mut r = [0u8; 6];
237                r.copy_from_slice(&data[0x3A..0x40]);
238                r
239            },
240        })
241    }
242}
243
/// Unit tests for the header layout, (de)serialization round trips, and the
/// raw-byte enum conversions.
#[cfg(test)]
mod tests {
    use super::*;

    /// A representative microkernel header used as the baseline fixture.
    fn sample_header() -> WasmHeader {
        WasmHeader {
            wasm_magic: WASM_MAGIC,
            header_version: 1,
            role: WasmRole::Microkernel as u8,
            target: WasmTarget::BareTile as u8,
            required_features: WASM_FEAT_SIMD | WASM_FEAT_BULK_MEMORY,
            export_count: 14,
            bytecode_size: 5500,
            compressed_size: 0,
            compression: 0,
            min_memory_pages: 2, // 128 KB
            max_memory_pages: 4, // 256 KB
            table_count: 0,
            bytecode_hash: [0xAB; 32],
            bootstrap_priority: 0,
            interpreter_type: 0,
            reserved: [0; 6],
        }
    }

    #[test]
    fn header_size_is_64() {
        assert_eq!(core::mem::size_of::<WasmHeader>(), 64);
    }

    #[test]
    fn magic_bytes_match_ascii() {
        let bytes_be = WASM_MAGIC.to_be_bytes();
        assert_eq!(&bytes_be, b"RVWM");
    }

    #[test]
    fn round_trip_serialization() {
        let original = sample_header();
        let bytes = original.to_bytes();
        let decoded = WasmHeader::from_bytes(&bytes).expect("from_bytes should succeed");

        assert_eq!(decoded.wasm_magic, WASM_MAGIC);
        assert_eq!(decoded.header_version, 1);
        assert_eq!(decoded.role, WasmRole::Microkernel as u8);
        assert_eq!(decoded.target, WasmTarget::BareTile as u8);
        assert_eq!(decoded.required_features, WASM_FEAT_SIMD | WASM_FEAT_BULK_MEMORY);
        assert_eq!(decoded.export_count, 14);
        assert_eq!(decoded.bytecode_size, 5500);
        assert_eq!(decoded.compressed_size, 0);
        assert_eq!(decoded.compression, 0);
        assert_eq!(decoded.min_memory_pages, 2);
        assert_eq!(decoded.max_memory_pages, 4);
        assert_eq!(decoded.table_count, 0);
        assert_eq!(decoded.bytecode_hash, [0xAB; 32]);
        assert_eq!(decoded.bootstrap_priority, 0);
        assert_eq!(decoded.interpreter_type, 0);
        assert_eq!(decoded.reserved, [0; 6]);
    }

    #[test]
    fn bad_magic_returns_error() {
        let mut bytes = sample_header().to_bytes();
        // Byte 0 is the least-significant byte of the little-endian magic.
        bytes[0] = 0x00;
        let err = WasmHeader::from_bytes(&bytes).unwrap_err();
        match err {
            RvfError::BadMagic { expected, got } => {
                assert_eq!(expected, WASM_MAGIC);
                assert_eq!(got, WASM_MAGIC & 0xFFFF_FF00);
            }
            other => panic!("expected BadMagic, got {other:?}"),
        }
    }

    #[test]
    fn interpreter_header() {
        let h = WasmHeader {
            wasm_magic: WASM_MAGIC,
            header_version: 1,
            role: WasmRole::Interpreter as u8,
            target: WasmTarget::Wasm32 as u8,
            required_features: 0,
            export_count: 3,
            bytecode_size: 51_200, // ~50 KB interpreter
            compressed_size: 22_000,
            compression: 2,         // ZSTD
            min_memory_pages: 16,   // 1 MB
            max_memory_pages: 64,   // 4 MB
            table_count: 1,
            bytecode_hash: [0xCD; 32],
            bootstrap_priority: 0,  // highest priority
            interpreter_type: 0x03, // wasmi-compatible
            reserved: [0; 6],
        };
        let bytes = h.to_bytes();
        let decoded = WasmHeader::from_bytes(&bytes).unwrap();
        assert_eq!(decoded.role, WasmRole::Interpreter as u8);
        assert_eq!(decoded.bytecode_size, 51_200);
        assert_eq!(decoded.interpreter_type, 0x03);
    }

    #[test]
    fn combined_bootstrap_header() {
        let h = WasmHeader {
            wasm_magic: WASM_MAGIC,
            header_version: 1,
            role: WasmRole::Combined as u8,
            target: WasmTarget::Wasm32 as u8,
            required_features: WASM_FEAT_SIMD,
            export_count: 17,
            bytecode_size: 56_700, // interpreter + microkernel
            compressed_size: 0,
            compression: 0,
            min_memory_pages: 16,
            max_memory_pages: 64,
            table_count: 1,
            bytecode_hash: [0xEF; 32],
            bootstrap_priority: 0,
            interpreter_type: 0,
            reserved: [0; 6],
        };
        let bytes = h.to_bytes();
        let decoded = WasmHeader::from_bytes(&bytes).unwrap();
        assert_eq!(decoded.role, WasmRole::Combined as u8);
        assert_eq!(decoded.export_count, 17);
    }

    #[test]
    fn wasm_role_try_from() {
        assert_eq!(WasmRole::try_from(0x00), Ok(WasmRole::Microkernel));
        assert_eq!(WasmRole::try_from(0x01), Ok(WasmRole::Interpreter));
        assert_eq!(WasmRole::try_from(0x02), Ok(WasmRole::Combined));
        assert_eq!(WasmRole::try_from(0x03), Ok(WasmRole::Extension));
        assert_eq!(WasmRole::try_from(0x04), Ok(WasmRole::ControlPlane));
        assert!(WasmRole::try_from(0x05).is_err());
        assert!(WasmRole::try_from(0xFF).is_err());
    }

    #[test]
    fn wasm_target_try_from() {
        assert_eq!(WasmTarget::try_from(0x00), Ok(WasmTarget::Wasm32));
        assert_eq!(WasmTarget::try_from(0x01), Ok(WasmTarget::WasiP1));
        assert_eq!(WasmTarget::try_from(0x02), Ok(WasmTarget::WasiP2));
        assert_eq!(WasmTarget::try_from(0x03), Ok(WasmTarget::Browser));
        assert_eq!(WasmTarget::try_from(0x04), Ok(WasmTarget::BareTile));
        assert!(WasmTarget::try_from(0x05).is_err());
        assert!(WasmTarget::try_from(0xFF).is_err());
    }

    #[test]
    fn feature_flags_bit_positions() {
        assert_eq!(WASM_FEAT_SIMD, 0x0001);
        assert_eq!(WASM_FEAT_BULK_MEMORY, 0x0002);
        assert_eq!(WASM_FEAT_MULTI_VALUE, 0x0004);
        assert_eq!(WASM_FEAT_REFERENCE_TYPES, 0x0008);
        assert_eq!(WASM_FEAT_THREADS, 0x0010);
        assert_eq!(WASM_FEAT_TAIL_CALL, 0x0020);
        assert_eq!(WASM_FEAT_GC, 0x0040);
        assert_eq!(WASM_FEAT_EXCEPTION_HANDLING, 0x0080);
    }
}