Skip to main content

forma_ir/
format.rs

1//! FMIR binary format definition and parsers.
2//!
3//! Defines the on-disk layout for Forma Module IR files:
4//! - 16-byte header (magic, version, flags, source hash)
5//! - 32-byte section table (4 sections x 8 bytes each)
//! - Opcode, SlotType, IslandTrigger, PropsMode, SlotSource enums
7//! - SlotEntry and IslandEntry structs
8//!
9//! All multi-byte integers are little-endian.
10
11use std::fmt;
12
13// ---------------------------------------------------------------------------
14// Constants
15// ---------------------------------------------------------------------------
16
/// The four bytes that open every FMIR file.
pub const MAGIC: &[u8; 4] = b"FMIR";

/// Byte length of the file header: magic (4) + version (2) + flags (2) + source hash (8).
pub const HEADER_SIZE: usize = 4 + 2 + 2 + 8;

/// Byte length of the section table: 4 descriptors of 8 bytes each.
pub const SECTION_TABLE_SIZE: usize = 4 * 8;

/// Current IR format version.
pub const IR_VERSION: u16 = 2;
28
29// ---------------------------------------------------------------------------
30// Error type
31// ---------------------------------------------------------------------------
32
33/// Errors that can occur when parsing FMIR binary data.
34#[derive(Debug, Clone, PartialEq, Eq)]
35pub enum IrError {
36    /// Input buffer is too short to contain the expected structure.
37    BufferTooShort { expected: usize, actual: usize },
38    /// Magic bytes do not match "FMIR".
39    BadMagic([u8; 4]),
40    /// IR version is not supported.
41    UnsupportedVersion(u16),
42    /// A section extends beyond the file boundary.
43    SectionOutOfBounds {
44        section: usize,
45        offset: u32,
46        size: u32,
47        file_len: usize,
48    },
49    /// An opcode byte does not map to a known opcode.
50    InvalidOpcode(u8),
51    /// A slot-type byte does not map to a known slot type.
52    InvalidSlotType(u8),
53    /// An island-trigger byte does not map to a known trigger.
54    InvalidIslandTrigger(u8),
55    /// A props-mode byte does not map to a known mode.
56    InvalidPropsMode(u8),
57    /// A slot-source byte does not map to a known source.
58    InvalidSlotSource(u8),
59    /// A string index is out of bounds.
60    StringIndexOutOfBounds { index: u32, len: usize },
61    /// A byte sequence is not valid UTF-8.
62    InvalidUtf8(String),
63    /// Nested LIST depth exceeded the maximum allowed.
64    ListDepthExceeded { max: u8 },
65    /// An island with the given id was not found in the island table.
66    IslandNotFound(u16),
67    /// Failed to parse JSON input.
68    JsonParseError(String),
69    /// A section descriptor has invalid values (e.g. integer overflow).
70    InvalidSection,
71}
72
73impl fmt::Display for IrError {
74    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75        match self {
76            IrError::BufferTooShort { expected, actual } => {
77                write!(
78                    f,
79                    "buffer too short: expected at least {expected} bytes, got {actual}"
80                )
81            }
82            IrError::BadMagic(got) => {
83                write!(f, "bad magic: expected FMIR, got {:?}", got)
84            }
85            IrError::UnsupportedVersion(v) => {
86                write!(f, "unsupported IR version: {v} (expected {IR_VERSION})")
87            }
88            IrError::SectionOutOfBounds {
89                section,
90                offset,
91                size,
92                file_len,
93            } => {
94                write!(
95                    f,
96                    "section {section} out of bounds: offset={offset}, size={size}, file_len={file_len}"
97                )
98            }
99            IrError::InvalidOpcode(b) => write!(f, "invalid opcode: 0x{b:02x}"),
100            IrError::InvalidSlotType(b) => write!(f, "invalid slot type: 0x{b:02x}"),
101            IrError::InvalidIslandTrigger(b) => {
102                write!(f, "invalid island trigger: 0x{b:02x}")
103            }
104            IrError::InvalidPropsMode(b) => write!(f, "invalid props mode: 0x{b:02x}"),
105            IrError::InvalidSlotSource(b) => write!(f, "invalid slot source: 0x{b:02x}"),
106            IrError::StringIndexOutOfBounds { index, len } => {
107                write!(f, "string index {index} out of bounds (table has {len} entries)")
108            }
109            IrError::InvalidUtf8(msg) => write!(f, "invalid UTF-8: {msg}"),
110            IrError::ListDepthExceeded { max } => {
111                write!(f, "nested LIST depth exceeded maximum of {max}")
112            }
113            IrError::IslandNotFound(id) => {
114                write!(f, "island with id {id} not found in island table")
115            }
116            IrError::JsonParseError(msg) => {
117                write!(f, "JSON parse error: {msg}")
118            }
119            IrError::InvalidSection => {
120                write!(f, "invalid section descriptor (integer overflow)")
121            }
122        }
123    }
124}
125
126impl std::error::Error for IrError {}
127
128// ---------------------------------------------------------------------------
129// Header (16 bytes)
130// ---------------------------------------------------------------------------
131
132/// FMIR file header — the first 16 bytes of every `.fmir` file.
133///
134/// Layout (little-endian):
135/// ```text
136/// [0..4)   magic        – b"FMIR"
137/// [4..6)   version      – u16
138/// [6..8)   flags        – u16 (reserved, must be 0)
139/// [8..16)  source_hash  – u64 (hash of original source)
140/// ```
141#[derive(Debug, Clone, Copy, PartialEq, Eq)]
142pub struct IrHeader {
143    pub version: u16,
144    pub flags: u16,
145    pub source_hash: u64,
146}
147
148impl IrHeader {
149    /// Parse an `IrHeader` from the first 16 bytes of `data`.
150    pub fn parse(data: &[u8]) -> Result<Self, IrError> {
151        if data.len() < HEADER_SIZE {
152            return Err(IrError::BufferTooShort {
153                expected: HEADER_SIZE,
154                actual: data.len(),
155            });
156        }
157
158        let magic: [u8; 4] = data[0..4].try_into().unwrap();
159        if &magic != MAGIC {
160            return Err(IrError::BadMagic(magic));
161        }
162
163        let version = u16::from_le_bytes(data[4..6].try_into().unwrap());
164        if version != IR_VERSION {
165            return Err(IrError::UnsupportedVersion(version));
166        }
167
168        let flags = u16::from_le_bytes(data[6..8].try_into().unwrap());
169        let source_hash = u64::from_le_bytes(data[8..16].try_into().unwrap());
170
171        Ok(IrHeader {
172            version,
173            flags,
174            source_hash,
175        })
176    }
177}
178
179// ---------------------------------------------------------------------------
180// Section table (32 bytes)
181// ---------------------------------------------------------------------------
182
/// Location of one section: a start `offset` and a `size`.
///
/// Both fields are `u32` and are stored little-endian on disk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SectionDescriptor {
    pub offset: u32,
    pub size: u32,
}
189
190/// Section table — four section descriptors immediately following the header.
191///
192/// Sections (in order):
193/// 0. Bytecode
194/// 1. String table
195/// 2. Slot table
196/// 3. Island table
197///
198/// Layout: 4 x (offset u32 + size u32) = 32 bytes.
199#[derive(Debug, Clone, Copy, PartialEq, Eq)]
200pub struct SectionTable {
201    pub sections: [SectionDescriptor; 4],
202}
203
204impl SectionTable {
205    /// Parse a `SectionTable` from `data` (must be at least 32 bytes).
206    pub fn parse(data: &[u8]) -> Result<Self, IrError> {
207        if data.len() < SECTION_TABLE_SIZE {
208            return Err(IrError::BufferTooShort {
209                expected: SECTION_TABLE_SIZE,
210                actual: data.len(),
211            });
212        }
213
214        let mut sections = [SectionDescriptor { offset: 0, size: 0 }; 4];
215        for (i, section) in sections.iter_mut().enumerate() {
216            let base = i * 8;
217            let offset = u32::from_le_bytes(data[base..base + 4].try_into().unwrap());
218            let size = u32::from_le_bytes(data[base + 4..base + 8].try_into().unwrap());
219            *section = SectionDescriptor { offset, size };
220        }
221
222        Ok(SectionTable { sections })
223    }
224
225    /// Validate that every section falls within `file_len` bytes.
226    pub fn validate(&self, file_len: usize) -> Result<(), IrError> {
227        for (i, sec) in self.sections.iter().enumerate() {
228            let end = (sec.offset as usize)
229                .checked_add(sec.size as usize)
230                .ok_or(IrError::InvalidSection)?;
231            if end > file_len {
232                return Err(IrError::SectionOutOfBounds {
233                    section: i,
234                    offset: sec.offset,
235                    size: sec.size,
236                    file_len,
237                });
238            }
239        }
240        Ok(())
241    }
242}
243
244// ---------------------------------------------------------------------------
// Opcode enum (18 opcodes, 0x01–0x12)
246// ---------------------------------------------------------------------------
247
248/// Bytecode opcodes for the FMIR instruction stream.
249#[derive(Debug, Clone, Copy, PartialEq, Eq)]
250#[repr(u8)]
251pub enum Opcode {
252    OpenTag = 0x01,
253    CloseTag = 0x02,
254    VoidTag = 0x03,
255    Text = 0x04,
256    DynText = 0x05,
257    DynAttr = 0x06,
258    ShowIf = 0x07,
259    ShowElse = 0x08,
260    Switch = 0x09,
261    List = 0x0A,
262    IslandStart = 0x0B,
263    IslandEnd = 0x0C,
264    TryStart = 0x0D,
265    Fallback = 0x0E,
266    Preload = 0x0F,
267    Comment = 0x10,
268    ListItemKey = 0x11,
269    /// Extract a named property from an Object slot into a target slot.
270    /// Format: src_slot_id(u16) + prop_str_idx(u32) + target_slot_id(u16)
271    Prop = 0x12,
272}
273
274impl Opcode {
275    /// Convert a raw byte to an `Opcode`.
276    pub fn from_byte(b: u8) -> Result<Self, IrError> {
277        match b {
278            0x01 => Ok(Opcode::OpenTag),
279            0x02 => Ok(Opcode::CloseTag),
280            0x03 => Ok(Opcode::VoidTag),
281            0x04 => Ok(Opcode::Text),
282            0x05 => Ok(Opcode::DynText),
283            0x06 => Ok(Opcode::DynAttr),
284            0x07 => Ok(Opcode::ShowIf),
285            0x08 => Ok(Opcode::ShowElse),
286            0x09 => Ok(Opcode::Switch),
287            0x0A => Ok(Opcode::List),
288            0x0B => Ok(Opcode::IslandStart),
289            0x0C => Ok(Opcode::IslandEnd),
290            0x0D => Ok(Opcode::TryStart),
291            0x0E => Ok(Opcode::Fallback),
292            0x0F => Ok(Opcode::Preload),
293            0x10 => Ok(Opcode::Comment),
294            0x11 => Ok(Opcode::ListItemKey),
295            0x12 => Ok(Opcode::Prop),
296            _ => Err(IrError::InvalidOpcode(b)),
297        }
298    }
299}
300
301// ---------------------------------------------------------------------------
302// SlotType enum (5 types)
303// ---------------------------------------------------------------------------
304
305/// Data type hint for a slot entry.
306#[derive(Debug, Clone, Copy, PartialEq, Eq)]
307#[repr(u8)]
308pub enum SlotType {
309    Text = 0x01,
310    Bool = 0x02,
311    Number = 0x03,
312    Array = 0x04,
313    Object = 0x05,
314}
315
316impl SlotType {
317    /// Convert a raw byte to a `SlotType`.
318    pub fn from_byte(b: u8) -> Result<Self, IrError> {
319        match b {
320            0x01 => Ok(SlotType::Text),
321            0x02 => Ok(SlotType::Bool),
322            0x03 => Ok(SlotType::Number),
323            0x04 => Ok(SlotType::Array),
324            0x05 => Ok(SlotType::Object),
325            _ => Err(IrError::InvalidSlotType(b)),
326        }
327    }
328}
329
330// ---------------------------------------------------------------------------
331// IslandTrigger enum (4 triggers)
332// ---------------------------------------------------------------------------
333
334/// When an island should be hydrated.
335#[derive(Debug, Clone, Copy, PartialEq, Eq)]
336#[repr(u8)]
337pub enum IslandTrigger {
338    Load = 0x01,
339    Visible = 0x02,
340    Interaction = 0x03,
341    Idle = 0x04,
342}
343
344impl IslandTrigger {
345    /// Convert a raw byte to an `IslandTrigger`.
346    pub fn from_byte(b: u8) -> Result<Self, IrError> {
347        match b {
348            0x01 => Ok(IslandTrigger::Load),
349            0x02 => Ok(IslandTrigger::Visible),
350            0x03 => Ok(IslandTrigger::Interaction),
351            0x04 => Ok(IslandTrigger::Idle),
352            _ => Err(IrError::InvalidIslandTrigger(b)),
353        }
354    }
355}
356
357// ---------------------------------------------------------------------------
358// PropsMode enum (3 modes)
359// ---------------------------------------------------------------------------
360
361/// How island props are delivered.
362#[derive(Debug, Clone, Copy, PartialEq, Eq)]
363#[repr(u8)]
364pub enum PropsMode {
365    Inline = 0x01,
366    ScriptTag = 0x02,
367    Deferred = 0x03,
368}
369
370impl PropsMode {
371    /// Convert a raw byte to a `PropsMode`.
372    pub fn from_byte(b: u8) -> Result<Self, IrError> {
373        match b {
374            0x01 => Ok(PropsMode::Inline),
375            0x02 => Ok(PropsMode::ScriptTag),
376            0x03 => Ok(PropsMode::Deferred),
377            _ => Err(IrError::InvalidPropsMode(b)),
378        }
379    }
380}
381
382// ---------------------------------------------------------------------------
383// SlotSource enum (2 sources)
384// ---------------------------------------------------------------------------
385
386/// Where the slot value originates at runtime.
387#[derive(Debug, Clone, Copy, PartialEq, Eq)]
388#[repr(u8)]
389pub enum SlotSource {
390    Server = 0x00,
391    Client = 0x01,
392}
393
394impl SlotSource {
395    /// Convert a raw byte to a `SlotSource`.
396    pub fn from_byte(b: u8) -> Result<Self, IrError> {
397        match b {
398            0x00 => Ok(SlotSource::Server),
399            0x01 => Ok(SlotSource::Client),
400            _ => Err(IrError::InvalidSlotSource(b)),
401        }
402    }
403}
404
405// ---------------------------------------------------------------------------
406// SlotEntry
407// ---------------------------------------------------------------------------
408
409/// A slot declaration in the slot table.
410#[derive(Debug, Clone, PartialEq, Eq)]
411pub struct SlotEntry {
412    /// Unique slot identifier within the module.
413    pub slot_id: u16,
414    /// Index into the string table for the slot name.
415    pub name_str_idx: u32,
416    /// Expected data type for this slot.
417    pub type_hint: SlotType,
418    /// Where the slot value originates at runtime.
419    pub source: SlotSource,
420    /// Default value bytes (empty if no default).
421    pub default_bytes: Vec<u8>,
422}
423
424// ---------------------------------------------------------------------------
425// IslandEntry
426// ---------------------------------------------------------------------------
427
428/// An island declaration in the island table.
429#[derive(Debug, Clone, PartialEq, Eq)]
430pub struct IslandEntry {
431    /// Unique island identifier within the module.
432    pub id: u16,
433    /// When this island should hydrate.
434    pub trigger: IslandTrigger,
435    /// How props are delivered to the island.
436    pub props_mode: PropsMode,
437    /// Index into the string table for the island name.
438    pub name_str_idx: u32,
439    /// Byte offset of the ISLAND_START opcode in the bytecode stream.
440    pub byte_offset: u32,
441    /// Which slots belong to this island.
442    pub slot_ids: Vec<u16>,
443}
444
445// ---------------------------------------------------------------------------
446// Tests
447// ---------------------------------------------------------------------------
448
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a 16-byte FMIR header with the correct magic and the given fields.
    fn make_header(version: u16, flags: u16, source_hash: u64) -> Vec<u8> {
        let mut buf = Vec::with_capacity(HEADER_SIZE);
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&version.to_le_bytes());
        buf.extend_from_slice(&flags.to_le_bytes());
        buf.extend_from_slice(&source_hash.to_le_bytes());
        buf
    }

    #[test]
    fn parse_valid_header() {
        // Use IR_VERSION rather than a literal so a version bump does not
        // break this test for the wrong reason.
        let data = make_header(IR_VERSION, 0, 0xDEAD_BEEF_CAFE_BABE);
        let hdr = IrHeader::parse(&data).unwrap();
        assert_eq!(hdr.version, IR_VERSION);
        assert_eq!(hdr.flags, 0);
        assert_eq!(hdr.source_hash, 0xDEAD_BEEF_CAFE_BABE);
    }

    #[test]
    fn reject_short_header() {
        let data = make_header(IR_VERSION, 0, 0);
        let err = IrHeader::parse(&data[..HEADER_SIZE - 1]).unwrap_err();
        assert_eq!(
            err,
            IrError::BufferTooShort {
                expected: HEADER_SIZE,
                actual: HEADER_SIZE - 1,
            }
        );

        let err = IrHeader::parse(&data[..0]).unwrap_err();
        assert_eq!(
            err,
            IrError::BufferTooShort {
                expected: HEADER_SIZE,
                actual: 0,
            }
        );
    }

    #[test]
    fn reject_bad_magic() {
        let mut data = make_header(IR_VERSION, 0, 0);
        data[0..4].copy_from_slice(b"NOPE");
        let err = IrHeader::parse(&data).unwrap_err();
        assert_eq!(err, IrError::BadMagic(*b"NOPE"));
    }

    #[test]
    fn reject_unsupported_version() {
        // Any value other than IR_VERSION must be rejected.
        let bad_version = IR_VERSION.wrapping_add(1);
        let data = make_header(bad_version, 0, 0);
        let err = IrHeader::parse(&data).unwrap_err();
        assert_eq!(err, IrError::UnsupportedVersion(bad_version));
    }

    /// Build a 32-byte section table from four (offset, size) pairs.
    fn make_section_table(sections: [(u32, u32); 4]) -> Vec<u8> {
        let mut buf = Vec::with_capacity(SECTION_TABLE_SIZE);
        for (offset, size) in &sections {
            buf.extend_from_slice(&offset.to_le_bytes());
            buf.extend_from_slice(&size.to_le_bytes());
        }
        buf
    }

    #[test]
    fn parse_section_table() {
        let data = make_section_table([
            (48, 100),  // bytecode
            (148, 200), // string table
            (348, 50),  // slot table
            (398, 30),  // island table
        ]);
        let st = SectionTable::parse(&data).unwrap();
        assert_eq!(st.sections[0], SectionDescriptor { offset: 48, size: 100 });
        assert_eq!(st.sections[1], SectionDescriptor { offset: 148, size: 200 });
        assert_eq!(st.sections[2], SectionDescriptor { offset: 348, size: 50 });
        assert_eq!(st.sections[3], SectionDescriptor { offset: 398, size: 30 });
    }

    #[test]
    fn reject_short_section_table() {
        let data = make_section_table([(0, 0); 4]);
        let err = SectionTable::parse(&data[..SECTION_TABLE_SIZE - 1]).unwrap_err();
        assert_eq!(
            err,
            IrError::BufferTooShort {
                expected: SECTION_TABLE_SIZE,
                actual: SECTION_TABLE_SIZE - 1,
            }
        );
    }

    #[test]
    fn validate_section_bounds() {
        let data = make_section_table([
            (48, 100),
            (148, 200),
            (348, 50),
            (398, 9999), // way past end
        ]);
        let st = SectionTable::parse(&data).unwrap();
        let err = st.validate(500).unwrap_err();
        assert_eq!(
            err,
            IrError::SectionOutOfBounds {
                section: 3,
                offset: 398,
                size: 9999,
                file_len: 500,
            }
        );
    }

    #[test]
    fn validate_accepts_sections_within_file() {
        let data = make_section_table([(48, 100), (148, 200), (348, 50), (398, 30)]);
        let st = SectionTable::parse(&data).unwrap();
        // The last section ends at 398 + 30 = 428; ending exactly at the file
        // length is allowed, one byte less is not.
        assert_eq!(st.validate(428), Ok(()));
        assert_eq!(st.validate(500), Ok(()));
        assert_eq!(
            st.validate(427).unwrap_err(),
            IrError::SectionOutOfBounds {
                section: 3,
                offset: 398,
                size: 30,
                file_len: 427,
            }
        );
    }

    #[test]
    fn opcode_from_byte_all_valid() {
        let expected = [
            (0x01, Opcode::OpenTag),
            (0x02, Opcode::CloseTag),
            (0x03, Opcode::VoidTag),
            (0x04, Opcode::Text),
            (0x05, Opcode::DynText),
            (0x06, Opcode::DynAttr),
            (0x07, Opcode::ShowIf),
            (0x08, Opcode::ShowElse),
            (0x09, Opcode::Switch),
            (0x0A, Opcode::List),
            (0x0B, Opcode::IslandStart),
            (0x0C, Opcode::IslandEnd),
            (0x0D, Opcode::TryStart),
            (0x0E, Opcode::Fallback),
            (0x0F, Opcode::Preload),
            (0x10, Opcode::Comment),
            (0x11, Opcode::ListItemKey),
            (0x12, Opcode::Prop),
        ];
        for (byte, op) in &expected {
            assert_eq!(Opcode::from_byte(*byte).unwrap(), *op, "byte 0x{byte:02x}");
        }
    }

    #[test]
    fn opcode_from_byte_invalid() {
        assert_eq!(Opcode::from_byte(0x00).unwrap_err(), IrError::InvalidOpcode(0x00));
        assert_eq!(Opcode::from_byte(0x13).unwrap_err(), IrError::InvalidOpcode(0x13));
        assert_eq!(Opcode::from_byte(0xFF).unwrap_err(), IrError::InvalidOpcode(0xFF));
    }

    #[test]
    fn slot_type_from_byte() {
        let expected = [
            (0x01, SlotType::Text),
            (0x02, SlotType::Bool),
            (0x03, SlotType::Number),
            (0x04, SlotType::Array),
            (0x05, SlotType::Object),
        ];
        for (byte, st) in &expected {
            assert_eq!(SlotType::from_byte(*byte).unwrap(), *st, "byte 0x{byte:02x}");
        }
        assert_eq!(
            SlotType::from_byte(0x00).unwrap_err(),
            IrError::InvalidSlotType(0x00)
        );
        assert_eq!(
            SlotType::from_byte(0x06).unwrap_err(),
            IrError::InvalidSlotType(0x06)
        );
    }

    #[test]
    fn island_trigger_from_byte() {
        let expected = [
            (0x01, IslandTrigger::Load),
            (0x02, IslandTrigger::Visible),
            (0x03, IslandTrigger::Interaction),
            (0x04, IslandTrigger::Idle),
        ];
        for (byte, trigger) in &expected {
            assert_eq!(
                IslandTrigger::from_byte(*byte).unwrap(),
                *trigger,
                "byte 0x{byte:02x}"
            );
        }
        assert_eq!(
            IslandTrigger::from_byte(0x00).unwrap_err(),
            IrError::InvalidIslandTrigger(0x00)
        );
        assert_eq!(
            IslandTrigger::from_byte(0x05).unwrap_err(),
            IrError::InvalidIslandTrigger(0x05)
        );
    }

    #[test]
    fn props_mode_from_byte() {
        let expected = [
            (0x01, PropsMode::Inline),
            (0x02, PropsMode::ScriptTag),
            (0x03, PropsMode::Deferred),
        ];
        for (byte, mode) in &expected {
            assert_eq!(
                PropsMode::from_byte(*byte).unwrap(),
                *mode,
                "byte 0x{byte:02x}"
            );
        }
        assert_eq!(
            PropsMode::from_byte(0x00).unwrap_err(),
            IrError::InvalidPropsMode(0x00)
        );
        assert_eq!(
            PropsMode::from_byte(0x04).unwrap_err(),
            IrError::InvalidPropsMode(0x04)
        );
    }

    #[test]
    fn slot_source_from_byte() {
        assert_eq!(SlotSource::from_byte(0x00).unwrap(), SlotSource::Server);
        assert_eq!(SlotSource::from_byte(0x01).unwrap(), SlotSource::Client);
        assert_eq!(
            SlotSource::from_byte(0x02).unwrap_err(),
            IrError::InvalidSlotSource(0x02)
        );
        assert_eq!(
            SlotSource::from_byte(0xFF).unwrap_err(),
            IrError::InvalidSlotSource(0xFF)
        );
    }
}