Skip to main content

forma_ir/
format.rs

1//! FMIR binary format definition and parsers.
2//!
3//! Defines the on-disk layout for Forma Module IR files:
4//! - 16-byte header (magic, version, flags, source hash)
5//! - 32-byte section table (4 sections x 8 bytes each)
//! - Opcode, SlotType, IslandTrigger, PropsMode, SlotSource enums
7//! - SlotEntry and IslandEntry structs
8//!
9//! All multi-byte integers are little-endian.
10
11use std::fmt;
12
13// ---------------------------------------------------------------------------
14// Constants
15// ---------------------------------------------------------------------------
16
/// Four-byte signature expected at the very start of an FMIR file.
pub const MAGIC: &[u8; 4] = b"FMIR";

/// Byte length of the file header:
/// magic (4) + version (2) + flags (2) + source hash (8).
pub const HEADER_SIZE: usize = 4 + 2 + 2 + 8;

/// Byte length of the section table: 4 descriptors of 8 bytes each.
pub const SECTION_TABLE_SIZE: usize = 4 * 8;

/// The IR format version accepted by the header parser.
pub const IR_VERSION: u16 = 2;
28
29// ---------------------------------------------------------------------------
30// Error type
31// ---------------------------------------------------------------------------
32
/// Everything that can go wrong while decoding FMIR binary data.
///
/// Variants carry the offending value(s) so that the `Display` output can
/// report exactly what was seen.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IrError {
    /// The input holds fewer bytes than the structure being read requires.
    BufferTooShort { expected: usize, actual: usize },
    /// The first four bytes are not `"FMIR"`; the bytes found are attached.
    BadMagic([u8; 4]),
    /// The header declares an IR version this code does not support.
    UnsupportedVersion(u16),
    /// A section's `offset + size` reaches past the end of the file.
    SectionOutOfBounds {
        section: usize,
        offset: u32,
        size: u32,
        file_len: usize,
    },
    /// The byte is not a known opcode.
    InvalidOpcode(u8),
    /// The byte is not a known slot type.
    InvalidSlotType(u8),
    /// The byte is not a known island trigger.
    InvalidIslandTrigger(u8),
    /// The byte is not a known props mode.
    InvalidPropsMode(u8),
    /// The byte is not a known slot source.
    InvalidSlotSource(u8),
    /// A string-table index is past the end of the table.
    StringIndexOutOfBounds { index: u32, len: usize },
    /// Bytes that should have been text are not valid UTF-8.
    InvalidUtf8(String),
    /// LIST opcodes were nested deeper than the allowed maximum.
    ListDepthExceeded { max: u8 },
    /// No entry with this id exists in the island table.
    IslandNotFound(u16),
    /// JSON input could not be parsed.
    JsonParseError(String),
    /// A section descriptor holds invalid values (e.g. integer overflow).
    InvalidSection,
    /// Recursive opcode nesting (SHOW_IF, LIST, SWITCH, etc.) went deeper
    /// than the maximum allowed depth.
    RecursionLimitExceeded,
}
75
76impl fmt::Display for IrError {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        match self {
79            IrError::BufferTooShort { expected, actual } => {
80                write!(
81                    f,
82                    "buffer too short: expected at least {expected} bytes, got {actual}"
83                )
84            }
85            IrError::BadMagic(got) => {
86                write!(f, "bad magic: expected FMIR, got {:?}", got)
87            }
88            IrError::UnsupportedVersion(v) => {
89                write!(f, "unsupported IR version: {v} (expected {IR_VERSION})")
90            }
91            IrError::SectionOutOfBounds {
92                section,
93                offset,
94                size,
95                file_len,
96            } => {
97                write!(
98                    f,
99                    "section {section} out of bounds: offset={offset}, size={size}, file_len={file_len}"
100                )
101            }
102            IrError::InvalidOpcode(b) => write!(f, "invalid opcode: 0x{b:02x}"),
103            IrError::InvalidSlotType(b) => write!(f, "invalid slot type: 0x{b:02x}"),
104            IrError::InvalidIslandTrigger(b) => {
105                write!(f, "invalid island trigger: 0x{b:02x}")
106            }
107            IrError::InvalidPropsMode(b) => write!(f, "invalid props mode: 0x{b:02x}"),
108            IrError::InvalidSlotSource(b) => write!(f, "invalid slot source: 0x{b:02x}"),
109            IrError::StringIndexOutOfBounds { index, len } => {
110                write!(
111                    f,
112                    "string index {index} out of bounds (table has {len} entries)"
113                )
114            }
115            IrError::InvalidUtf8(msg) => write!(f, "invalid UTF-8: {msg}"),
116            IrError::ListDepthExceeded { max } => {
117                write!(f, "nested LIST depth exceeded maximum of {max}")
118            }
119            IrError::IslandNotFound(id) => {
120                write!(f, "island with id {id} not found in island table")
121            }
122            IrError::JsonParseError(msg) => {
123                write!(f, "JSON parse error: {msg}")
124            }
125            IrError::InvalidSection => {
126                write!(f, "invalid section descriptor (integer overflow)")
127            }
128            IrError::RecursionLimitExceeded => {
129                write!(f, "recursion limit exceeded (maximum depth: 64)")
130            }
131        }
132    }
133}
134
135impl std::error::Error for IrError {}
136
137// ---------------------------------------------------------------------------
138// Header (16 bytes)
139// ---------------------------------------------------------------------------
140
/// Decoded form of the 16-byte header that opens every `.fmir` file.
///
/// On-disk layout (all integers little-endian):
/// ```text
/// [0..4)   magic        – b"FMIR"
/// [4..6)   version      – u16
/// [6..8)   flags        – u16 (reserved, must be 0)
/// [8..16)  source_hash  – u64 (hash of original source)
/// ```
///
/// The magic bytes are checked during parsing and are not stored here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IrHeader {
    /// IR format version read from bytes `[4..6)`.
    pub version: u16,
    /// Reserved flag bits read from bytes `[6..8)`.
    pub flags: u16,
    /// Hash of the original source, read from bytes `[8..16)`.
    pub source_hash: u64,
}
156
157impl IrHeader {
158    /// Parse an `IrHeader` from the first 16 bytes of `data`.
159    pub fn parse(data: &[u8]) -> Result<Self, IrError> {
160        if data.len() < HEADER_SIZE {
161            return Err(IrError::BufferTooShort {
162                expected: HEADER_SIZE,
163                actual: data.len(),
164            });
165        }
166
167        let magic: [u8; 4] = data[0..4].try_into().unwrap();
168        if &magic != MAGIC {
169            return Err(IrError::BadMagic(magic));
170        }
171
172        let version = u16::from_le_bytes(data[4..6].try_into().unwrap());
173        if version != IR_VERSION {
174            return Err(IrError::UnsupportedVersion(version));
175        }
176
177        let flags = u16::from_le_bytes(data[6..8].try_into().unwrap());
178        let source_hash = u64::from_le_bytes(data[8..16].try_into().unwrap());
179
180        Ok(IrHeader {
181            version,
182            flags,
183            source_hash,
184        })
185    }
186}
187
188// ---------------------------------------------------------------------------
189// Section table (32 bytes)
190// ---------------------------------------------------------------------------
191
/// Location of one section inside the file, stored on disk as two
/// little-endian `u32` values (offset first, then size).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SectionDescriptor {
    /// Byte offset at which the section starts.
    pub offset: u32,
    /// Length of the section in bytes.
    pub size: u32,
}
198
199/// Section table — four section descriptors immediately following the header.
200///
201/// Sections (in order):
202/// 0. Bytecode
203/// 1. String table
204/// 2. Slot table
205/// 3. Island table
206///
207/// Layout: 4 x (offset u32 + size u32) = 32 bytes.
208#[derive(Debug, Clone, Copy, PartialEq, Eq)]
209pub struct SectionTable {
210    pub sections: [SectionDescriptor; 4],
211}
212
213impl SectionTable {
214    /// Parse a `SectionTable` from `data` (must be at least 32 bytes).
215    pub fn parse(data: &[u8]) -> Result<Self, IrError> {
216        if data.len() < SECTION_TABLE_SIZE {
217            return Err(IrError::BufferTooShort {
218                expected: SECTION_TABLE_SIZE,
219                actual: data.len(),
220            });
221        }
222
223        let mut sections = [SectionDescriptor { offset: 0, size: 0 }; 4];
224        for (i, section) in sections.iter_mut().enumerate() {
225            let base = i * 8;
226            let offset = u32::from_le_bytes(data[base..base + 4].try_into().unwrap());
227            let size = u32::from_le_bytes(data[base + 4..base + 8].try_into().unwrap());
228            *section = SectionDescriptor { offset, size };
229        }
230
231        Ok(SectionTable { sections })
232    }
233
234    /// Validate that every section falls within `file_len` bytes.
235    pub fn validate(&self, file_len: usize) -> Result<(), IrError> {
236        for (i, sec) in self.sections.iter().enumerate() {
237            let end = (sec.offset as usize)
238                .checked_add(sec.size as usize)
239                .ok_or(IrError::InvalidSection)?;
240            if end > file_len {
241                return Err(IrError::SectionOutOfBounds {
242                    section: i,
243                    offset: sec.offset,
244                    size: sec.size,
245                    file_len,
246                });
247            }
248        }
249        Ok(())
250    }
251}
252
253// ---------------------------------------------------------------------------
// Opcode enum (18 opcodes, 0x01–0x12)
255// ---------------------------------------------------------------------------
256
/// Instruction opcodes of the FMIR bytecode stream.
///
/// Each variant's discriminant is the byte value that encodes it; the
/// values run contiguously from `0x01` to `0x12`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum Opcode {
    OpenTag = 0x01,
    CloseTag = 0x02,
    VoidTag = 0x03,
    Text = 0x04,
    DynText = 0x05,
    DynAttr = 0x06,
    ShowIf = 0x07,
    ShowElse = 0x08,
    Switch = 0x09,
    List = 0x0A,
    IslandStart = 0x0B,
    IslandEnd = 0x0C,
    TryStart = 0x0D,
    Fallback = 0x0E,
    Preload = 0x0F,
    Comment = 0x10,
    ListItemKey = 0x11,
    /// Copies a named property out of an Object slot into a target slot.
    ///
    /// Operand layout: src_slot_id(u16) + prop_str_idx(u32) + target_slot_id(u16)
    Prop = 0x12,
}
282
283impl Opcode {
284    /// Convert a raw byte to an `Opcode`.
285    pub fn from_byte(b: u8) -> Result<Self, IrError> {
286        match b {
287            0x01 => Ok(Opcode::OpenTag),
288            0x02 => Ok(Opcode::CloseTag),
289            0x03 => Ok(Opcode::VoidTag),
290            0x04 => Ok(Opcode::Text),
291            0x05 => Ok(Opcode::DynText),
292            0x06 => Ok(Opcode::DynAttr),
293            0x07 => Ok(Opcode::ShowIf),
294            0x08 => Ok(Opcode::ShowElse),
295            0x09 => Ok(Opcode::Switch),
296            0x0A => Ok(Opcode::List),
297            0x0B => Ok(Opcode::IslandStart),
298            0x0C => Ok(Opcode::IslandEnd),
299            0x0D => Ok(Opcode::TryStart),
300            0x0E => Ok(Opcode::Fallback),
301            0x0F => Ok(Opcode::Preload),
302            0x10 => Ok(Opcode::Comment),
303            0x11 => Ok(Opcode::ListItemKey),
304            0x12 => Ok(Opcode::Prop),
305            _ => Err(IrError::InvalidOpcode(b)),
306        }
307    }
308}
309
310// ---------------------------------------------------------------------------
311// SlotType enum (5 types)
312// ---------------------------------------------------------------------------
313
/// Type hint attached to a slot entry.
///
/// Each variant's discriminant is the byte value that encodes it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum SlotType {
    Text = 0x01,
    Bool = 0x02,
    Number = 0x03,
    Array = 0x04,
    Object = 0x05,
}
324
325impl SlotType {
326    /// Convert a raw byte to a `SlotType`.
327    pub fn from_byte(b: u8) -> Result<Self, IrError> {
328        match b {
329            0x01 => Ok(SlotType::Text),
330            0x02 => Ok(SlotType::Bool),
331            0x03 => Ok(SlotType::Number),
332            0x04 => Ok(SlotType::Array),
333            0x05 => Ok(SlotType::Object),
334            _ => Err(IrError::InvalidSlotType(b)),
335        }
336    }
337}
338
339// ---------------------------------------------------------------------------
340// IslandTrigger enum (4 triggers)
341// ---------------------------------------------------------------------------
342
/// The condition under which an island is hydrated.
///
/// Each variant's discriminant is the byte value that encodes it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum IslandTrigger {
    Load = 0x01,
    Visible = 0x02,
    Interaction = 0x03,
    Idle = 0x04,
}
352
353impl IslandTrigger {
354    /// Convert a raw byte to an `IslandTrigger`.
355    pub fn from_byte(b: u8) -> Result<Self, IrError> {
356        match b {
357            0x01 => Ok(IslandTrigger::Load),
358            0x02 => Ok(IslandTrigger::Visible),
359            0x03 => Ok(IslandTrigger::Interaction),
360            0x04 => Ok(IslandTrigger::Idle),
361            _ => Err(IrError::InvalidIslandTrigger(b)),
362        }
363    }
364}
365
366// ---------------------------------------------------------------------------
367// PropsMode enum (3 modes)
368// ---------------------------------------------------------------------------
369
/// The delivery mechanism for an island's props.
///
/// Each variant's discriminant is the byte value that encodes it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum PropsMode {
    Inline = 0x01,
    ScriptTag = 0x02,
    Deferred = 0x03,
}
378
379impl PropsMode {
380    /// Convert a raw byte to a `PropsMode`.
381    pub fn from_byte(b: u8) -> Result<Self, IrError> {
382        match b {
383            0x01 => Ok(PropsMode::Inline),
384            0x02 => Ok(PropsMode::ScriptTag),
385            0x03 => Ok(PropsMode::Deferred),
386            _ => Err(IrError::InvalidPropsMode(b)),
387        }
388    }
389}
390
391// ---------------------------------------------------------------------------
392// SlotSource enum (2 sources)
393// ---------------------------------------------------------------------------
394
/// The runtime origin of a slot's value.
///
/// Each variant's discriminant is the byte value that encodes it. Unlike
/// the other enums in this file, the encoding starts at `0x00`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum SlotSource {
    Server = 0x00,
    Client = 0x01,
}
402
403impl SlotSource {
404    /// Convert a raw byte to a `SlotSource`.
405    pub fn from_byte(b: u8) -> Result<Self, IrError> {
406        match b {
407            0x00 => Ok(SlotSource::Server),
408            0x01 => Ok(SlotSource::Client),
409            _ => Err(IrError::InvalidSlotSource(b)),
410        }
411    }
412}
413
414// ---------------------------------------------------------------------------
415// SlotEntry
416// ---------------------------------------------------------------------------
417
418/// A slot declaration in the slot table.
419#[derive(Debug, Clone, PartialEq, Eq)]
420pub struct SlotEntry {
421    /// Unique slot identifier within the module.
422    pub slot_id: u16,
423    /// Index into the string table for the slot name.
424    pub name_str_idx: u32,
425    /// Expected data type for this slot.
426    pub type_hint: SlotType,
427    /// Where the slot value originates at runtime.
428    pub source: SlotSource,
429    /// Default value bytes (empty if no default).
430    pub default_bytes: Vec<u8>,
431}
432
433// ---------------------------------------------------------------------------
434// IslandEntry
435// ---------------------------------------------------------------------------
436
437/// An island declaration in the island table.
438#[derive(Debug, Clone, PartialEq, Eq)]
439pub struct IslandEntry {
440    /// Unique island identifier within the module.
441    pub id: u16,
442    /// When this island should hydrate.
443    pub trigger: IslandTrigger,
444    /// How props are delivered to the island.
445    pub props_mode: PropsMode,
446    /// Index into the string table for the island name.
447    pub name_str_idx: u32,
448    /// Byte offset of the ISLAND_START opcode in the bytecode stream.
449    pub byte_offset: u32,
450    /// Which slots belong to this island.
451    pub slot_ids: Vec<u16>,
452}
453
454// ---------------------------------------------------------------------------
455// Tests
456// ---------------------------------------------------------------------------
457
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize a 16-byte FMIR header carrying the given field values.
    fn make_header(version: u16, flags: u16, source_hash: u64) -> Vec<u8> {
        let mut buf = Vec::with_capacity(HEADER_SIZE);
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&version.to_le_bytes());
        buf.extend_from_slice(&flags.to_le_bytes());
        buf.extend_from_slice(&source_hash.to_le_bytes());
        buf
    }

    /// Serialize a 32-byte section table from four (offset, size) pairs.
    fn make_section_table(sections: [(u32, u32); 4]) -> Vec<u8> {
        let mut buf = Vec::with_capacity(SECTION_TABLE_SIZE);
        for (offset, size) in &sections {
            buf.extend_from_slice(&offset.to_le_bytes());
            buf.extend_from_slice(&size.to_le_bytes());
        }
        buf
    }

    // -- Header -------------------------------------------------------------

    #[test]
    fn parse_valid_header() {
        // Use IR_VERSION rather than a literal so a version bump does not
        // silently invalidate this test.
        let data = make_header(IR_VERSION, 0, 0xDEAD_BEEF_CAFE_BABE);
        assert_eq!(data.len(), HEADER_SIZE);
        let hdr = IrHeader::parse(&data).unwrap();
        assert_eq!(hdr.version, IR_VERSION);
        assert_eq!(hdr.flags, 0);
        assert_eq!(hdr.source_hash, 0xDEAD_BEEF_CAFE_BABE);
    }

    #[test]
    fn parse_header_ignores_trailing_bytes() {
        let mut data = make_header(IR_VERSION, 0, 42);
        data.extend_from_slice(&[0xAA; 8]);
        let hdr = IrHeader::parse(&data).unwrap();
        assert_eq!(hdr.source_hash, 42);
    }

    #[test]
    fn reject_short_header() {
        let data = make_header(IR_VERSION, 0, 0);
        let err = IrHeader::parse(&data[..HEADER_SIZE - 1]).unwrap_err();
        assert_eq!(
            err,
            IrError::BufferTooShort {
                expected: HEADER_SIZE,
                actual: HEADER_SIZE - 1,
            }
        );
        assert_eq!(
            IrHeader::parse(&[]).unwrap_err(),
            IrError::BufferTooShort {
                expected: HEADER_SIZE,
                actual: 0,
            }
        );
    }

    #[test]
    fn reject_bad_magic() {
        let mut data = make_header(IR_VERSION, 0, 0);
        data[0..4].copy_from_slice(b"NOPE");
        let err = IrHeader::parse(&data).unwrap_err();
        assert_eq!(err, IrError::BadMagic(*b"NOPE"));
    }

    #[test]
    fn reject_unsupported_version() {
        // Any value different from IR_VERSION must be rejected.
        let bogus = IR_VERSION.wrapping_add(1);
        let data = make_header(bogus, 0, 0);
        let err = IrHeader::parse(&data).unwrap_err();
        assert_eq!(err, IrError::UnsupportedVersion(bogus));
    }

    // -- Section table ------------------------------------------------------

    #[test]
    fn parse_section_table() {
        let layout = [
            (48, 100),  // bytecode
            (148, 200), // string table
            (348, 50),  // slot table
            (398, 30),  // island table
        ];
        let data = make_section_table(layout);
        let st = SectionTable::parse(&data).unwrap();
        for (i, (offset, size)) in layout.iter().enumerate() {
            assert_eq!(
                st.sections[i],
                SectionDescriptor {
                    offset: *offset,
                    size: *size
                },
                "section {i}"
            );
        }
    }

    #[test]
    fn reject_short_section_table() {
        let data = make_section_table([(0, 0); 4]);
        let err = SectionTable::parse(&data[..SECTION_TABLE_SIZE - 1]).unwrap_err();
        assert_eq!(
            err,
            IrError::BufferTooShort {
                expected: SECTION_TABLE_SIZE,
                actual: SECTION_TABLE_SIZE - 1,
            }
        );
    }

    #[test]
    fn validate_section_bounds() {
        let data = make_section_table([
            (48, 100),
            (148, 200),
            (348, 50),
            (398, 9999), // way past end
        ]);
        let st = SectionTable::parse(&data).unwrap();
        let err = st.validate(500).unwrap_err();
        assert_eq!(
            err,
            IrError::SectionOutOfBounds {
                section: 3,
                offset: 398,
                size: 9999,
                file_len: 500,
            }
        );
    }

    #[test]
    fn validate_accepts_sections_within_file() {
        let data = make_section_table([(48, 100), (148, 200), (348, 50), (398, 30)]);
        let st = SectionTable::parse(&data).unwrap();
        // The last section ends at byte 428: a file of exactly that length is
        // fine, one byte shorter is not.
        assert_eq!(st.validate(428), Ok(()));
        assert_eq!(st.validate(10_000), Ok(()));
        assert_eq!(
            st.validate(427).unwrap_err(),
            IrError::SectionOutOfBounds {
                section: 3,
                offset: 398,
                size: 30,
                file_len: 427,
            }
        );
    }

    // -- Enums --------------------------------------------------------------

    #[test]
    fn opcode_from_byte_all_valid() {
        let expected = [
            (0x01, Opcode::OpenTag),
            (0x02, Opcode::CloseTag),
            (0x03, Opcode::VoidTag),
            (0x04, Opcode::Text),
            (0x05, Opcode::DynText),
            (0x06, Opcode::DynAttr),
            (0x07, Opcode::ShowIf),
            (0x08, Opcode::ShowElse),
            (0x09, Opcode::Switch),
            (0x0A, Opcode::List),
            (0x0B, Opcode::IslandStart),
            (0x0C, Opcode::IslandEnd),
            (0x0D, Opcode::TryStart),
            (0x0E, Opcode::Fallback),
            (0x0F, Opcode::Preload),
            (0x10, Opcode::Comment),
            (0x11, Opcode::ListItemKey),
            (0x12, Opcode::Prop),
        ];
        for (byte, op) in &expected {
            assert_eq!(Opcode::from_byte(*byte).unwrap(), *op, "byte 0x{byte:02x}");
            // The enum discriminant must round-trip to the same byte.
            assert_eq!(*op as u8, *byte, "discriminant of {op:?}");
        }
    }

    #[test]
    fn opcode_from_byte_invalid() {
        for byte in [0x00u8, 0x13, 0xFF].iter() {
            assert_eq!(
                Opcode::from_byte(*byte).unwrap_err(),
                IrError::InvalidOpcode(*byte),
                "byte 0x{byte:02x}"
            );
        }
    }

    #[test]
    fn slot_type_from_byte() {
        let expected = [
            (0x01, SlotType::Text),
            (0x02, SlotType::Bool),
            (0x03, SlotType::Number),
            (0x04, SlotType::Array),
            (0x05, SlotType::Object),
        ];
        for (byte, st) in &expected {
            assert_eq!(
                SlotType::from_byte(*byte).unwrap(),
                *st,
                "byte 0x{byte:02x}"
            );
        }
        assert_eq!(
            SlotType::from_byte(0x00).unwrap_err(),
            IrError::InvalidSlotType(0x00)
        );
        assert_eq!(
            SlotType::from_byte(0x06).unwrap_err(),
            IrError::InvalidSlotType(0x06)
        );
    }

    #[test]
    fn island_trigger_from_byte() {
        let expected = [
            (0x01, IslandTrigger::Load),
            (0x02, IslandTrigger::Visible),
            (0x03, IslandTrigger::Interaction),
            (0x04, IslandTrigger::Idle),
        ];
        for (byte, trigger) in &expected {
            assert_eq!(
                IslandTrigger::from_byte(*byte).unwrap(),
                *trigger,
                "byte 0x{byte:02x}"
            );
        }
        assert_eq!(
            IslandTrigger::from_byte(0x00).unwrap_err(),
            IrError::InvalidIslandTrigger(0x00)
        );
        assert_eq!(
            IslandTrigger::from_byte(0x05).unwrap_err(),
            IrError::InvalidIslandTrigger(0x05)
        );
    }

    #[test]
    fn props_mode_from_byte() {
        let expected = [
            (0x01, PropsMode::Inline),
            (0x02, PropsMode::ScriptTag),
            (0x03, PropsMode::Deferred),
        ];
        for (byte, mode) in &expected {
            assert_eq!(
                PropsMode::from_byte(*byte).unwrap(),
                *mode,
                "byte 0x{byte:02x}"
            );
        }
        assert_eq!(
            PropsMode::from_byte(0x00).unwrap_err(),
            IrError::InvalidPropsMode(0x00)
        );
        assert_eq!(
            PropsMode::from_byte(0x04).unwrap_err(),
            IrError::InvalidPropsMode(0x04)
        );
    }

    #[test]
    fn slot_source_from_byte() {
        assert_eq!(SlotSource::from_byte(0x00).unwrap(), SlotSource::Server);
        assert_eq!(SlotSource::from_byte(0x01).unwrap(), SlotSource::Client);
        assert_eq!(
            SlotSource::from_byte(0x02).unwrap_err(),
            IrError::InvalidSlotSource(0x02)
        );
        assert_eq!(
            SlotSource::from_byte(0xFF).unwrap_err(),
            IrError::InvalidSlotSource(0xFF)
        );
    }

    // -- Error display ------------------------------------------------------

    #[test]
    fn error_display_messages() {
        assert_eq!(
            IrError::InvalidOpcode(0x13).to_string(),
            "invalid opcode: 0x13"
        );
        assert_eq!(
            IrError::BufferTooShort {
                expected: 16,
                actual: 3
            }
            .to_string(),
            "buffer too short: expected at least 16 bytes, got 3"
        );
        assert_eq!(
            IrError::StringIndexOutOfBounds { index: 9, len: 2 }.to_string(),
            "string index 9 out of bounds (table has 2 entries)"
        );
    }
}
697}