mech_core/program/compiler/
sections.rs

1use crate::*;
2use super::*;
3
// Bytecode Compiler
5// ============================================================================
6
7// Format:
8// 1. Header
9// 2. Features
10// 3. Types
11// 4. Constants
12// 5. Symbols
13// 6. Instructions
14// 7. Dictionary
15
16// 1. Header
17// ----------------------------------------------------------------------------
18
/// Header section of a compiled bytecode image (section 1 of the format).
///
/// Stores the format/language versions plus a count, offset and/or length
/// for each of the following sections: features, types, constants, symbols,
/// instructions and dictionary. The serialized form is produced field by
/// field by `write_to` and parsed by `read_from`.
#[repr(C)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ByteCodeHeader {
  pub magic:        [u8; 4],   // file signature, e.g., b"MECH"
  pub version:        u8,      // bytecode format version
  pub mech_ver:       u16,     // Mech language version
  pub flags:          u16,     // reserved / feature bits
  pub reg_count:      u32,     // total virtual registers used
  pub instr_count:    u32,     // number of instructions
  
  pub feature_count:  u32,     // number of feature flags
  pub feature_off:    u64,     // offset to feature flags (array of u64)

  pub types_count:    u32,     // number of types
  pub types_off:      u64,     // offset to type section

  pub const_count:    u32,     // number of constants (entries)
  pub const_tbl_off:  u64,     // offset to constant table (array of entries)
  pub const_tbl_len:  u64,     // bytes in constant table area (entries only)
  pub const_blob_off: u64,     // offset to raw constant blob data
  pub const_blob_len: u64,     // bytes in blob (payloads)

  // NOTE(review): the name says "len" but the original comment said
  // "number of symbols" -- confirm whether this is a count or a byte length.
  pub symbols_len:    u64,     // number of symbols
  pub symbols_off:    u64,     // offset to symbol section
                               
  pub instr_off:      u64,     // offset to instruction stream
  pub instr_len:      u64,     // bytes of instruction stream

  pub dict_off:       u64,     // offset to dictionary
  pub dict_len:       u64,     // bytes in dictionary

  pub reserved:       u32,     // pad/alignment
}
53
54impl ByteCodeHeader {
55  // Header byte size when serialized. This is the number of bytes `write_to` will write.
56  // (Computed from the sum of sizes of each field written in little-endian.)
57  pub const HEADER_SIZE: usize = 4  // magic
58    + 1   // version
59    + 2   // mech_ver
60    + 2   // flags
61    + 4   // reg_count
62    + 4   // instr_count
63    + 4   // feature_count
64    + 8   // feature_off
65    + 4   // types_count
66    + 8   // types_off
67    + 4   // const_count
68    + 8   // const_tbl_off
69    + 8   // const_tbl_len
70    + 8   // const_blob_off
71    + 8   // const_blob_len
72    + 8   // symbols_len
73    + 8   // symbosl_off
74    + 8   // instr_off
75    + 8   // instr_len
76    + 8   // dict_off
77    + 8   // dict_len
78    + 4;  // reserved
79
80  // Serialize header using little-endian encoding.
81  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
82    // magic (4 bytes)
83    w.write_all(&self.magic)?;
84
85    // small fields
86    w.write_u8(self.version)?;
87    w.write_u16::<LittleEndian>(self.mech_ver)?;
88    w.write_u16::<LittleEndian>(self.flags)?;
89
90    // counts
91    w.write_u32::<LittleEndian>(self.reg_count)?;
92    w.write_u32::<LittleEndian>(self.instr_count)?;
93
94    // features (count + offset)
95    w.write_u32::<LittleEndian>(self.feature_count)?;
96    w.write_u64::<LittleEndian>(self.feature_off)?;
97
98    // types
99    w.write_u32::<LittleEndian>(self.types_count)?;
100    w.write_u64::<LittleEndian>(self.types_off)?;
101
102    // constants table / blob
103    w.write_u32::<LittleEndian>(self.const_count)?;
104    w.write_u64::<LittleEndian>(self.const_tbl_off)?;
105    w.write_u64::<LittleEndian>(self.const_tbl_len)?;
106    w.write_u64::<LittleEndian>(self.const_blob_off)?;
107    w.write_u64::<LittleEndian>(self.const_blob_len)?;
108
109    // symbols
110    w.write_u64::<LittleEndian>(self.symbols_len)?;
111    w.write_u64::<LittleEndian>(self.symbols_off)?;
112
113    // instructions
114    w.write_u64::<LittleEndian>(self.instr_off)?;
115    w.write_u64::<LittleEndian>(self.instr_len)?;
116
117    // dictionary
118    w.write_u64::<LittleEndian>(self.dict_off)?;
119    w.write_u64::<LittleEndian>(self.dict_len)?;
120
121    // footer
122    w.write_u32::<LittleEndian>(self.reserved)?;
123    Ok(())
124  }
125
126  // Read a header. Expects the same layout as `write_to`.
127  pub fn read_from(r: &mut impl Read) -> MResult<Self> {
128    let mut magic = [0u8; 4];
129    r.read_exact(&mut magic)?;
130
131    let version = r.read_u8()?;
132    let mech_ver = r.read_u16::<LittleEndian>()?;
133    let flags = r.read_u16::<LittleEndian>()?;
134
135    let reg_count = r.read_u32::<LittleEndian>()?;
136    let instr_count = r.read_u32::<LittleEndian>()?;
137
138    let feature_count = r.read_u32::<LittleEndian>()?;
139    let feature_off = r.read_u64::<LittleEndian>()?;
140
141    let types_count = r.read_u32::<LittleEndian>()?;
142    let types_off = r.read_u64::<LittleEndian>()?;
143
144    let const_count = r.read_u32::<LittleEndian>()?;
145    let const_tbl_off = r.read_u64::<LittleEndian>()?;
146    let const_tbl_len = r.read_u64::<LittleEndian>()?;
147    let const_blob_off = r.read_u64::<LittleEndian>()?;
148    let const_blob_len = r.read_u64::<LittleEndian>()?;
149
150    let symbols_len = r.read_u64::<LittleEndian>()?;
151    let symbols_off = r.read_u64::<LittleEndian>()?;
152
153    let instr_off = r.read_u64::<LittleEndian>()?;
154    let instr_len = r.read_u64::<LittleEndian>()?;
155
156    let dict_off = r.read_u64::<LittleEndian>()?;
157    let dict_len = r.read_u64::<LittleEndian>()?;
158
159    let reserved = r.read_u32::<LittleEndian>()?;
160
161    Ok(Self {
162      magic,
163      version,
164      mech_ver,
165      flags,
166      reg_count,
167      instr_count,
168      feature_count,
169      feature_off,
170      types_count,
171      types_off,
172      const_count,
173      const_tbl_off,
174      const_tbl_len,
175      const_blob_off,
176      const_blob_len,
177      instr_off,
178      instr_len,
179      symbols_len,
180      symbols_off,
181      dict_off,
182      dict_len,
183      reserved,
184    })
185  }
186
187  // Quick check: does the header magic match the expected magic?
188  pub fn validate_magic(&self, expected: &[u8;4]) -> bool {
189    &self.magic == expected
190  }
191}
192
193// 2. Features
194// ----------------------------------------------------------------------------
195
/// Built-in language/runtime features a bytecode image can declare.
///
/// Discriminants start at 1 and increase in declaration order, so the order
/// of the variants below is part of the encoding. `Custom` is pinned to
/// `0xFFFF`.
#[repr(u16)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FeatureKind {
  I8=1, I16, I32, I64, I128,
  U8, U16, U32, U64, U128,
  F32, F64, C64, R64, Index,
  String, Bool, Atom,
  Set, Map, Table, Tuple, Record, Enum,
  VariableDefine, VariableAssign, KindDefine,
  KindAnnotation, SubscriptRange, SubscriptFormula,
  RangeInclusive, RangeExclusive,
  DotIndexing, Swizzle, LogicalIndexing,
  Matrix1, Matrix2, Matrix3, Matrix4,
  Matrix2x3, Matrix3x2,
  RowVector2, RowVector3, RowVector4,
  Vector2, Vector3, Vector4,
  VectorD, MatrixD, RowVectorD,
  HorzCat, VertCat,
  Compiler, PrettyPrint, Serde,
  MatMul, Transpose, Dot, Cross,
  Add, Sub, Mul, Div, Exp, Mod, Neg, OpAssign,
  LT, LTE, GT, GTE, EQ, NEQ,
  And, Or, Xor, Not,
  Convert, Assign, Access,
  Functions, Formulas,
  Custom = 0xFFFF,
}

/// A feature flag: either one of the built-in kinds or a raw custom value.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FeatureFlag {
  Builtin(FeatureKind),
  Custom(u64),
}

impl FeatureFlag {
  /// Returns the flag as a `u64`.
  ///
  /// Built-in flags yield their `FeatureKind` discriminant widened to 64
  /// bits; custom flags yield the wrapped value unchanged.
  pub fn as_u64(&self) -> u64 {
    match *self {
      FeatureFlag::Custom(raw) => raw,
      FeatureFlag::Builtin(kind) => u64::from(kind as u16),
    }
  }
}
238
239// 3. Type Section
240// ----------------------------------------------------------------------------
241
/// Numeric tag identifying the kind of a type entry.
///
/// Discriminants start at 1 and follow declaration order (1..=46), so the
/// order of the variants is part of the encoding.
#[repr(u16)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum TypeTag {
  U8=1, U16, U32, U64, U128, I8, I16, I32, I64, I128,
  F32, F64, C64, R64, String, Bool, Id, Index, Empty, Any,
  MatrixU8, MatrixU16, MatrixU32, MatrixU64, MatrixU128,
  MatrixI8, MatrixI16, MatrixI32, MatrixI64, MatrixI128,
  MatrixF32, MatrixF64, MatrixC64, MatrixR64, MatrixBool, 
  MatrixString, MatrixIndex,
  EnumTag, Record, Map, Atom, 
  Table, Tuple, Reference, Set, OptionT,
}

impl TypeTag {
  // Every variant of the enum. Lookup compares discriminants, so the listing
  // order here does not affect results.
  const ALL: &'static [TypeTag] = &[
    TypeTag::U8, TypeTag::U16, TypeTag::U32, TypeTag::U64, TypeTag::U128,
    TypeTag::I8, TypeTag::I16, TypeTag::I32, TypeTag::I64, TypeTag::I128,
    TypeTag::F32, TypeTag::F64, TypeTag::C64, TypeTag::R64,
    TypeTag::String, TypeTag::Bool, TypeTag::Id, TypeTag::Index, TypeTag::Empty, TypeTag::Any,
    TypeTag::MatrixU8, TypeTag::MatrixU16, TypeTag::MatrixU32, TypeTag::MatrixU64, TypeTag::MatrixU128,
    TypeTag::MatrixI8, TypeTag::MatrixI16, TypeTag::MatrixI32, TypeTag::MatrixI64, TypeTag::MatrixI128,
    TypeTag::MatrixF32, TypeTag::MatrixF64, TypeTag::MatrixC64, TypeTag::MatrixR64, TypeTag::MatrixBool,
    TypeTag::MatrixString, TypeTag::MatrixIndex,
    TypeTag::EnumTag, TypeTag::Record, TypeTag::Map, TypeTag::Atom,
    TypeTag::Table, TypeTag::Tuple, TypeTag::Reference, TypeTag::Set, TypeTag::OptionT,
  ];

  /// Converts a raw tag back into a `TypeTag`.
  ///
  /// Returns `None` when `tag` is not the discriminant of any variant
  /// (0 and everything above 46).
  pub fn from_u16(tag: u16) -> Option<Self> {
    Self::ALL.iter().copied().find(|known| *known as u16 == tag)
  }
}
273
274#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
275#[derive(Debug, Clone, Eq, PartialEq)]
276pub struct TypeEntry {
277  pub tag: TypeTag,
278  pub bytes: Vec<u8>,
279}
280impl TypeEntry {
281  pub fn byte_len(&self) -> u64 {
282    2 + self.bytes.len() as u64
283  }
284}
285
286pub type TypeId = u32;
287
288#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
289#[derive(Default, Debug, Clone, Eq, PartialEq)]
290pub struct TypeSection {
291  pub interner: HashMap<ValueKind, TypeId>,
292  pub entries:  Vec<TypeEntry>, // index is TypeId
293}
294    
295impl TypeSection {
296
297  pub fn new() -> Self {
298    Self { interner: HashMap::new(), entries: Vec::new() }
299  }
300
301  pub fn get_or_intern(&mut self, vk: &ValueKind) -> TypeId {
302    if let Some(id) = self.interner.get(vk) { return *id; }
303    // recursively intern children and build payload
304    let (tag, mut bytes) = encode_value_kind(self, vk);
305    let id = self.entries.len() as u32;
306    self.entries.push(TypeEntry { tag, bytes });
307    self.interner.insert(vk.clone(), id);
308    id
309  }
310
311  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
312    w.write_u32::<LittleEndian>(self.entries.len() as u32)?;
313    for e in &self.entries {
314      w.write_u16::<LittleEndian>(e.tag as u16)?;
315      w.write_u16::<LittleEndian>(0)?;
316      w.write_u32::<LittleEndian>(1)?;
317      w.write_u32::<LittleEndian>(e.bytes.len() as u32)?;
318      w.write_all(&e.bytes)?;
319    }
320    Ok(())
321  }
322
323  pub fn byte_len(&self) -> u64 {
324    4 + self.entries.iter().map(|e| 12 + e.bytes.len() as u64).sum::<u64>()
325  }
326}
327
328// 4. Constants
329// ----------------------------------------------------------------------------
330
/// How a constant's payload is stored; serialized as a single byte.
#[repr(u8)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ConstEncoding {
  // The only encoding defined so far; encoded as 1.
  Inline = 1,
}
337
338#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
339#[derive(Debug, Clone, Eq, PartialEq)]
340pub struct ConstEntry {
341  pub type_id: u32,
342  pub enc:     ConstEncoding,
343  pub align:   u8,
344  pub flags:   u8,
345  pub reserved:u16,
346  pub offset:  u64,
347  pub length:  u64,
348}
349
350impl ConstEntry {
351  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
352    w.write_u32::<LittleEndian>(self.type_id)?;
353    w.write_u8(self.enc as u8)?;
354    w.write_u8(self.align)?;
355    w.write_u8(self.flags)?;
356    w.write_u8(0)?; // pad to 4 bytes for the small fields
357    w.write_u64::<LittleEndian>(self.offset)?;
358    w.write_u64::<LittleEndian>(self.length)?;
359    Ok(())
360  }
361  pub fn byte_len() -> u64 { 4 + 1 + 1 + 1 + 1 + 8 + 8 } // = 24 bytes
362}
363
364// 5. Symbol Table
365// ----------------------------------------------------------------------------
366
367pub struct SymbolEntry {
368  pub id: u64,          // unique identifier for the symbol
369  pub mutable: bool,
370  pub reg: Register,    // register index this symbol maps to
371}
372
373impl SymbolEntry {
374
375  pub fn new(id: u64, mutable: bool, reg: Register) -> Self {
376    Self { id, mutable, reg }
377  }
378
379  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
380    w.write_u64::<LittleEndian>(self.id)?;
381    w.write_u8(if self.mutable { 1 } else { 0 })?;
382    w.write_u32::<LittleEndian>(self.reg)?;
383    Ok(())
384  }
385}
386
387// 6. Instruction Encoding (fixed forms)
388// ----------------------------------------------------------------------------
389
/// Instruction opcodes; each is serialized as a single byte.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OpCode {
  ConstLoad = 0x01,
  NullOp    = 0x10,
  Unop      = 0x20,
  Binop     = 0x30,
  Ternop    = 0x40,
  Quadop    = 0x50,
  VarArg    = 0x60,
  Return    = 0xFF,
}

impl OpCode {
  // Every defined opcode; `from_u8` matches against their byte values.
  const ALL: [OpCode; 8] = [
    OpCode::ConstLoad,
    OpCode::NullOp,
    OpCode::Unop,
    OpCode::Binop,
    OpCode::Ternop,
    OpCode::Quadop,
    OpCode::VarArg,
    OpCode::Return,
  ];

  /// Decodes an opcode byte, returning `None` for any value that is not
  /// one of the defined opcodes.
  pub fn from_u8(num: u8) -> Option<OpCode> {
    Self::ALL.iter().copied().find(|op| *op as u8 == num)
  }
}
418
419#[derive(Debug, Clone)]
420pub enum EncodedInstr {
421  ConstLoad { dst: u32, const_id: u32 },                               // [u64 opcode][u32 dst][u32 const_id]
422  NullOp    { fxn_id: u64, dst: u32 },                                 // [u64 opcode][u64 fxn_id][u32 dst]
423  UnOp      { fxn_id: u64, dst: u32, src: u32 },                       // [u64 opcode][u32 dst][u32 src]
424  BinOp     { fxn_id: u64, dst: u32, lhs: u32, rhs: u32 },             // [u64 opcode][u32 dst][u32 lhs][u32 rhs]
425  TernOp    { fxn_id: u64, dst: u32, a: u32, b: u32, c: u32 },         // [u64 opcode][u32 dst][u32 a][u32 b][u32 c]
426  QuadOp    { fxn_id: u64, dst: u32, a: u32, b: u32, c: u32, d: u32 }, // [u64 opcode][u32 dst][u32 a][u32 b][u32 c][u32 d]
427  VarArg    { fxn_id: u64, dst: u32, args: Vec<u32> },                 // [u64 opcode][u64 fxn_id][u32 dst][u32 arg_count][u32 args...]
428  Ret       { src: u32 },                                              // [u64 opcode][u32 src]
429}
430
431impl EncodedInstr {
432  pub fn byte_len(&self) -> u64 {
433    match self {
434      EncodedInstr::ConstLoad{..} => 1 + 4 + 4,
435      EncodedInstr::NullOp{..}    => 1 + 8 + 4,
436      EncodedInstr::UnOp{..}      => 1 + 8 + 4 + 4,
437      EncodedInstr::BinOp{..}     => 1 + 8 + 4 + 4 + 4,
438      EncodedInstr::TernOp{..}    => 1 + 8 + 4 + 4 + 4 + 4,
439      EncodedInstr::QuadOp{..}    => 1 + 8 + 4 + 4 + 4 + 4 + 4,
440      EncodedInstr::VarArg{ args, .. } => 1 + 8 + 4 + 4 + (4 * args.len() as u64),
441      EncodedInstr::Ret{..}       => 1 + 4,
442    }
443  }
444  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
445    match self {
446      EncodedInstr::ConstLoad{ dst, const_id } => {
447        w.write_u8(OpCode::ConstLoad as u8)?;
448        w.write_u32::<LittleEndian>(*dst)?;
449        w.write_u32::<LittleEndian>(*const_id)?;
450      }
451      EncodedInstr::NullOp{ fxn_id, dst } => {
452        w.write_u8(OpCode::NullOp as u8)?;
453        w.write_u64::<LittleEndian>(*fxn_id)?;
454        w.write_u32::<LittleEndian>(*dst)?;
455      }
456      EncodedInstr::UnOp{ fxn_id, dst, src } => {
457        w.write_u8(OpCode::Unop as u8)?;
458        w.write_u64::<LittleEndian>(*fxn_id)?;
459        w.write_u32::<LittleEndian>(*dst)?;
460        w.write_u32::<LittleEndian>(*src)?;
461      }
462      EncodedInstr::BinOp{ fxn_id, dst, lhs, rhs } => {
463        w.write_u8(OpCode::Binop as u8)?;
464        w.write_u64::<LittleEndian>(*fxn_id)?;
465        w.write_u32::<LittleEndian>(*dst)?;
466        w.write_u32::<LittleEndian>(*lhs)?;
467        w.write_u32::<LittleEndian>(*rhs)?;
468      }
469      EncodedInstr::TernOp{ fxn_id, dst, a, b, c } => {
470        w.write_u8(OpCode::Ternop as u8)?;
471        w.write_u64::<LittleEndian>(*fxn_id)?;
472        w.write_u32::<LittleEndian>(*dst)?;
473        w.write_u32::<LittleEndian>(*a)?;
474        w.write_u32::<LittleEndian>(*b)?;
475        w.write_u32::<LittleEndian>(*c)?;
476      }
477      EncodedInstr::QuadOp{ fxn_id, dst, a, b, c, d } => {
478        w.write_u8(OpCode::Quadop as u8)?;
479        w.write_u64::<LittleEndian>(*fxn_id)?;
480        w.write_u32::<LittleEndian>(*dst)?;
481        w.write_u32::<LittleEndian>(*a)?;
482        w.write_u32::<LittleEndian>(*b)?;
483        w.write_u32::<LittleEndian>(*c)?;
484        w.write_u32::<LittleEndian>(*d)?;
485      }
486      EncodedInstr::VarArg{ fxn_id, dst, args } => {
487        w.write_u8(OpCode::VarArg as u8)?;
488        w.write_u64::<LittleEndian>(*fxn_id)?;
489        w.write_u32::<LittleEndian>(*dst)?;
490        w.write_u32::<LittleEndian>(args.len() as u32)?;
491        for a in args {
492          w.write_u32::<LittleEndian>(*a)?;
493        }
494      }
495      EncodedInstr::Ret{ src } => {
496        w.write_u8(OpCode::Return as u8)?;
497        w.write_u32::<LittleEndian>(*src)?;
498      }
499    }
500    Ok(())
501  }
502}
503
504// 7. Dictionary
505// ----------------------------------------------------------------------------
506
507#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
508#[derive(Debug, Clone, Eq, PartialEq)]
509pub struct DictEntry {
510  pub id: u64,          // unique identifier for the dictionary entry
511  pub name: String,     // name of the entry
512} 
513
514impl DictEntry {
515  pub fn new(id: u64, name: &str) -> Self {
516    Self { id, name: name.to_string() }
517  }
518
519  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
520    w.write_u64::<LittleEndian>(self.id)?;
521    let name_bytes = self.name.as_bytes();
522    w.write_u32::<LittleEndian>(name_bytes.len() as u32)?;
523    w.write_all(name_bytes)?;
524    Ok(())
525  }
526}
527
528