mech_core/program/compiler/
sections.rs

1use crate::*;
2use super::*;
3
// Bytecode Compiler
5// ============================================================================
6
7// Format:
8// 1. Header
9// 2. Features
10// 3. Types
11// 4. Constants
12// 5. Symbols
13// 6. Instructions
14// 7. Dictionary
15
16// 1. Header
17// ----------------------------------------------------------------------------
18
19#[repr(C)]
20#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
21#[derive(Debug, Clone, Eq, PartialEq)]
22pub struct ByteCodeHeader {
23  pub magic:        [u8; 4],   // e.g., b"MECH"
24  pub version:        u8,      // bytecode format version
25  pub mech_ver:       u16,     // Mech language version
26  pub flags:          u16,     // reserved/feature bit
27  pub reg_count:      u32,     // total virtual registers used
28  pub instr_count:    u32,     // number of instructions
29  
30  pub feature_count:  u32,     // number of feature flags  
31  pub feature_off:    u64,     // offset to feature flags (array of u64)
32
33  pub types_count:    u32,     // number of types
34  pub types_off:      u64,     // offset to type section
35
36  pub const_count:    u32,     // number of constants (entries
37  pub const_tbl_off:  u64,     // offset to constant table (array of entries)
38  pub const_tbl_len:  u64,     // bytes in constant table area (entries only)
39  pub const_blob_off: u64,     // offset to raw constant blob data
40  pub const_blob_len: u64,     // bytes in blob (payloads
41
42  pub symbols_len:    u64,     // number of symbols
43  pub symbols_off:    u64,     // offset to symbol section
44                               
45  pub instr_off:      u64,     // offset to instruction stream
46  pub instr_len:      u64,     // bytes of instruction stream
47
48  pub dict_off:       u64,     // offset to dictionary
49  pub dict_len:       u64,     // bytes in dictionary
50
51  pub reserved:       u32,     // pad/alignment
52}
53
54impl ByteCodeHeader {
55  // Header byte size when serialized. This is the number of bytes `write_to` will write.
56  // (Computed from the sum of sizes of each field written in little-endian.)
57  pub const HEADER_SIZE: usize = 4  // magic
58    + 1   // version
59    + 2   // mech_ver
60    + 2   // flags
61    + 4   // reg_count
62    + 4   // instr_count
63    + 4   // feature_count
64    + 8   // feature_off
65    + 4   // types_count
66    + 8   // types_off
67    + 4   // const_count
68    + 8   // const_tbl_off
69    + 8   // const_tbl_len
70    + 8   // const_blob_off
71    + 8   // const_blob_len
72    + 8   // symbols_len
73    + 8   // symbosl_off
74    + 8   // instr_off
75    + 8   // instr_len
76    + 8   // dict_off
77    + 8   // dict_len
78    + 4;  // reserved
79
80  // Serialize header using little-endian encoding.
81  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
82    // magic (4 bytes)
83    w.write_all(&self.magic)?;
84
85    // small fields
86    w.write_u8(self.version)?;
87    w.write_u16::<LittleEndian>(self.mech_ver)?;
88    w.write_u16::<LittleEndian>(self.flags)?;
89
90    // counts
91    w.write_u32::<LittleEndian>(self.reg_count)?;
92    w.write_u32::<LittleEndian>(self.instr_count)?;
93
94    // features (count + offset)
95    w.write_u32::<LittleEndian>(self.feature_count)?;
96    w.write_u64::<LittleEndian>(self.feature_off)?;
97
98    // types
99    w.write_u32::<LittleEndian>(self.types_count)?;
100    w.write_u64::<LittleEndian>(self.types_off)?;
101
102    // constants table / blob
103    w.write_u32::<LittleEndian>(self.const_count)?;
104    w.write_u64::<LittleEndian>(self.const_tbl_off)?;
105    w.write_u64::<LittleEndian>(self.const_tbl_len)?;
106    w.write_u64::<LittleEndian>(self.const_blob_off)?;
107    w.write_u64::<LittleEndian>(self.const_blob_len)?;
108
109    // symbols
110    w.write_u64::<LittleEndian>(self.symbols_len)?;
111    w.write_u64::<LittleEndian>(self.symbols_off)?;
112
113    // instructions
114    w.write_u64::<LittleEndian>(self.instr_off)?;
115    w.write_u64::<LittleEndian>(self.instr_len)?;
116
117    // dictionary
118    w.write_u64::<LittleEndian>(self.dict_off)?;
119    w.write_u64::<LittleEndian>(self.dict_len)?;
120
121    // footer
122    w.write_u32::<LittleEndian>(self.reserved)?;
123    Ok(())
124  }
125
126  // Read a header. Expects the same layout as `write_to`.
127  pub fn read_from(r: &mut impl Read) -> MResult<Self> {
128    let mut magic = [0u8; 4];
129    r.read_exact(&mut magic)?;
130
131    let version = r.read_u8()?;
132    let mech_ver = r.read_u16::<LittleEndian>()?;
133    let flags = r.read_u16::<LittleEndian>()?;
134
135    let reg_count = r.read_u32::<LittleEndian>()?;
136    let instr_count = r.read_u32::<LittleEndian>()?;
137
138    let feature_count = r.read_u32::<LittleEndian>()?;
139    let feature_off = r.read_u64::<LittleEndian>()?;
140
141    let types_count = r.read_u32::<LittleEndian>()?;
142    let types_off = r.read_u64::<LittleEndian>()?;
143
144    let const_count = r.read_u32::<LittleEndian>()?;
145    let const_tbl_off = r.read_u64::<LittleEndian>()?;
146    let const_tbl_len = r.read_u64::<LittleEndian>()?;
147    let const_blob_off = r.read_u64::<LittleEndian>()?;
148    let const_blob_len = r.read_u64::<LittleEndian>()?;
149
150    let symbols_len = r.read_u64::<LittleEndian>()?;
151    let symbols_off = r.read_u64::<LittleEndian>()?;
152
153    let instr_off = r.read_u64::<LittleEndian>()?;
154    let instr_len = r.read_u64::<LittleEndian>()?;
155
156    let dict_off = r.read_u64::<LittleEndian>()?;
157    let dict_len = r.read_u64::<LittleEndian>()?;
158
159    let reserved = r.read_u32::<LittleEndian>()?;
160
161    Ok(Self {
162      magic,
163      version,
164      mech_ver,
165      flags,
166      reg_count,
167      instr_count,
168      feature_count,
169      feature_off,
170      types_count,
171      types_off,
172      const_count,
173      const_tbl_off,
174      const_tbl_len,
175      const_blob_off,
176      const_blob_len,
177      instr_off,
178      instr_len,
179      symbols_len,
180      symbols_off,
181      dict_off,
182      dict_len,
183      reserved,
184    })
185  }
186
187  // Quick check: does the header magic match the expected magic?
188  pub fn validate_magic(&self, expected: &[u8;4]) -> bool {
189    &self.magic == expected
190  }
191}
192
193// 2. Features
194// ----------------------------------------------------------------------------
195
/// Built-in feature identifiers, stored as `u16`.
///
/// Numbering starts at 1 and increases by one per variant in the order
/// listed, except `Custom`, which is pinned to `0xFFFF`. Because the values
/// are implicit, reordering or inserting variants changes the numbering.
/// The group headings below are a reading aid inferred from variant names.
#[repr(u16)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FeatureKind {
  // Scalar kinds
  I8 = 1, I16, I32, I64, I128,
  U8, U16, U32, U64, U128,
  F32, F64, C64, R64,
  String, Bool,

  // Collection kinds
  Set, Map, Table, Tuple, Record, Enum,

  // Definitions, annotations and indexing
  VariableDefine, VariableAssign, KindDefine,
  KindAnnotation, SubscriptRange, SubscriptFormula,
  DotIndexing, Swizzle,

  // Matrix and vector shapes
  Matrix1, Matrix2, Matrix3, Matrix4,
  Matrix2x3, Matrix3x2,
  RowVector2, RowVector3, RowVector4,
  Vector2, Vector3, Vector4,
  VectorD, MatrixD, RowVectorD,

  // Concatenation
  HorzCat, VertCat,

  // Tooling
  Compiler, PrettyPrint, Serde,

  // Operators
  MatMul, Transpose, Dot, Cross,
  Add, Sub, Mul, Div,
  LT, LTE, GT, GTE, EQ, NEQ,
  And, Or, Xor, Not,

  // Explicitly pinned value
  Custom = 0xFFFF,
}
220
221#[derive(Debug, Clone, PartialEq, Eq, Hash)]
222pub enum FeatureFlag {
223  Builtin(FeatureKind),
224  Custom(u64),
225}
226
227impl FeatureFlag {
228  pub fn as_u64(&self) -> u64 {
229    match self {
230      FeatureFlag::Builtin(f) => *f as u64,
231      FeatureFlag::Custom(c) => *c,
232    }
233  }
234}
235
236// 3. Type Section
237// ----------------------------------------------------------------------------
238
/// Numeric tag identifying the kind of a type entry, stored as `u16`.
///
/// Values start at 1 and follow declaration order (`U8` = 1 through
/// `OptionT` = 30); `from_u16` relies on that numbering.
#[repr(u16)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum TypeTag {
  U8 = 1, U16, U32, U64, U128,
  I8, I16, I32, I64, I128,
  F32, F64, ComplexNumber, RationalNumber,
  String, Bool, Id, Index, Empty, Any,
  Matrix, EnumTag, Record, Map, Atom,
  Table, Tuple, Reference, Set, OptionT,
}

impl TypeTag {
  /// Converts a raw `u16` back into a tag.
  ///
  /// Returns `None` for `0` and for any value above 30.
  pub fn from_u16(tag: u16) -> Option<Self> {
    // Tags in discriminant order: the entry at index `i` has value `i + 1`.
    const BY_VALUE: [TypeTag; 30] = [
      TypeTag::U8, TypeTag::U16, TypeTag::U32, TypeTag::U64, TypeTag::U128,
      TypeTag::I8, TypeTag::I16, TypeTag::I32, TypeTag::I64, TypeTag::I128,
      TypeTag::F32, TypeTag::F64, TypeTag::ComplexNumber, TypeTag::RationalNumber,
      TypeTag::String, TypeTag::Bool, TypeTag::Id, TypeTag::Index,
      TypeTag::Empty, TypeTag::Any,
      TypeTag::Matrix, TypeTag::EnumTag, TypeTag::Record, TypeTag::Map,
      TypeTag::Atom, TypeTag::Table, TypeTag::Tuple, TypeTag::Reference,
      TypeTag::Set, TypeTag::OptionT,
    ];
    // `checked_sub` rejects 0; `get` rejects anything past the last tag.
    let slot = usize::from(tag).checked_sub(1)?;
    BY_VALUE.get(slot).copied()
  }
}
268
269#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
270#[derive(Debug, Clone, Eq, PartialEq)]
271pub struct TypeEntry {
272  pub tag: TypeTag,
273  pub bytes: Vec<u8>,
274}
275impl TypeEntry {
276  pub fn byte_len(&self) -> u64 {
277    2 + self.bytes.len() as u64
278  }
279}
280
281pub type TypeId = u32;
282
283#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
284#[derive(Default, Debug, Clone, Eq, PartialEq)]
285pub struct TypeSection {
286  pub interner: HashMap<ValueKind, TypeId>,
287  pub entries:  Vec<TypeEntry>, // index is TypeId
288}
289    
290impl TypeSection {
291
292  pub fn new() -> Self {
293    Self { interner: HashMap::new(), entries: Vec::new() }
294  }
295
296  pub fn get_or_intern(&mut self, vk: &ValueKind) -> TypeId {
297    if let Some(id) = self.interner.get(vk) { return *id; }
298    // recursively intern children and build payload
299    let (tag, mut bytes) = encode_value_kind(self, vk);
300    let id = self.entries.len() as u32;
301    self.entries.push(TypeEntry { tag, bytes });
302    self.interner.insert(vk.clone(), id);
303    id
304  }
305
306  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
307    w.write_u32::<LittleEndian>(self.entries.len() as u32)?;
308    for e in &self.entries {
309      w.write_u16::<LittleEndian>(e.tag as u16)?;
310      w.write_u16::<LittleEndian>(0)?;
311      w.write_u32::<LittleEndian>(1)?;
312      w.write_u32::<LittleEndian>(e.bytes.len() as u32)?;
313      w.write_all(&e.bytes)?;
314    }
315    Ok(())
316  }
317
318  pub fn byte_len(&self) -> u64 {
319    4 + self.entries.iter().map(|e| 12 + e.bytes.len() as u64).sum::<u64>()
320  }
321}
322
323// 4. Constants
324// ----------------------------------------------------------------------------
325
/// How a constant's payload is encoded, stored as `u8`.
#[repr(u8)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ConstEncoding {
  /// The only encoding currently defined; its numeric value is 1.
  Inline = 1,
}
332
333#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
334#[derive(Debug, Clone, Eq, PartialEq)]
335pub struct ConstEntry {
336  pub type_id: u32,
337  pub enc:     ConstEncoding,
338  pub align:   u8,
339  pub flags:   u8,
340  pub reserved:u16,
341  pub offset:  u64,
342  pub length:  u64,
343}
344
345impl ConstEntry {
346  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
347    w.write_u32::<LittleEndian>(self.type_id)?;
348    w.write_u8(self.enc as u8)?;
349    w.write_u8(self.align)?;
350    w.write_u8(self.flags)?;
351    w.write_u8(0)?; // pad to 4 bytes for the small fields
352    w.write_u64::<LittleEndian>(self.offset)?;
353    w.write_u64::<LittleEndian>(self.length)?;
354    Ok(())
355  }
356  pub fn byte_len() -> u64 { 4 + 1 + 1 + 1 + 1 + 8 + 8 } // = 24 bytes
357}
358
359// 5. Symbol Table
360// ----------------------------------------------------------------------------
361
362pub struct SymbolEntry {
363  pub id: u64,          // unique identifier for the symbol
364  pub reg: Register,    // register index this symbol maps to
365}
366
367impl SymbolEntry {
368
369  pub fn new(id: u64, reg: Register) -> Self {
370    Self { id, reg }
371  }
372
373  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
374    w.write_u64::<LittleEndian>(self.id)?;
375    w.write_u32::<LittleEndian>(self.reg)?;
376    Ok(())
377  }
378}
379
380// 6. Instruction Encoding (fixed forms)
381// ----------------------------------------------------------------------------
382
/// One-byte opcode that starts every encoded instruction.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OpCode {
  ConstLoad = 0x01,
  Unop      = 0x10,
  Binop     = 0x20,
  Ternop    = 0x30,
  Quadop    = 0x40,
  Return    = 0xFF,
}

impl OpCode {
  /// Decodes a raw opcode byte.
  ///
  /// Returns `None` when `num` is not one of the defined opcode values.
  pub fn from_u8(num: u8) -> Option<OpCode> {
    // Every defined opcode; decoding is a scan for the matching value.
    const KNOWN: [OpCode; 6] = [
      OpCode::ConstLoad,
      OpCode::Unop,
      OpCode::Binop,
      OpCode::Ternop,
      OpCode::Quadop,
      OpCode::Return,
    ];
    KNOWN.iter().copied().find(|op| *op as u8 == num)
  }
}
407
408#[derive(Debug, Clone)]
409pub enum EncodedInstr {
410  ConstLoad { dst: u32, const_id: u32 },                              // [u64 opcode][u32 dst][u32 const_id]
411  UnOp      { fxn_id: u64, dst: u32, src: u32 },                      // [u64 opcode][u32 dst][u32 src]
412  BinOp     { fxn_id: u64, dst: u32, lhs: u32, rhs: u32 },            // [u64 opcode][u32 dst][u32 lhs][u32 rhs]
413  TernOp    { fxn_id: u64, dst: u32, a: u32, b: u32, c: u32 },        // [u64 opcode][u32 dst][u32 a][u32 b][u32 c]
414  QuadOp   { fxn_id: u64, dst: u32, a: u32, b: u32, c: u32, d: u32 }, // [u64 opcode][u32 dst][u32 a][u32 b][u32 c][u32 d]
415  Ret       { src: u32 },                                             // [u64 opcode][u32 src]
416}
417
418impl EncodedInstr {
419  pub fn byte_len(&self) -> u64 {
420    match self {
421      EncodedInstr::ConstLoad{..} => 1 + 4 + 4,
422      EncodedInstr::UnOp{..}      => 1 + 8 + 4 + 4,
423      EncodedInstr::BinOp{..}     => 1 + 8 + 4 + 4 + 4,
424      EncodedInstr::TernOp{..}    => 1 + 8 + 4 + 4 + 4 + 4,
425      EncodedInstr::QuadOp{..}    => 1 + 8 + 4 + 4 + 4 + 4 + 4,
426      EncodedInstr::Ret{..}       => 1 + 4,
427    }
428  }
429  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
430    match self {
431      EncodedInstr::ConstLoad{ dst, const_id } => {
432        w.write_u8(OpCode::ConstLoad as u8)?;
433        w.write_u32::<LittleEndian>(*dst)?;
434        w.write_u32::<LittleEndian>(*const_id)?;
435      }
436      EncodedInstr::UnOp{ fxn_id, dst, src } => {
437        w.write_u8(OpCode::Unop as u8)?;
438        w.write_u64::<LittleEndian>(*fxn_id)?;
439        w.write_u32::<LittleEndian>(*dst)?;
440        w.write_u32::<LittleEndian>(*src)?;
441      }
442      EncodedInstr::BinOp{ fxn_id, dst, lhs, rhs } => {
443        w.write_u8(OpCode::Binop as u8)?;
444        w.write_u64::<LittleEndian>(*fxn_id)?;
445        w.write_u32::<LittleEndian>(*dst)?;
446        w.write_u32::<LittleEndian>(*lhs)?;
447        w.write_u32::<LittleEndian>(*rhs)?;
448      }
449      EncodedInstr::TernOp{ fxn_id, dst, a, b, c } => {
450        w.write_u8(OpCode::Ternop as u8)?;
451        w.write_u64::<LittleEndian>(*fxn_id)?;
452        w.write_u32::<LittleEndian>(*dst)?;
453        w.write_u32::<LittleEndian>(*a)?;
454        w.write_u32::<LittleEndian>(*b)?;
455        w.write_u32::<LittleEndian>(*c)?;
456      }
457      EncodedInstr::QuadOp{ fxn_id, dst, a, b, c, d } => {
458        w.write_u8(OpCode::Quadop as u8)?;
459        w.write_u64::<LittleEndian>(*fxn_id)?;
460        w.write_u32::<LittleEndian>(*dst)?;
461        w.write_u32::<LittleEndian>(*a)?;
462        w.write_u32::<LittleEndian>(*b)?;
463        w.write_u32::<LittleEndian>(*c)?;
464        w.write_u32::<LittleEndian>(*d)?;
465      }
466      EncodedInstr::Ret{ src } => {
467        w.write_u8(OpCode::Return as u8)?;
468        w.write_u32::<LittleEndian>(*src)?;
469      }
470    }
471    Ok(())
472  }
473}
474
475// 7. Dictionary
476// ----------------------------------------------------------------------------
477
478#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
479#[derive(Debug, Clone, Eq, PartialEq)]
480pub struct DictEntry {
481  pub id: u64,          // unique identifier for the dictionary entry
482  pub name: String,     // name of the entry
483} 
484
485impl DictEntry {
486  pub fn new(id: u64, name: &str) -> Self {
487    Self { id, name: name.to_string() }
488  }
489
490  pub fn write_to(&self, w: &mut impl Write) -> MResult<()> {
491    w.write_u64::<LittleEndian>(self.id)?;
492    let name_bytes = self.name.as_bytes();
493    w.write_u32::<LittleEndian>(name_bytes.len() as u32)?;
494    w.write_all(name_bytes)?;
495    Ok(())
496  }
497}
498
499