mech_core/program/compiler/
context.rs

1use crate::*;
2use super::*;
3
4#[derive(Debug)]
5pub struct CompileCtx {
6  // pointer identity -> register index
7  pub reg_map: HashMap<usize, Register>,
8  // symbol identity -> register index
9  pub symbols: HashMap<u64, Register>,
10  // symbol identity -> pointer identity
11  pub symbol_ptrs: HashMap<u64, usize>,
12  // symbol identity -> symbol name
13  pub dictionary: HashMap<u64, String>,
14  pub types: TypeSection,
15  pub features: HashSet<FeatureFlag>,
16  pub const_entries: Vec<ConstEntry>,
17  pub const_blob: Vec<u8>,
18  pub instrs: Vec<EncodedInstr>,
19  pub next_reg: Register,
20}
21
#[cfg(feature = "compiler")]
impl CompileCtx {
  /// Creates an empty context: no registers, symbols, types, features,
  /// constants, or instructions, with register numbering starting at 0.
  pub fn new() -> Self {
    Self {
      reg_map: HashMap::new(),
      symbols: HashMap::new(),
      symbol_ptrs: HashMap::new(),
      dictionary: HashMap::new(),
      types: TypeSection::new(),
      features: HashSet::new(),
      const_entries: Vec::new(),
      const_blob: Vec::new(),
      instrs: Vec::new(),
      next_reg: 0,
    }
  }

  /// Resets the context to the same state `new` produces so it can be reused
  /// for another compilation.
  pub fn clear(&mut self) {
    self.reg_map.clear();
    self.symbols.clear();
    // `symbol_ptrs` is populated alongside `symbols` and `dictionary` in
    // `define_symbol`, so it has to be reset with them; otherwise stale
    // symbol -> pointer mappings leak into the next compilation.
    self.symbol_ptrs.clear();
    self.dictionary.clear();
    self.types = TypeSection::new();
    self.features.clear();
    self.const_entries.clear();
    self.const_blob.clear();
    self.instrs.clear();
    self.next_reg = 0;
  }

  /// Registers a named symbol.
  ///
  /// The symbol identity is `hash_str(name)`. Under that identity this records
  /// the register `reg`, the pointer identity `id`, and the name itself.
  /// Defining the same name again replaces the previous entries.
  pub fn define_symbol(&mut self, id: usize, reg: Register, name: &str) {
    let symbol_id = hash_str(name);
    self.symbols.insert(symbol_id, reg);
    self.symbol_ptrs.insert(symbol_id, id);
    self.dictionary.insert(symbol_id, name.to_string());
  }

  /// Returns the register associated with `ptr`, allocating the next free
  /// register the first time a given `ptr` is seen.
  pub fn alloc_register_for_ptr(&mut self, ptr: usize) -> Register {
    if let Some(&existing) = self.reg_map.get(&ptr) {
      return existing;
    }
    let reg = self.next_reg;
    self.next_reg += 1;
    self.reg_map.insert(ptr, reg);
    reg
  }

  /// Appends a `ConstLoad` instruction with the given `dst` and `const_id`.
  pub fn emit_const_load(&mut self, dst: Register, const_id: u32) {
    self.instrs.push(EncodedInstr::ConstLoad { dst, const_id });
  }

  /// Appends a `UnOp` instruction for function `fxn_id` with one source register.
  pub fn emit_unop(&mut self, fxn_id: u64, dst: Register, src: Register) {
    self.instrs.push(EncodedInstr::UnOp { fxn_id, dst, src });
  }

  /// Appends a `BinOp` instruction for function `fxn_id` with two source registers.
  pub fn emit_binop(&mut self, fxn_id: u64, dst: Register, lhs: Register, rhs: Register) {
    self.instrs.push(EncodedInstr::BinOp { fxn_id, dst, lhs, rhs });
  }

  /// Appends a `TernOp` instruction for function `fxn_id` with three source registers.
  pub fn emit_ternop(&mut self, fxn_id: u64, dst: Register, a: Register, b: Register, c: Register) {
    self.instrs.push(EncodedInstr::TernOp { fxn_id, dst, a, b, c });
  }

  /// Appends a `QuadOp` instruction for function `fxn_id` with four source registers.
  pub fn emit_quadop(&mut self, fxn_id: u64, dst: Register, a: Register, b: Register, c: Register, d: Register) {
    self.instrs.push(EncodedInstr::QuadOp { fxn_id, dst, a, b, c, d });
  }

  /// Appends a `Ret` instruction with `src` as its operand.
  pub fn emit_ret(&mut self, src: Register) {
    self.instrs.push(EncodedInstr::Ret { src })
  }

  /// Adds a constant to the constant pool and returns its index.
  ///
  /// The type of `value_kind` is interned in the type section, the blob is
  /// zero-padded up to `value_kind.align()`, the matching builtin feature flag
  /// is recorded, and `bytes` is appended to the blob. A `ConstEntry` with
  /// `ConstEncoding::Inline` describing the offset and length of those bytes is
  /// pushed, and its position in `const_entries` is the returned id.
  pub fn compile_const(&mut self, bytes: &[u8], value_kind: ValueKind) -> MResult<u32> {
    let type_id = self.types.get_or_intern(&value_kind);
    let align = value_kind.align();

    // Zero-pad the blob so this constant starts on its required alignment.
    let unpadded_len = self.const_blob.len() as u64;
    let padded_len = align_up(unpadded_len, align as u64);
    if padded_len > unpadded_len {
      self.const_blob.resize(padded_len as usize, 0);
    }

    self.features.insert(FeatureFlag::Builtin(value_kind.to_feature_kind()));

    let offset = self.const_blob.len() as u64;
    self.const_blob.extend_from_slice(bytes);
    let length = bytes.len() as u64;

    let const_id = self.const_entries.len() as u32;
    self.const_entries.push(ConstEntry {
      type_id,
      enc: ConstEncoding::Inline,
      align: align as u8,
      flags: 0,
      reserved: 0,
      offset,
      length,
    });
    Ok(const_id)
  }

  /// Serializes the context into a bytecode image.
  ///
  /// Sections are written in this order: header, features (a `u32` count
  /// followed by one `u64` per feature), types, constant table, constant blob,
  /// symbols, instructions, dictionary, and finally a 4-byte CRC32 of
  /// everything written before it. Features, symbols, and dictionary entries
  /// are written in ascending id order.
  ///
  /// # Errors
  ///
  /// Propagates any error from the section writers, and returns a
  /// `GenericError` if the number of bytes written disagrees with the length
  /// computed up front for the header.
  ///
  /// # Panics
  ///
  /// Panics if `CARGO_PKG_VERSION` is rejected by `parse_version_to_u16`.
  pub fn compile(&mut self) -> MResult<Vec<u8>> {
    // Section sizes, computed up front because the header stores every offset.
    let header_size = ByteCodeHeader::HEADER_SIZE as u64;
    let feat_bytes_len: u64 = 4 + (self.features.len() as u64) * 8;
    let types_bytes_len: u64 = self.types.byte_len();
    let const_tbl_len: u64 = (self.const_entries.len() as u64) * ConstEntry::byte_len();
    let const_blob_len: u64 = self.const_blob.len() as u64;
    let symbols_len: u64 = (self.symbols.len() as u64) * 12; // 8 bytes for id, 4 for reg
    let instr_bytes_len: u64 = self.instrs.iter().map(|i| i.byte_len()).sum();
    let dict_len: u64 = self.dictionary.values().map(|s| s.len() as u64 + 12).sum(); // 8 bytes for id, 4 for string length

    // Running offset: each section starts where the previous one ends.
    let mut offset = header_size;
    let feature_off = offset; offset += feat_bytes_len;
    let types_off = offset; offset += types_bytes_len;
    let const_tbl_off = offset; offset += const_tbl_len;
    let const_blob_off = offset; offset += const_blob_len;
    let symbols_off = offset; offset += symbols_len;
    let instr_off = offset; offset += instr_bytes_len;
    let dict_off = offset; offset += dict_len;

    let file_len_before_trailer = offset;
    let trailer_len = 4u64; // CRC32 checksum
    let full_file_len = file_len_before_trailer + trailer_len;

    let header = ByteCodeHeader {
      magic: *b"MECH",
      version: 1,
      mech_ver: parse_version_to_u16(env!("CARGO_PKG_VERSION"))
        .expect("CARGO_PKG_VERSION should be a parseable version string"),
      flags: 0,
      reg_count: self.next_reg,
      instr_count: self.instrs.len() as u32,
      feature_count: self.features.len() as u32,
      feature_off,

      types_count: self.types.entries.len() as u32,
      types_off,

      const_count: self.const_entries.len() as u32,
      const_tbl_off,
      const_tbl_len,
      const_blob_off,
      const_blob_len,

      symbols_len,
      symbols_off,

      instr_off,
      instr_len: instr_bytes_len,

      dict_len,
      dict_off,

      reserved: 0,
    };

    let mut buf = Cursor::new(Vec::<u8>::with_capacity(full_file_len as usize));

    // 1. Header
    header.write_to(&mut buf)?;

    // 2. Features. Hash-based collections iterate in an unspecified order, so
    //    sort by id to make the emitted bytes (and the checksum) reproducible.
    let mut feature_ids: Vec<u64> = self.features.iter().map(|f| f.as_u64()).collect();
    feature_ids.sort_unstable();
    buf.write_u32::<LittleEndian>(feature_ids.len() as u32)?;
    for feature_id in feature_ids {
      buf.write_u64::<LittleEndian>(feature_id)?;
    }

    // 3. Types
    self.types.write_to(&mut buf)?;

    // 4. Constant table, then the raw constant bytes
    for entry in &self.const_entries {
      entry.write_to(&mut buf)?;
    }
    if !self.const_blob.is_empty() {
      buf.write_all(&self.const_blob)?;
    }

    // 5. Symbols, in ascending id order for reproducible output
    let mut symbols: Vec<(u64, Register)> =
      self.symbols.iter().map(|(id, reg)| (*id, *reg)).collect();
    symbols.sort_unstable_by_key(|&(id, _)| id);
    for (id, reg) in symbols {
      SymbolEntry::new(id, reg).write_to(&mut buf)?;
    }

    // 6. Instructions, in emission order
    for ins in &self.instrs {
      ins.write_to(&mut buf)?;
    }

    // 7. Dictionary, in ascending id order for reproducible output
    let mut dict_entries: Vec<(&u64, &String)> = self.dictionary.iter().collect();
    dict_entries.sort_unstable_by_key(|&(id, _)| *id);
    for (id, name) in dict_entries {
      DictEntry::new(*id, name).write_to(&mut buf)?;
    }

    // Sanity check: what was written must match the layout promised in the header.
    let pos = buf.position();
    if pos != file_len_before_trailer {
      return Err(MechError {
        file: file!().to_string(),
        tokens: vec![],
        msg: format!("Buffer position mismatch: expected {}, got {}", file_len_before_trailer, pos),
        id: line!(),
        kind: MechErrorKind::GenericError("Buffer position mismatch".to_string()),
      });
    }

    // Trailer: CRC32 over every byte written so far.
    let checksum = crc32fast::hash(buf.get_ref().as_slice());
    buf.write_u32::<LittleEndian>(checksum)?;

    if buf.position() != full_file_len {
      return Err(MechError {
        file: file!().to_string(),
        tokens: vec![],
        msg: format!("Final buffer length mismatch: expected {}, got {}", full_file_len, buf.position()),
        id: line!(),
        kind: MechErrorKind::GenericError("Final buffer length mismatch".to_string()),
      });
    }

    Ok(buf.into_inner())
  }
}
225
/// Rounds `offset` up to the nearest multiple of `align`.
///
/// An `align` of zero means "no alignment requirement" and returns `offset`
/// unchanged. An `offset` that is already a multiple of `align` is returned
/// as-is.
#[inline]
fn align_up(offset: u64, align: u64) -> u64 {
  if align == 0 {
    return offset;
  }
  // Work from the remainder instead of `offset + align - 1`: that intermediate
  // sum can overflow even when the offset is already aligned and the result
  // is representable (e.g. `align_up(u64::MAX, 1)`).
  match offset % align {
    0 => offset,
    rem => offset + (align - rem),
  }
}