mech_core/program/
program.rs

1use crate::*;
2use super::*;
3use byteorder::{LittleEndian, WriteBytesExt, ReadBytesExt};
4use std::io::Write;
5use std::io::{self, SeekFrom, Seek, Cursor};
6#[cfg(not(feature = "no_std"))]
7use std::fs::File;
8#[cfg(not(feature = "no_std"))]
9use std::path::Path;
10#[cfg(feature = "matrix")]
11use crate::matrix::Matrix;
12use indexmap::IndexMap;
13
14#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
15#[derive(Debug, Clone, Eq, PartialEq)]
16pub struct ParsedProgram {
17  pub header: ByteCodeHeader,
18  pub features: Vec<u64>,
19  pub types: TypeSection,
20  pub const_entries: Vec<ParsedConstEntry>,
21  pub const_blob: Vec<u8>,
22  pub instr_bytes: Vec<u8>,
23  pub symbols: HashMap<u64, Register>,
24  pub mutable_symbols: HashSet<u64>,
25  pub instrs: Vec<DecodedInstr>,
26  pub dictionary: HashMap<u64, String>,
27}
28
29impl ParsedProgram {
30
31 pub fn to_bytes(&self) -> MResult<Vec<u8>> {
32    let mut buf = Cursor::new(Vec::<u8>::new());
33
34    // 1. Header
35    self.header.write_to(&mut buf)?;
36
37    // 2. Features
38    buf.write_u32::<LittleEndian>(self.features.len() as u32)?;
39    for f in &self.features {
40      buf.write_u64::<LittleEndian>(*f)?;
41    }
42
43    // 3. Types
44    self.types.write_to(&mut buf)?;
45
46    // 4. Const entries
47    for entry in &self.const_entries {
48      entry.write_to(&mut buf)?;
49    }
50
51    // 5. Const blob
52    if !self.const_blob.is_empty() {
53      buf.write_all(&self.const_blob)?;
54    }
55
56    // 6. Symbols
57    for (id, reg) in &self.symbols {
58      let mutable = self.mutable_symbols.contains(id);
59      let entry = SymbolEntry::new(*id, mutable, *reg);
60      entry.write_to(&mut buf)?;
61    }
62
63    // 7. Instructions
64    for ins in &self.instrs {
65      ins.write_to(&mut buf)?;
66    }
67
68    // 8. Dictionary
69    for (id, name) in &self.dictionary {
70      let dict_entry = DictEntry::new(*id, name);
71      dict_entry.write_to(&mut buf)?;
72    }
73
74    // 9. CRC32 trailer
75    let bytes_so_far = buf.get_ref().as_slice();
76    let checksum = crc32fast::hash(bytes_so_far);
77    buf.write_u32::<LittleEndian>(checksum)?;
78
79    Ok(buf.into_inner())
80  }
81
  /// Parses a program from an in-memory bytecode image.
  ///
  /// Thin wrapper around `load_program_from_bytes`, which verifies the CRC32
  /// trailer before parsing the sections.
  pub fn from_bytes(bytes: &[u8]) -> MResult<ParsedProgram> {
    load_program_from_bytes(bytes)
  }
85
86  pub fn validate(&self) -> MResult<()> {
87    // Check magic number
88    if !self.header.validate_magic(b"MECH") {
89      return Err(MechError {file: file!().to_string(), tokens: vec![], msg: "Invalid magic number".to_string(), id: line!(), kind: MechErrorKind::GenericError("Invalid magic number".to_string())});
90    }
91    // Check version number
92    if self.header.version != 1 {
93      return Err(MechError {file: file!().to_string(), tokens: vec![], msg: "Unsupported bytecode version".to_string(), id: line!(), kind: MechErrorKind::GenericError("Unsupported bytecode version".to_string())});
94    }
95    // Check mech version
96    if self.header.mech_ver != parse_version_to_u16(env!("CARGO_PKG_VERSION")).unwrap() {
97      return Err(MechError {file: file!().to_string(), tokens: vec![], msg: "Incompatible Mech version".to_string(), id: line!(), kind: MechErrorKind::GenericError("Incompatible Mech version".to_string())});
98    }
99    Ok(())
100  }
101
102  pub fn decode_const_entries(&self) -> MResult<Vec<Value>> {
103    let mut out = Vec::with_capacity(self.const_entries.len());
104    let blob_len = self.const_blob.len() as u64;
105
106    for const_entry in &self.const_entries {
107      // Only support Inline encoding for now
108      if const_entry.enc != ConstEncoding::Inline as u8 {
109        return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Unsupported constant encoding".to_string(),id: line!(),kind: MechErrorKind::GenericError("Unsupported constant encoding".to_string())});
110      }
111
112      // Bounds check
113      if const_entry.offset.checked_add(const_entry.length).is_none() {
114          return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Constant entry out of bounds".to_string(),id: line!(),kind: MechErrorKind::GenericError("Constant entry out of bounds".to_string())});
115      }
116      let end = const_entry.offset + const_entry.length;
117      if end > blob_len {
118        return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Constant entry out of bounds".to_string(),id: line!(),kind: MechErrorKind::GenericError("Constant entry out of bounds".to_string())});
119      }
120
121      // Alignment check (if your alignment semantics differ, change this)
122      if !check_alignment(const_entry.offset, const_entry.align) {
123        return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Constant entry alignment error".to_string(),id: line!(),kind: MechErrorKind::GenericError("Constant entry alignment error".to_string())});
124      }
125
126      // Copy bytes out (we clone into Vec<u8> to own data)
127      let start = const_entry.offset as usize;
128      let len = const_entry.length as usize;
129      let data = self.const_blob[start .. start + len].to_vec();
130
131      // get the type from the id
132      let ty = &self.types.entries[const_entry.type_id as usize];
133
134      let val: Value = match ty.tag {
135        #[cfg(feature = "bool")]
136        TypeTag::Bool => {
137          if data.len() != 1 {
138            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Bool const entry must be 1 byte".to_string(), id: line!(), kind: MechErrorKind::GenericError("Bool const entry must be 1 byte".to_string())});
139          }
140          let value = data[0] != 0;
141          Value::Bool(Ref::new(value))
142        },
143        #[cfg(feature = "string")]
144        TypeTag::String => {
145          if data.len() < 4 {
146            return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "String constant too short".to_string(),id: line!(),kind: MechErrorKind::GenericError("String constant too short".to_string()) });
147          }
148          let s = String::from_le(&data);
149          Value::String(Ref::new(s))
150        }
151        #[cfg(feature = "u8")]
152        TypeTag::U8 => {
153          if data.len() != 1 {
154            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U8 const entry must be 1 byte".to_string(), id: line!(), kind: MechErrorKind::GenericError("U8 const entry must be 1 byte".to_string())});
155          }
156          let value = data[0];
157          Value::U8(Ref::new(value))
158        },
159        #[cfg(feature = "u16")]
160        TypeTag::U16 => {
161          if data.len() != 2 {
162            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U16 const entry must be 2 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U16 const entry must be 2 bytes".to_string())});
163          }
164          let value = u16::from_le_bytes(data.try_into().unwrap());
165          Value::U16(Ref::new(value))
166        },
167        #[cfg(feature = "u32")]
168        TypeTag::U32 => {
169          if data.len() != 4 {
170            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U32 const entry must be 4 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U32 const entry must be 4 bytes".to_string())});
171          }
172          let value = u32::from_le_bytes(data.try_into().unwrap());
173          Value::U32(Ref::new(value))
174        },
175        #[cfg(feature = "u64")]
176        TypeTag::U64 => {
177          if data.len() != 8 {
178            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U64 const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U64 const entry must be 8 bytes".to_string())});
179          }
180          let value = u64::from_le_bytes(data.try_into().unwrap());
181          Value::U64(Ref::new(value))
182        },
183        #[cfg(feature = "u128")]
184        TypeTag::U128 => {
185          if data.len() != 16 {
186            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U128 const entry must be 16 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U128 const entry must be 16 bytes".to_string())});
187          }
188          let value = u128::from_le_bytes(data.try_into().unwrap());
189          Value::U128(Ref::new(value))
190        },
191        #[cfg(feature = "i8")]
192        TypeTag::I8 => {
193          if data.len() != 1 {
194            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I8 const entry must be 1 byte".to_string(), id: line!(), kind: MechErrorKind::GenericError("I8 const entry must be 1 byte".to_string())});
195          }
196          let value = data[0] as i8;
197          Value::I8(Ref::new(value))
198        },
199        #[cfg(feature = "i16")]
200        TypeTag::I16 => {
201          if data.len() != 2 {
202            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I16 const entry must be 2 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I16 const entry must be 2 bytes".to_string())});
203          }
204          let value = i16::from_le_bytes(data.try_into().unwrap());
205          Value::I16(Ref::new(value))
206        },
207        #[cfg(feature = "i32")]
208        TypeTag::I32 => {
209          if data.len() != 4 {
210            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I32 const entry must be 4 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I32 const entry must be 4 bytes".to_string())});
211          }
212          let value = i32::from_le_bytes(data.try_into().unwrap());
213          Value::I32(Ref::new(value))
214        },
215        #[cfg(feature = "i64")]
216        TypeTag::I64 => {
217          if data.len() != 8 {
218            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I64 const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I64 const entry must be 8 bytes".to_string())});
219          }
220          let value = i64::from_le_bytes(data.try_into().unwrap());
221          Value::I64(Ref::new(value))
222        }
223        #[cfg(feature = "i128")]
224        TypeTag::I128 => {
225          if data.len() != 16 {
226            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I128 const entry must be 16 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I128 const entry must be 16 bytes".to_string())});
227          }
228          let value = i128::from_le_bytes(data.try_into().unwrap());
229          Value::I128(Ref::new(value))
230        },
231        #[cfg(feature = "f32")]
232        TypeTag::F32 => {
233          if data.len() != 4 {
234            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "F32 const entry must be 4 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("F32 const entry must be 4 bytes".to_string())});
235          }
236          let value = f32::from_le_bytes(data.try_into().unwrap());
237          Value::F32(Ref::new(F32::new(value)))
238        },
239        #[cfg(feature = "f64")]
240        TypeTag::F64 => {
241          if data.len() != 8 {
242            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "F64 const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("F64 const entry must be 8 bytes".to_string())});
243          }
244          let value = f64::from_le_bytes(data.try_into().unwrap());
245          Value::F64(Ref::new(F64::new(value)))
246        },
247        #[cfg(feature = "complex")]
248        TypeTag::C64 => {
249          if data.len() != 16 {
250            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "C64 const entry must be 8 bytes real + 8 bytes imag".to_string(), id: line!(), kind: MechErrorKind::GenericError("C64 const entry must be 8 bytes real + 8 bytes imag".to_string())});
251          }
252          let real = f64::from_le_bytes(data[0..8].try_into().unwrap());
253          let imag = f64::from_le_bytes(data[8..16].try_into().unwrap());
254          Value::C64(Ref::new(C64::new(real, imag)))
255        },
256        #[cfg(feature = "rational")]
257        TypeTag::R64 => {
258          if data.len() != 16 {
259            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "R64 const entry must be 8 bytes numerator + 8 bytes denominator".to_string(), id: line!(), kind: MechErrorKind::GenericError("R64 const entry must be 8 bytes numerator + 8 bytes denominator".to_string())});
260          }
261          let numer = i64::from_le_bytes(data[0..8].try_into().unwrap());
262          let denom = i64::from_le_bytes(data[8..16].try_into().unwrap());
263          Value::R64(Ref::new(R64::new(numer, denom)))
264        },
265        #[cfg(all(feature = "matrix", feature = "string"))]
266        TypeTag::MatrixString => {
267          if data.len() < 8 {
268            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
269          }
270          let matrix = Matrix::<String>::from_le(&data);
271          Value::MatrixString(matrix)
272        }
273        #[cfg(all(feature = "matrix", feature = "bool"))]
274        TypeTag::MatrixBool => {
275          if data.len() < 1 {
276            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
277          }
278          let matrix = Matrix::<bool>::from_le(&data);
279          Value::MatrixBool(matrix)
280        }
281        #[cfg(all(feature = "matrix", feature = "u8"))]
282        TypeTag::MatrixU8 => {
283          if data.len() < 1 {
284            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
285          }
286          let matrix = Matrix::<u8>::from_le(&data);
287          Value::MatrixU8(matrix)
288        }
289        #[cfg(all(feature = "matrix", feature = "i8"))]
290        TypeTag::MatrixI8 => {
291          if data.len() < 8 {
292            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
293          }
294          let matrix = Matrix::<i8>::from_le(&data);
295          Value::MatrixI8(matrix)
296        }
297        #[cfg(all(feature = "matrix", feature = "f32"))]
298        TypeTag::MatrixF32 => {
299          if data.len() < 8 {
300            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
301          }
302          let matrix = Matrix::<F32>::from_le(&data);
303          Value::MatrixF32(matrix)
304        }
305        #[cfg(all(feature = "matrix", feature = "f64"))]
306        TypeTag::MatrixF64 => {
307          if data.len() < 8 {
308            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
309          }
310          let matrix = Matrix::<F64>::from_le(&data);
311          Value::MatrixF64(matrix)
312        }
313        #[cfg(all(feature = "matrix", feature = "u16"))]
314        TypeTag::MatrixU16 => {
315          if data.len() < 8 {
316            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
317          }
318          let matrix = Matrix::<u16>::from_le(&data);
319          Value::MatrixU16(matrix)
320        },
321        #[cfg(all(feature = "matrix", feature = "u32"))]
322        TypeTag::MatrixU32 => {
323          if data.len() < 8 {
324            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
325          }
326          let matrix = Matrix::<u32>::from_le(&data);
327          Value::MatrixU32(matrix)
328        },
329        #[cfg(all(feature = "matrix", feature = "u64"))]
330        TypeTag::MatrixU64 => {
331          if data.len() < 8 {
332            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
333          }
334          let matrix = Matrix::<u64>::from_le(&data);
335          Value::MatrixU64(matrix)
336        },
337        #[cfg(all(feature = "matrix", feature = "u128"))]
338        TypeTag::MatrixU128 => {
339          if data.len() < 8 {
340            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
341          }
342          let matrix = Matrix::<u128>::from_le(&data);
343          Value::MatrixU128(matrix)
344        },
345        #[cfg(all(feature = "matrix", feature = "i16"))]
346        TypeTag::MatrixI16 => {
347          if data.len() < 8 {
348            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
349          }
350          let matrix = Matrix::<i16>::from_le(&data);
351          Value::MatrixI16(matrix)
352        },
353        #[cfg(all(feature = "matrix", feature = "i32"))]
354        TypeTag::MatrixI32 => {
355          if data.len() < 8 {
356            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
357          }
358          let matrix = Matrix::<i32>::from_le(&data);
359          Value::MatrixI32(matrix)
360        },
361        #[cfg(all(feature = "matrix", feature = "i64"))]
362        TypeTag::MatrixI64 => {
363          if data.len() < 8 {
364            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
365          }
366          let matrix = Matrix::<i64>::from_le(&data);
367          Value::MatrixI64(matrix)
368        },
369        #[cfg(all(feature = "matrix", feature = "i128"))]
370        TypeTag::MatrixI128 => {
371          if data.len() < 8 {
372            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
373          }
374          let matrix = Matrix::<i128>::from_le(&data);
375          Value::MatrixI128(matrix)
376        },
377        #[cfg(all(feature = "matrix", feature = "c64"))]
378        TypeTag::MatrixC64 => {
379          if data.len() < 8 {
380            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
381          }
382          let matrix = Matrix::<C64>::from_le(&data);
383          Value::MatrixC64(matrix)
384        },
385        #[cfg(all(feature = "matrix", feature = "r64"))]
386        TypeTag::MatrixR64 => {
387          if data.len() < 8 {
388            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
389          }
390          let matrix = Matrix::<R64>::from_le(&data);
391          Value::MatrixR64(matrix)
392        },
393        #[cfg(feature = "matrix")]
394        TypeTag::MatrixIndex => {
395          if data.len() < 8 {
396            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
397          }
398          let matrix = Matrix::<usize>::from_le(&data);
399          Value::MatrixIndex(matrix)
400        },
401        TypeTag::Index => {
402          if data.len() != 8 {
403            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Index const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Index const entry must be 8 bytes".to_string())});
404          }
405          let value = u64::from_le_bytes(data.try_into().unwrap()) as usize;
406          Value::Index(Ref::new(value))
407        },
408        #[cfg(feature = "set")]
409        TypeTag::Set => {
410          if data.len() < 4 {
411            return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Set const entry must be at least 4 bytes (num elements)".to_string(),id: line!(),kind: MechErrorKind::GenericError("Set const entry must be at least 4 bytes (num elements)".to_string()),});
412          }
413          let set = MechSet::from_le(&data);
414          Value::Set(Ref::new(set))
415        },
416        #[cfg(feature = "table")]
417        TypeTag::Table => {
418          if data.len() < 8 {
419            return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Table const entry must be at least 8 bytes (rows + cols)".to_string(),id: line!(),kind: MechErrorKind::GenericError("Table const entry must be at least 8 bytes (rows + cols)".to_string()),});
420          }
421          let table = MechTable::from_le(&data);
422          Value::Table(Ref::new(table))
423        }
424        _ => return Err(MechError{file: file!().to_string(), tokens: vec![], msg: format!("Unsupported constant type {:?}", ty.tag), id: line!(), kind: MechErrorKind::GenericError(format!("Unsupported constant type {:?}", ty.tag))}),
425      };
426      out.push(val);
427    }
428    Ok(out)
429  }
430}
431
/// Returns `true` when `offset` is a multiple of `align`.
///
/// An `align` of 0 is treated as invalid and always yields `false`.
fn check_alignment(offset: u64, align: u8) -> bool {
  match u64::from(align) {
    0 => false,
    step => offset % step == 0,
  }
}
438
439// Load Program
440// ----------------------------------------------------------------------------
441
442#[cfg(not(feature = "no_std"))]
443pub fn load_program_from_file(path: impl AsRef<Path>) -> MResult<ParsedProgram> {
444  let path = path.as_ref();
445  let mut f = File::open(path)?;
446
447  // total length for bounds checks
448  let total_len = f.metadata()?.len();
449
450  // Verify CRC trailer first; ensures fully readable, too.
451  verify_crc_trailer_seek(&mut f, total_len)?;
452
453  // Parse from the start
454  f.seek(SeekFrom::Start(0))?;
455  load_program_from_reader(&mut f, total_len)
456}
457
458pub fn load_program_from_bytes(bytes: &[u8]) -> MResult<ParsedProgram> {
459  let total_len = bytes.len() as u64;
460
461  let mut cur = Cursor::new(bytes);
462  verify_crc_trailer_seek(&mut cur, total_len)?;
463
464  // Parse from the start
465  cur.seek(SeekFrom::Start(0))?;
466  load_program_from_reader(&mut cur, total_len)
467}
468
469fn load_program_from_reader<R: Read + Seek>(r: &mut R, total_len: u64) -> MResult<ParsedProgram> {
470  r.seek(SeekFrom::Start(0))?;
471  let mut header_buf = vec![0u8; ByteCodeHeader::HEADER_SIZE];
472  r.read_exact(&mut header_buf)?;
473
474  // 1) read header blob
475  let mut header_cursor = Cursor::new(&header_buf[..]);
476  let header = ByteCodeHeader::read_from(&mut header_cursor)?;
477
478  // quick magic check
479  if !header.validate_magic(&(*b"MECH")) {
480    return Err(MechError {
481      file: file!().to_string(),
482      tokens: vec![],
483      msg: format!("Invalid magic in bytecode header: expected 'MECH', got {:?}", header.magic),
484      id: line!(),
485      kind: MechErrorKind::GenericError("Invalid magic".to_string()),
486    });
487  }
488
489  // 2. read features
490  let mut features = Vec::new();
491  if header.feature_off != 0 && header.feature_off + 4 <= total_len.saturating_sub(4) {
492    r.seek(SeekFrom::Start(header.feature_off))?;
493    let c = r.read_u32::<LittleEndian>()? as usize;
494    for _ in 0..c {
495      let v = r.read_u64::<LittleEndian>()?;
496      features.push(v);
497    }
498  }
499
500  // 3. read types
501  let mut types = TypeSection::new();
502  if header.types_off != 0 && header.types_off + 4 <= total_len.saturating_sub(4) {
503    r.seek(SeekFrom::Start(header.types_off))?;
504    let types_count = r.read_u32::<LittleEndian>()? as usize;
505    for _ in 0..types_count {
506      let tag = r.read_u16::<LittleEndian>()?;
507      let _reserved = r.read_u16::<LittleEndian>()?; // reserved, always 0
508      let _version = r.read_u32::<LittleEndian>()?; // version, always 1
509      let bytes_len = r.read_u32::<LittleEndian>()? as usize;
510      let mut bytes = vec![0u8; bytes_len];
511      r.read_exact(&mut bytes)?;
512      if let Some(tag) = TypeTag::from_u16(tag) {
513        types.entries.push(TypeEntry { tag, bytes });
514      } else {
515        return Err(MechError {
516          file: file!().to_string(),
517          tokens: vec![],
518          msg: format!("Unknown type tag: {}", tag),
519          id: line!(),
520          kind: MechErrorKind::GenericError("Unknown type tag".to_string()),
521        });
522      }
523    }
524  }
525
526  // 4. read const table
527  let mut const_entries = Vec::new();
528  if header.const_tbl_off != 0 && header.const_tbl_len > 0 {
529    r.seek(SeekFrom::Start(header.const_tbl_off))?;
530    let mut tbl_bytes = vec![0u8; header.const_tbl_len as usize];
531    r.read_exact(&mut tbl_bytes)?;
532    let cur = Cursor::new(&tbl_bytes[..]);
533    const_entries = parse_const_entries(cur, header.const_count as usize)?;
534  }
535
536  // read const blob
537  let mut const_blob = vec![];
538  if header.const_blob_off != 0 && header.const_blob_len > 0 {
539    r.seek(SeekFrom::Start(header.const_blob_off))?;
540    const_blob.resize(header.const_blob_len as usize, 0);
541    r.read_exact(&mut const_blob)?;
542  }
543
544  // 5. read symbols
545  let mut symbols = HashMap::new();
546  let mut mutable_symbols = HashSet::new();
547  if header.symbols_off != 0 && header.symbols_len > 0 {
548    r.seek(SeekFrom::Start(header.symbols_off))?;
549    let mut symbols_bytes = vec![0u8; header.symbols_len as usize];
550    r.read_exact(&mut symbols_bytes)?;
551    let mut cur = Cursor::new(&symbols_bytes[..]);
552    for _ in 0..(header.symbols_len / 12) {
553      let id = cur.read_u64::<LittleEndian>()?;
554      let mutable = cur.read_u8()? != 0;
555      let reg = cur.read_u32::<LittleEndian>()?;
556      symbols.insert(id, reg);
557      if mutable {
558        mutable_symbols.insert(id);
559      }
560    }
561  }
562
563  // 6. read instr bytes
564  let mut instr_bytes = vec![];
565  if header.instr_off != 0 && header.instr_len > 0 {
566    r.seek(SeekFrom::Start(header.instr_off))?;
567    instr_bytes.resize(header.instr_len as usize, 0);
568    r.read_exact(&mut instr_bytes)?;
569  }
570
571  // 7. read dictionary
572  let mut dictionary = HashMap::new();
573  if header.dict_off != 0 && header.dict_len > 0 {
574    r.seek(SeekFrom::Start(header.dict_off))?;
575    let mut dict_bytes = vec![0u8; header.dict_len as usize];
576    r.read_exact(&mut dict_bytes)?;
577    let mut cur = Cursor::new(&dict_bytes[..]);
578    while cur.position() < dict_bytes.len() as u64 {
579      let id = cur.read_u64::<LittleEndian>()?;
580      let name_len = cur.read_u32::<LittleEndian>()? as usize;
581      let mut name_bytes = vec![0u8; name_len];
582      cur.read_exact(&mut name_bytes)?;
583      let name = String::from_utf8(name_bytes).map_err(|_| MechError {
584        file: file!().to_string(),
585        tokens: vec![],
586        msg: "Invalid UTF-8 in dictionary entry".to_string(),
587        id: line!(),
588        kind: MechErrorKind::GenericError("Invalid UTF-8".to_string()),
589      })?;
590      dictionary.insert(id, name);
591    }
592  }
593
594  // decode instructions
595  let instrs = decode_instructions(Cursor::new(&instr_bytes[..]))?;
596  
597  Ok(ParsedProgram { header, features, types, const_entries, const_blob, instr_bytes, symbols, mutable_symbols, instrs, dictionary })
598}
599
/// Splits a packed 16-bit version into `(major, minor, patch)`.
///
/// Bit layout, most significant first: 3 bits major, 5 bits minor,
/// 8 bits patch.
pub fn decode_version_from_u16(v: u16) -> (u16, u16, u16) {
  const MINOR_SHIFT: u32 = 8;
  const MAJOR_SHIFT: u32 = 13;
  const PATCH_MASK: u16 = 0x00FF;
  const MINOR_MASK: u16 = 0x1F;
  const MAJOR_MASK: u16 = 0x7;

  (
    (v >> MAJOR_SHIFT) & MAJOR_MASK,
    (v >> MINOR_SHIFT) & MINOR_MASK,
    v & PATCH_MASK,
  )
}
606
/// One record of the constant table: describes where a constant lives in the
/// constant blob and how to interpret it.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParsedConstEntry {
  /// Index of the constant's type in the program's type table.
  pub type_id: u32,
  /// Encoding of the constant, stored as a raw byte.
  pub enc: u8,
  /// Required alignment of `offset`; 0 is rejected by the alignment check.
  pub align: u8,
  /// Flag byte, carried through unchanged by the code in this file.
  pub flags: u8,
  /// Reserved byte, carried through unchanged by the code in this file.
  pub reserved: u8,
  /// Byte offset of the constant inside the constant blob.
  pub offset: u64,
  /// Length of the constant in bytes.
  pub length: u64,
}
618
619
620impl ParsedConstEntry {
621  pub fn write_to<W: Write>(&self, w: &mut W) -> MResult<()> {
622    // type_id (u32)
623    w.write_u32::<LittleEndian>(self.type_id)
624      .map_err(|e| MechError {
625        file: file!().to_string(),
626        tokens: vec![],
627        msg: e.to_string(),
628        id: line!(),
629        kind: MechErrorKind::GenericError(e.to_string()),
630      })?;
631    // enc, align, flags, reserved (u8 each)
632    w.write_u8(self.enc).map_err(|e| MechError {
633      file: file!().to_string(),
634      tokens: vec![],
635      msg: e.to_string(),
636      id: line!(),
637      kind: MechErrorKind::GenericError(e.to_string()),
638    })?;
639    w.write_u8(self.align).map_err(|e| MechError {
640      file: file!().to_string(),
641      tokens: vec![],
642      msg: e.to_string(),
643      id: line!(),
644      kind: MechErrorKind::GenericError(e.to_string()),
645    })?;
646    w.write_u8(self.flags).map_err(|e| MechError {
647      file: file!().to_string(),
648      tokens: vec![],
649      msg: e.to_string(),
650      id: line!(),
651      kind: MechErrorKind::GenericError(e.to_string()),
652    })?;
653    w.write_u8(self.reserved).map_err(|e| MechError {
654      file: file!().to_string(),
655      tokens: vec![],
656      msg: e.to_string(),
657      id: line!(),
658      kind: MechErrorKind::GenericError(e.to_string()),
659    })?;
660    // offset (u64)
661    w.write_u64::<LittleEndian>(self.offset).map_err(|e| MechError {
662      file: file!().to_string(),
663      tokens: vec![],
664      msg: e.to_string(),
665      id: line!(),
666      kind: MechErrorKind::GenericError(e.to_string()),
667    })?;
668    // length (u64)
669    w.write_u64::<LittleEndian>(self.length).map_err(|e| MechError {
670      file: file!().to_string(),
671      tokens: vec![],
672      msg: e.to_string(),
673      id: line!(),
674      kind: MechErrorKind::GenericError(e.to_string()),
675    })?;
676
677    Ok(())
678  }
679}
680
681fn parse_const_entries(mut cur: Cursor<&[u8]>, count: usize) -> io::Result<Vec<ParsedConstEntry>> {
682  let mut out = Vec::with_capacity(count);
683  for _ in 0..count {
684    let type_id = cur.read_u32::<LittleEndian>()?;
685    let enc = cur.read_u8()?;
686    let align = cur.read_u8()?;
687    let flags = cur.read_u8()?;
688    let reserved = cur.read_u8()?;
689    let offset = cur.read_u64::<LittleEndian>()?;
690    let length = cur.read_u64::<LittleEndian>()?;
691    out.push(ParsedConstEntry { type_id, enc, align, flags, reserved, offset, length });
692  }
693  Ok(out)
694}
695
696pub fn verify_crc_trailer_seek<R: Read + Seek>(r: &mut R, total_len: u64) -> MResult<()> {
697  if total_len < 4 {
698    return Err(MechError {
699      file: file!().to_string(),
700      tokens: vec![],
701      msg: "File too short to contain CRC trailer".to_string(),
702      id: line!(),
703      kind: MechErrorKind::GenericError("File too short".to_string()),
704    });
705  }
706
707  // Read expected CRC from the last 4 bytes
708  r.seek(SeekFrom::Start(total_len - 4))?;
709  let expected_crc = r.read_u32::<LittleEndian>()?;
710
711  // Compute CRC over the prefix (everything except the last 4 bytes).
712  r.seek(SeekFrom::Start(0))?;
713  let payload_len = (total_len - 4) as usize;
714  let mut buf = vec![0u8; payload_len];
715  r.read_exact(&mut buf)?;
716
717  let file_crc = crc32fast::hash(&buf);
718  if file_crc != expected_crc {
719    Err(MechError {
720      file: file!().to_string(),
721      tokens: vec![],
722      msg: format!("CRC mismatch: expected {}, got {}", expected_crc, file_crc),
723      id: line!(),
724      kind: MechErrorKind::GenericError("CRC mismatch".to_string()),
725    })
726  } else {
727    Ok(())
728  }
729}
730
/// One instruction of a program's instruction stream in decoded form.
///
/// `DecodedInstr::write_to` serializes every known variant as its opcode byte
/// followed by the variant's fields in declaration order, little-endian.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum DecodedInstr {
  /// Constant load: `dst` and `const_id`.
  ConstLoad {
    dst: u32,
    const_id: u32,
  },
  /// Function `fxn_id` with no source operands.
  NullOp {
    fxn_id: u64,
    dst: u32,
  },
  /// Function `fxn_id` with one source operand.
  UnOp {
    fxn_id: u64,
    dst: u32,
    src: u32,
  },
  /// Function `fxn_id` with two source operands.
  BinOp {
    fxn_id: u64,
    dst: u32,
    lhs: u32,
    rhs: u32,
  },
  /// Function `fxn_id` with three source operands.
  TernOp {
    fxn_id: u64,
    dst: u32,
    a: u32,
    b: u32,
    c: u32,
  },
  /// Function `fxn_id` with four source operands.
  QuadOp {
    fxn_id: u64,
    dst: u32,
    a: u32,
    b: u32,
    c: u32,
    d: u32,
  },
  /// Function `fxn_id` with a variable number of operands; on the wire the
  /// operand list is preceded by its length as a `u32`.
  VarArg {
    fxn_id: u64,
    dst: u32,
    args: Vec<u32>,
  },
  /// Return of `src`.
  Ret {
    src: u32,
  },
  /// Unknown opcode or dynamic form: the raw opcode byte plus the bytes that
  /// followed it, written back verbatim by `write_to`.
  Unknown {
    opcode: u8,
    rest: Vec<u8>,
  },
}
744
745impl DecodedInstr {
746 pub fn from_u8(num: u8) -> Option<DecodedInstr> {
747    match OpCode::from_u8(num) {
748      Some(OpCode::ConstLoad) => Some(DecodedInstr::ConstLoad { dst: 0, const_id: 0 }),
749      Some(OpCode::NullOp) => Some(DecodedInstr::NullOp { fxn_id: 0, dst: 0 }),
750      Some(OpCode::Unop) => Some(DecodedInstr::UnOp { fxn_id: 0, dst: 0, src: 0 }),
751      Some(OpCode::Binop) => Some(DecodedInstr::BinOp { fxn_id: 0, dst: 0, lhs: 0, rhs: 0 }),
752      Some(OpCode::Ternop) => Some(DecodedInstr::TernOp { fxn_id: 0, dst: 0, a: 0, b: 0, c: 0 }),
753      Some(OpCode::Quadop) => Some(DecodedInstr::QuadOp { fxn_id: 0, dst: 0, a: 0, b: 0, c: 0, d: 0 }),
754      Some(OpCode::VarArg) => Some(DecodedInstr::VarArg { fxn_id: 0, dst: 0, args: vec![] }),
755      Some(OpCode::Return) => Some(DecodedInstr::Ret { src: 0 }),
756      _ => None,
757    }
758  }
759}
760
761fn decode_instructions(mut cur: Cursor<&[u8]>) -> MResult<Vec<DecodedInstr>> {
762  let mut out = Vec::new();
763  while (cur.position() as usize) < cur.get_ref().len() {
764    // read opcode (u64)
765    let pos_before = cur.position();
766    // if remaining < 8, can't read opcode
767    let rem = cur.get_ref().len() - pos_before as usize;
768    if rem < 8 {
769      return Err(MechError {
770        file: file!().to_string(),
771        tokens: vec![],
772        msg: "Truncated instruction: cannot read opcode".to_string(),
773        id: line!(),
774        kind: MechErrorKind::GenericError("Truncated instruction".to_string()),
775      });
776    }
777    let opcode_byte = cur.read_u8()?;
778    match OpCode::from_u8(opcode_byte) {
779      Some(OpCode::ConstLoad) => {
780        // need 4+4 bytes
781        let dst = cur.read_u32::<LittleEndian>()?;
782        let const_id = cur.read_u32::<LittleEndian>()?;
783        out.push(DecodedInstr::ConstLoad { dst, const_id });
784      }
785      Some(OpCode::Return) => {
786        let src = cur.read_u32::<LittleEndian>()?;
787        out.push(DecodedInstr::Ret { src });
788      }
789      Some(OpCode::NullOp) => {
790        // need 8+4 bytes
791        let fxn_id = cur.read_u64::<LittleEndian>()?;
792        let dst = cur.read_u32::<LittleEndian>()?;
793        out.push(DecodedInstr::NullOp { fxn_id: fxn_id, dst });
794      }
795      Some(OpCode::Unop) => {
796        // need 8+4+4 bytes
797        let fxn_id = cur.read_u64::<LittleEndian>()?;
798        let dst = cur.read_u32::<LittleEndian>()?;
799        let src = cur.read_u32::<LittleEndian>()?;
800        out.push(DecodedInstr::UnOp { fxn_id: fxn_id, dst, src });
801      }
802      Some(OpCode::Binop) => {
803        // need 8+4+4+4 bytes
804        let fxn_id = cur.read_u64::<LittleEndian>()?;
805        let dst = cur.read_u32::<LittleEndian>()?;
806        let lhs = cur.read_u32::<LittleEndian>()?;
807        let rhs = cur.read_u32::<LittleEndian>()?;
808        out.push(DecodedInstr::BinOp { fxn_id: fxn_id, dst, lhs, rhs });
809      }
810      Some(OpCode::Ternop) => {
811        // need 8+4+4+4+4 bytes
812        let fxn_id = cur.read_u64::<LittleEndian>()?;
813        let dst = cur.read_u32::<LittleEndian>()?;
814        let a = cur.read_u32::<LittleEndian>()?;
815        let b = cur.read_u32::<LittleEndian>()?;
816        let c = cur.read_u32::<LittleEndian>()?;
817        out.push(DecodedInstr::TernOp { fxn_id: fxn_id, dst, a, b, c });
818      }
819      Some(OpCode::Quadop) => {
820        // need 8+4+4+4+4+4 bytes
821        let fxn_id = cur.read_u64::<LittleEndian>()?;
822        let dst = cur.read_u32::<LittleEndian>()?;
823        let a = cur.read_u32::<LittleEndian>()?;
824        let b = cur.read_u32::<LittleEndian>()?;
825        let c = cur.read_u32::<LittleEndian>()?;
826        let d = cur.read_u32::<LittleEndian>()?;
827        out.push(DecodedInstr::QuadOp { fxn_id: fxn_id, dst, a, b, c, d });
828      }
829      Some(OpCode::VarArg) => {
830        // need at least 8+4+4 bytes
831        let fxn_id = cur.read_u64::<LittleEndian>()?;
832        let dst = cur.read_u32::<LittleEndian>()?;
833        let arg_count = cur.read_u32::<LittleEndian>()? as usize;
834        let mut args = Vec::with_capacity(arg_count);
835        for _ in 0..arg_count {
836          let a = cur.read_u32::<LittleEndian>()?;
837          args.push(a);
838        }
839        out.push(DecodedInstr::VarArg { fxn_id: fxn_id, dst, args });
840      }
841      unknown => {
842        return Err(MechError {
843          file: file!().to_string(),
844          tokens: vec![],
845          msg: format!("Unknown opcode: {:?}", unknown),
846          id: line!(),
847          kind: MechErrorKind::GenericError("Unknown opcode".to_string()),
848        });
849      }
850    }
851  }
852  Ok(out)
853}
854
855impl DecodedInstr {
856  pub fn write_to<W: Write>(&self, w: &mut W) -> MResult<()> {
857    match self {
858      DecodedInstr::ConstLoad { dst, const_id } => {
859        w.write_u8(OpCode::ConstLoad as u8)?;
860        w.write_u32::<LittleEndian>(*dst)?;
861        w.write_u32::<LittleEndian>(*const_id)?;
862      }
863      DecodedInstr::NullOp { fxn_id, dst } => {
864        w.write_u8(OpCode::NullOp as u8)?;
865        w.write_u64::<LittleEndian>(*fxn_id)?;
866        w.write_u32::<LittleEndian>(*dst)?;
867      }
868      DecodedInstr::UnOp { fxn_id, dst, src } => {
869        w.write_u8(OpCode::Unop as u8)?;
870        w.write_u64::<LittleEndian>(*fxn_id)?;
871        w.write_u32::<LittleEndian>(*dst)?;
872        w.write_u32::<LittleEndian>(*src)?;
873      }
874      DecodedInstr::BinOp { fxn_id, dst, lhs, rhs } => {
875        w.write_u8(OpCode::Binop as u8)?;
876        w.write_u64::<LittleEndian>(*fxn_id)?;
877        w.write_u32::<LittleEndian>(*dst)?;
878        w.write_u32::<LittleEndian>(*lhs)?;
879        w.write_u32::<LittleEndian>(*rhs)?;
880      }
881      DecodedInstr::TernOp { fxn_id, dst, a, b, c } => {
882        w.write_u8(OpCode::Ternop as u8)?;
883        w.write_u64::<LittleEndian>(*fxn_id)?;
884        w.write_u32::<LittleEndian>(*dst)?;
885        w.write_u32::<LittleEndian>(*a)?;
886        w.write_u32::<LittleEndian>(*b)?;
887        w.write_u32::<LittleEndian>(*c)?;
888      }
889      DecodedInstr::QuadOp { fxn_id, dst, a, b, c, d } => {
890        w.write_u8(OpCode::Quadop as u8)?;
891        w.write_u64::<LittleEndian>(*fxn_id)?;
892        w.write_u32::<LittleEndian>(*dst)?;
893        w.write_u32::<LittleEndian>(*a)?;
894        w.write_u32::<LittleEndian>(*b)?;
895        w.write_u32::<LittleEndian>(*c)?;
896        w.write_u32::<LittleEndian>(*d)?;
897      }
898      DecodedInstr::VarArg { fxn_id, dst, args } => {
899        w.write_u8(OpCode::VarArg as u8)?;
900        w.write_u64::<LittleEndian>(*fxn_id)?;
901        w.write_u32::<LittleEndian>(*dst)?;
902        w.write_u32::<LittleEndian>(args.len() as u32)?;
903        for a in args {
904          w.write_u32::<LittleEndian>(*a)?;
905        }
906      }
907      DecodedInstr::Ret { src } => {
908        w.write_u8(OpCode::Return as u8)?;
909        w.write_u32::<LittleEndian>(*src)?;
910      }
911      DecodedInstr::Unknown { opcode, rest } => {
912        w.write_u8(*opcode)?;
913        w.write_all(rest)?;
914      }
915    }
916    Ok(())
917  }
918}