mech_core/program/
program.rs

1use crate::*;
2use super::*;
3use byteorder::{LittleEndian, WriteBytesExt, ReadBytesExt};
4use std::io::Write;
5use std::io::{self, SeekFrom, Seek, Cursor};
6#[cfg(not(feature = "no_std"))]
7use std::fs::File;
8#[cfg(not(feature = "no_std"))]
9use std::path::Path;
10#[cfg(feature = "matrix")]
11use crate::matrix::Matrix;
12
/// In-memory form of a bytecode program, as assembled by
/// `load_program_from_reader` and serialized again by `to_bytes`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParsedProgram {
  /// Header read from the start of the image; carries the magic, version
  /// fields and the offsets/lengths used to locate the other sections.
  pub header: ByteCodeHeader,
  /// `u64` values read from the feature section
  /// (presumably feature identifiers; confirm against the writer).
  pub features: Vec<u64>,
  /// Type table; constant entries refer to it by index (`type_id`).
  pub types: TypeSection,
  /// Constant table: one descriptor per constant, pointing into `const_blob`.
  pub const_entries: Vec<ParsedConstEntry>,
  /// Raw bytes that the constant descriptors' `offset`/`length` index into.
  pub const_blob: Vec<u8>,
  /// Raw bytes of the instruction section, exactly as read.
  pub instr_bytes: Vec<u8>,
  /// Symbol id -> register, as read from the symbol section.
  pub symbols: HashMap<u64, Register>,
  /// Ids of the symbols whose mutable flag was non-zero.
  pub mutable_symbols: HashSet<u64>,
  /// Instructions decoded from `instr_bytes`.
  pub instrs: Vec<DecodedInstr>,
  /// Id -> UTF-8 name, as read from the dictionary section.
  pub dictionary: HashMap<u64, String>,
}
27
28impl ParsedProgram {
29
30 pub fn to_bytes(&self) -> MResult<Vec<u8>> {
31    let mut buf = Cursor::new(Vec::<u8>::new());
32
33    // 1. Header
34    self.header.write_to(&mut buf)?;
35
36    // 2. Features
37    buf.write_u32::<LittleEndian>(self.features.len() as u32)?;
38    for f in &self.features {
39      buf.write_u64::<LittleEndian>(*f)?;
40    }
41
42    // 3. Types
43    self.types.write_to(&mut buf)?;
44
45    // 4. Const entries
46    for entry in &self.const_entries {
47      entry.write_to(&mut buf)?;
48    }
49
50    // 5. Const blob
51    if !self.const_blob.is_empty() {
52      buf.write_all(&self.const_blob)?;
53    }
54
55    // 6. Symbols
56    for (id, reg) in &self.symbols {
57      let mutable = self.mutable_symbols.contains(id);
58      let entry = SymbolEntry::new(*id, mutable, *reg);
59      entry.write_to(&mut buf)?;
60    }
61
62    // 7. Instructions
63    for ins in &self.instrs {
64      ins.write_to(&mut buf)?;
65    }
66
67    // 8. Dictionary
68    for (id, name) in &self.dictionary {
69      let dict_entry = DictEntry::new(*id, name);
70      dict_entry.write_to(&mut buf)?;
71    }
72
73    // 9. CRC32 trailer
74    let bytes_so_far = buf.get_ref().as_slice();
75    let checksum = crc32fast::hash(bytes_so_far);
76    buf.write_u32::<LittleEndian>(checksum)?;
77
78    Ok(buf.into_inner())
79  }
80
  /// Parses a program from an in-memory bytecode image.
  ///
  /// Delegates to `load_program_from_bytes`, which checks the CRC32 trailer
  /// before parsing the sections.
  pub fn from_bytes(bytes: &[u8]) -> MResult<ParsedProgram> {
    load_program_from_bytes(bytes)
  }
84
85  pub fn validate(&self) -> MResult<()> {
86    // Check magic number
87    if !self.header.validate_magic(b"MECH") {
88      return Err(MechError {file: file!().to_string(), tokens: vec![], msg: "Invalid magic number".to_string(), id: line!(), kind: MechErrorKind::GenericError("Invalid magic number".to_string())});
89    }
90    // Check version number
91    if self.header.version != 1 {
92      return Err(MechError {file: file!().to_string(), tokens: vec![], msg: "Unsupported bytecode version".to_string(), id: line!(), kind: MechErrorKind::GenericError("Unsupported bytecode version".to_string())});
93    }
94    // Check mech version
95    if self.header.mech_ver != parse_version_to_u16(env!("CARGO_PKG_VERSION")).unwrap() {
96      return Err(MechError {file: file!().to_string(), tokens: vec![], msg: "Incompatible Mech version".to_string(), id: line!(), kind: MechErrorKind::GenericError("Incompatible Mech version".to_string())});
97    }
98    Ok(())
99  }
100
101  pub fn decode_const_entries(&self) -> MResult<Vec<Value>> {
102    let mut out = Vec::with_capacity(self.const_entries.len());
103    let blob_len = self.const_blob.len() as u64;
104
105    for const_entry in &self.const_entries {
106      // Only support Inline encoding for now
107      if const_entry.enc != ConstEncoding::Inline as u8 {
108        return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Unsupported constant encoding".to_string(),id: line!(),kind: MechErrorKind::GenericError("Unsupported constant encoding".to_string())});
109      }
110
111      // Bounds check
112      if const_entry.offset.checked_add(const_entry.length).is_none() {
113          return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Constant entry out of bounds".to_string(),id: line!(),kind: MechErrorKind::GenericError("Constant entry out of bounds".to_string())});
114      }
115      let end = const_entry.offset + const_entry.length;
116      if end > blob_len {
117        return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Constant entry out of bounds".to_string(),id: line!(),kind: MechErrorKind::GenericError("Constant entry out of bounds".to_string())});
118      }
119
120      // Alignment check (if your alignment semantics differ, change this)
121      if !check_alignment(const_entry.offset, const_entry.align) {
122        return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Constant entry alignment error".to_string(),id: line!(),kind: MechErrorKind::GenericError("Constant entry alignment error".to_string())});
123      }
124
125      // Copy bytes out (we clone into Vec<u8> to own data)
126      let start = const_entry.offset as usize;
127      let len = const_entry.length as usize;
128      let data = self.const_blob[start .. start + len].to_vec();
129
130      // get the type from the id
131      let ty = &self.types.entries[const_entry.type_id as usize];
132
133      let val: Value = match ty.tag {
134        #[cfg(feature = "bool")]
135        TypeTag::Bool => {
136          if data.len() != 1 {
137            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Bool const entry must be 1 byte".to_string(), id: line!(), kind: MechErrorKind::GenericError("Bool const entry must be 1 byte".to_string())});
138          }
139          let value = data[0] != 0;
140          Value::Bool(Ref::new(value))
141        },
142        #[cfg(feature = "string")]
143        TypeTag::String => {
144          if data.len() < 4 {
145            return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "String constant too short".to_string(),id: line!(),kind: MechErrorKind::GenericError("String constant too short".to_string()) });
146          }
147          let s = String::from_le(&data);
148          Value::String(Ref::new(s))
149        }
150        #[cfg(feature = "u8")]
151        TypeTag::U8 => {
152          if data.len() != 1 {
153            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U8 const entry must be 1 byte".to_string(), id: line!(), kind: MechErrorKind::GenericError("U8 const entry must be 1 byte".to_string())});
154          }
155          let value = data[0];
156          Value::U8(Ref::new(value))
157        },
158        #[cfg(feature = "u16")]
159        TypeTag::U16 => {
160          if data.len() != 2 {
161            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U16 const entry must be 2 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U16 const entry must be 2 bytes".to_string())});
162          }
163          let value = u16::from_le_bytes(data.try_into().unwrap());
164          Value::U16(Ref::new(value))
165        },
166        #[cfg(feature = "u32")]
167        TypeTag::U32 => {
168          if data.len() != 4 {
169            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U32 const entry must be 4 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U32 const entry must be 4 bytes".to_string())});
170          }
171          let value = u32::from_le_bytes(data.try_into().unwrap());
172          Value::U32(Ref::new(value))
173        },
174        #[cfg(feature = "u64")]
175        TypeTag::U64 => {
176          if data.len() != 8 {
177            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U64 const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U64 const entry must be 8 bytes".to_string())});
178          }
179          let value = u64::from_le_bytes(data.try_into().unwrap());
180          Value::U64(Ref::new(value))
181        },
182        #[cfg(feature = "u128")]
183        TypeTag::U128 => {
184          if data.len() != 16 {
185            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "U128 const entry must be 16 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("U128 const entry must be 16 bytes".to_string())});
186          }
187          let value = u128::from_le_bytes(data.try_into().unwrap());
188          Value::U128(Ref::new(value))
189        },
190        #[cfg(feature = "i8")]
191        TypeTag::I8 => {
192          if data.len() != 1 {
193            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I8 const entry must be 1 byte".to_string(), id: line!(), kind: MechErrorKind::GenericError("I8 const entry must be 1 byte".to_string())});
194          }
195          let value = data[0] as i8;
196          Value::I8(Ref::new(value))
197        },
198        #[cfg(feature = "i16")]
199        TypeTag::I16 => {
200          if data.len() != 2 {
201            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I16 const entry must be 2 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I16 const entry must be 2 bytes".to_string())});
202          }
203          let value = i16::from_le_bytes(data.try_into().unwrap());
204          Value::I16(Ref::new(value))
205        },
206        #[cfg(feature = "i32")]
207        TypeTag::I32 => {
208          if data.len() != 4 {
209            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I32 const entry must be 4 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I32 const entry must be 4 bytes".to_string())});
210          }
211          let value = i32::from_le_bytes(data.try_into().unwrap());
212          Value::I32(Ref::new(value))
213        },
214        #[cfg(feature = "i64")]
215        TypeTag::I64 => {
216          if data.len() != 8 {
217            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I64 const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I64 const entry must be 8 bytes".to_string())});
218          }
219          let value = i64::from_le_bytes(data.try_into().unwrap());
220          Value::I64(Ref::new(value))
221        }
222        #[cfg(feature = "i128")]
223        TypeTag::I128 => {
224          if data.len() != 16 {
225            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "I128 const entry must be 16 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("I128 const entry must be 16 bytes".to_string())});
226          }
227          let value = i128::from_le_bytes(data.try_into().unwrap());
228          Value::I128(Ref::new(value))
229        },
230        #[cfg(feature = "f32")]
231        TypeTag::F32 => {
232          if data.len() != 4 {
233            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "F32 const entry must be 4 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("F32 const entry must be 4 bytes".to_string())});
234          }
235          let value = f32::from_le_bytes(data.try_into().unwrap());
236          Value::F32(Ref::new(F32::new(value)))
237        },
238        #[cfg(feature = "f64")]
239        TypeTag::F64 => {
240          if data.len() != 8 {
241            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "F64 const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("F64 const entry must be 8 bytes".to_string())});
242          }
243          let value = f64::from_le_bytes(data.try_into().unwrap());
244          Value::F64(Ref::new(F64::new(value)))
245        },
246        #[cfg(feature = "complex")]
247        TypeTag::C64 => {
248          if data.len() != 16 {
249            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "C64 const entry must be 8 bytes real + 8 bytes imag".to_string(), id: line!(), kind: MechErrorKind::GenericError("C64 const entry must be 8 bytes real + 8 bytes imag".to_string())});
250          }
251          let real = f64::from_le_bytes(data[0..8].try_into().unwrap());
252          let imag = f64::from_le_bytes(data[8..16].try_into().unwrap());
253          Value::C64(Ref::new(C64::new(real, imag)))
254        },
255        #[cfg(feature = "rational")]
256        TypeTag::R64 => {
257          if data.len() != 16 {
258            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "R64 const entry must be 8 bytes numerator + 8 bytes denominator".to_string(), id: line!(), kind: MechErrorKind::GenericError("R64 const entry must be 8 bytes numerator + 8 bytes denominator".to_string())});
259          }
260          let numer = i64::from_le_bytes(data[0..8].try_into().unwrap());
261          let denom = i64::from_le_bytes(data[8..16].try_into().unwrap());
262          Value::R64(Ref::new(R64::new(numer, denom)))
263        },
264        #[cfg(all(feature = "matrix", feature = "string"))]
265        TypeTag::MatrixString => {
266          if data.len() < 8 {
267            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
268          }
269          let matrix = Matrix::<String>::from_le(&data);
270          Value::MatrixString(matrix)
271        }
272        #[cfg(all(feature = "matrix", feature = "bool"))]
273        TypeTag::MatrixBool => {
274          if data.len() < 1 {
275            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
276          }
277          let matrix = Matrix::<bool>::from_le(&data);
278          Value::MatrixBool(matrix)
279        }
280        #[cfg(all(feature = "matrix", feature = "u8"))]
281        TypeTag::MatrixU8 => {
282          if data.len() < 1 {
283            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
284          }
285          let matrix = Matrix::<u8>::from_le(&data);
286          Value::MatrixU8(matrix)
287        }
288        #[cfg(all(feature = "matrix", feature = "i8"))]
289        TypeTag::MatrixI8 => {
290          if data.len() < 8 {
291            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
292          }
293          let matrix = Matrix::<i8>::from_le(&data);
294          Value::MatrixI8(matrix)
295        }
296        #[cfg(all(feature = "matrix", feature = "f32"))]
297        TypeTag::MatrixF32 => {
298          if data.len() < 8 {
299            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
300          }
301          let matrix = Matrix::<F32>::from_le(&data);
302          Value::MatrixF32(matrix)
303        }
304        #[cfg(all(feature = "matrix", feature = "f64"))]
305        TypeTag::MatrixF64 => {
306          if data.len() < 8 {
307            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
308          }
309          let matrix = Matrix::<F64>::from_le(&data);
310          Value::MatrixF64(matrix)
311        }
312        #[cfg(all(feature = "matrix", feature = "u16"))]
313        TypeTag::MatrixU16 => {
314          if data.len() < 8 {
315            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
316          }
317          let matrix = Matrix::<u16>::from_le(&data);
318          Value::MatrixU16(matrix)
319        },
320        #[cfg(all(feature = "matrix", feature = "u32"))]
321        TypeTag::MatrixU32 => {
322          if data.len() < 8 {
323            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
324          }
325          let matrix = Matrix::<u32>::from_le(&data);
326          Value::MatrixU32(matrix)
327        },
328        #[cfg(all(feature = "matrix", feature = "u64"))]
329        TypeTag::MatrixU64 => {
330          if data.len() < 8 {
331            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
332          }
333          let matrix = Matrix::<u64>::from_le(&data);
334          Value::MatrixU64(matrix)
335        },
336        #[cfg(all(feature = "matrix", feature = "u128"))]
337        TypeTag::MatrixU128 => {
338          if data.len() < 8 {
339            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
340          }
341          let matrix = Matrix::<u128>::from_le(&data);
342          Value::MatrixU128(matrix)
343        },
344        #[cfg(all(feature = "matrix", feature = "i16"))]
345        TypeTag::MatrixI16 => {
346          if data.len() < 8 {
347            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
348          }
349          let matrix = Matrix::<i16>::from_le(&data);
350          Value::MatrixI16(matrix)
351        },
352        #[cfg(all(feature = "matrix", feature = "i32"))]
353        TypeTag::MatrixI32 => {
354          if data.len() < 8 {
355            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
356          }
357          let matrix = Matrix::<i32>::from_le(&data);
358          Value::MatrixI32(matrix)
359        },
360        #[cfg(all(feature = "matrix", feature = "i64"))]
361        TypeTag::MatrixI64 => {
362          if data.len() < 8 {
363            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
364          }
365          let matrix = Matrix::<i64>::from_le(&data);
366          Value::MatrixI64(matrix)
367        },
368        #[cfg(all(feature = "matrix", feature = "i128"))]
369        TypeTag::MatrixI128 => {
370          if data.len() < 8 {
371            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
372          }
373          let matrix = Matrix::<i128>::from_le(&data);
374          Value::MatrixI128(matrix)
375        },
376        #[cfg(all(feature = "matrix", feature = "c64"))]
377        TypeTag::MatrixC64 => {
378          if data.len() < 8 {
379            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
380          }
381          let matrix = Matrix::<C64>::from_le(&data);
382          Value::MatrixC64(matrix)
383        },
384        #[cfg(all(feature = "matrix", feature = "r64"))]
385        TypeTag::MatrixR64 => {
386          if data.len() < 8 {
387            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
388          }
389          let matrix = Matrix::<R64>::from_le(&data);
390          Value::MatrixR64(matrix)
391        },
392        #[cfg(feature = "matrix")]
393        TypeTag::MatrixIndex => {
394          if data.len() < 8 {
395            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Matrix const entry must be at least 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Matrix const entry must be at least 8 bytes".to_string())});
396          }
397          let matrix = Matrix::<usize>::from_le(&data);
398          Value::MatrixIndex(matrix)
399        },
400        TypeTag::Index => {
401          if data.len() != 8 {
402            return Err(MechError{file: file!().to_string(), tokens: vec![], msg: "Index const entry must be 8 bytes".to_string(), id: line!(), kind: MechErrorKind::GenericError("Index const entry must be 8 bytes".to_string())});
403          }
404          let value = u64::from_le_bytes(data.try_into().unwrap()) as usize;
405          Value::Index(Ref::new(value))
406        },
407        #[cfg(feature = "set")]
408        TypeTag::Set => {
409          if data.len() < 4 {
410            return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Set const entry must be at least 4 bytes (num elements)".to_string(),id: line!(),kind: MechErrorKind::GenericError("Set const entry must be at least 4 bytes (num elements)".to_string()),});
411          }
412          let set = MechSet::from_le(&data);
413          Value::Set(Ref::new(set))
414        },
415        #[cfg(feature = "table")]
416        TypeTag::Table => {
417          if data.len() < 8 {
418            return Err(MechError {file: file!().to_string(),tokens: vec![],msg: "Table const entry must be at least 8 bytes (rows + cols)".to_string(),id: line!(),kind: MechErrorKind::GenericError("Table const entry must be at least 8 bytes (rows + cols)".to_string()),});
419          }
420          let table = MechTable::from_le(&data);
421          Value::Table(Ref::new(table))
422        }
423        _ => return Err(MechError{file: file!().to_string(), tokens: vec![], msg: format!("Unsupported constant type {:?}", ty.tag), id: line!(), kind: MechErrorKind::GenericError(format!("Unsupported constant type {:?}", ty.tag))}),
424      };
425      out.push(val);
426    }
427    Ok(out)
428  }
429}
430
/// Reports whether `offset` is a multiple of `align`.
///
/// An alignment of zero is treated as invalid and always yields `false`.
fn check_alignment(offset: u64, align: u8) -> bool {
  // `checked_rem` yields `None` for a zero divisor, which folds the
  // "align == 0 is invalid" rule into the same comparison.
  offset.checked_rem(u64::from(align)) == Some(0)
}
437
438// Load Program
439// ----------------------------------------------------------------------------
440
441#[cfg(not(feature = "no_std"))]
442pub fn load_program_from_file(path: impl AsRef<Path>) -> MResult<ParsedProgram> {
443  let path = path.as_ref();
444  let mut f = File::open(path)?;
445
446  // total length for bounds checks
447  let total_len = f.metadata()?.len();
448
449  // Verify CRC trailer first; ensures fully readable, too.
450  verify_crc_trailer_seek(&mut f, total_len)?;
451
452  // Parse from the start
453  f.seek(SeekFrom::Start(0))?;
454  load_program_from_reader(&mut f, total_len)
455}
456
457pub fn load_program_from_bytes(bytes: &[u8]) -> MResult<ParsedProgram> {
458  let total_len = bytes.len() as u64;
459
460  let mut cur = Cursor::new(bytes);
461  verify_crc_trailer_seek(&mut cur, total_len)?;
462
463  // Parse from the start
464  cur.seek(SeekFrom::Start(0))?;
465  load_program_from_reader(&mut cur, total_len)
466}
467
468fn load_program_from_reader<R: Read + Seek>(r: &mut R, total_len: u64) -> MResult<ParsedProgram> {
469  r.seek(SeekFrom::Start(0))?;
470  let mut header_buf = vec![0u8; ByteCodeHeader::HEADER_SIZE];
471  r.read_exact(&mut header_buf)?;
472
473  // 1) read header blob
474  let mut header_cursor = Cursor::new(&header_buf[..]);
475  let header = ByteCodeHeader::read_from(&mut header_cursor)?;
476
477  // quick magic check
478  if !header.validate_magic(&(*b"MECH")) {
479    return Err(MechError {
480      file: file!().to_string(),
481      tokens: vec![],
482      msg: format!("Invalid magic in bytecode header: expected 'MECH', got {:?}", header.magic),
483      id: line!(),
484      kind: MechErrorKind::GenericError("Invalid magic".to_string()),
485    });
486  }
487
488  // 2. read features
489  let mut features = Vec::new();
490  if header.feature_off != 0 && header.feature_off + 4 <= total_len.saturating_sub(4) {
491    r.seek(SeekFrom::Start(header.feature_off))?;
492    let c = r.read_u32::<LittleEndian>()? as usize;
493    for _ in 0..c {
494      let v = r.read_u64::<LittleEndian>()?;
495      features.push(v);
496    }
497  }
498
499  // 3. read types
500  let mut types = TypeSection::new();
501  if header.types_off != 0 && header.types_off + 4 <= total_len.saturating_sub(4) {
502    r.seek(SeekFrom::Start(header.types_off))?;
503    let types_count = r.read_u32::<LittleEndian>()? as usize;
504    for _ in 0..types_count {
505      let tag = r.read_u16::<LittleEndian>()?;
506      let _reserved = r.read_u16::<LittleEndian>()?; // reserved, always 0
507      let _version = r.read_u32::<LittleEndian>()?; // version, always 1
508      let bytes_len = r.read_u32::<LittleEndian>()? as usize;
509      let mut bytes = vec![0u8; bytes_len];
510      r.read_exact(&mut bytes)?;
511      if let Some(tag) = TypeTag::from_u16(tag) {
512        types.entries.push(TypeEntry { tag, bytes });
513      } else {
514        return Err(MechError {
515          file: file!().to_string(),
516          tokens: vec![],
517          msg: format!("Unknown type tag: {}", tag),
518          id: line!(),
519          kind: MechErrorKind::GenericError("Unknown type tag".to_string()),
520        });
521      }
522    }
523  }
524
525  // 4. read const table
526  let mut const_entries = Vec::new();
527  if header.const_tbl_off != 0 && header.const_tbl_len > 0 {
528    r.seek(SeekFrom::Start(header.const_tbl_off))?;
529    let mut tbl_bytes = vec![0u8; header.const_tbl_len as usize];
530    r.read_exact(&mut tbl_bytes)?;
531    let cur = Cursor::new(&tbl_bytes[..]);
532    const_entries = parse_const_entries(cur, header.const_count as usize)?;
533  }
534
535  // read const blob
536  let mut const_blob = vec![];
537  if header.const_blob_off != 0 && header.const_blob_len > 0 {
538    r.seek(SeekFrom::Start(header.const_blob_off))?;
539    const_blob.resize(header.const_blob_len as usize, 0);
540    r.read_exact(&mut const_blob)?;
541  }
542
543  // 5. read symbols
544  let mut symbols = HashMap::new();
545  let mut mutable_symbols = HashSet::new();
546  if header.symbols_off != 0 && header.symbols_len > 0 {
547    r.seek(SeekFrom::Start(header.symbols_off))?;
548    let mut symbols_bytes = vec![0u8; header.symbols_len as usize];
549    r.read_exact(&mut symbols_bytes)?;
550    let mut cur = Cursor::new(&symbols_bytes[..]);
551    for _ in 0..(header.symbols_len / 12) {
552      let id = cur.read_u64::<LittleEndian>()?;
553      let mutable = cur.read_u8()? != 0;
554      let reg = cur.read_u32::<LittleEndian>()?;
555      symbols.insert(id, reg);
556      if mutable {
557        mutable_symbols.insert(id);
558      }
559    }
560  }
561
562  // 6. read instr bytes
563  let mut instr_bytes = vec![];
564  if header.instr_off != 0 && header.instr_len > 0 {
565    r.seek(SeekFrom::Start(header.instr_off))?;
566    instr_bytes.resize(header.instr_len as usize, 0);
567    r.read_exact(&mut instr_bytes)?;
568  }
569
570  // 7. read dictionary
571  let mut dictionary = HashMap::new();
572  if header.dict_off != 0 && header.dict_len > 0 {
573    r.seek(SeekFrom::Start(header.dict_off))?;
574    let mut dict_bytes = vec![0u8; header.dict_len as usize];
575    r.read_exact(&mut dict_bytes)?;
576    let mut cur = Cursor::new(&dict_bytes[..]);
577    while cur.position() < dict_bytes.len() as u64 {
578      let id = cur.read_u64::<LittleEndian>()?;
579      let name_len = cur.read_u32::<LittleEndian>()? as usize;
580      let mut name_bytes = vec![0u8; name_len];
581      cur.read_exact(&mut name_bytes)?;
582      let name = String::from_utf8(name_bytes).map_err(|_| MechError {
583        file: file!().to_string(),
584        tokens: vec![],
585        msg: "Invalid UTF-8 in dictionary entry".to_string(),
586        id: line!(),
587        kind: MechErrorKind::GenericError("Invalid UTF-8".to_string()),
588      })?;
589      dictionary.insert(id, name);
590    }
591  }
592
593  // decode instructions
594  let instrs = decode_instructions(Cursor::new(&instr_bytes[..]))?;
595  
596  Ok(ParsedProgram { header, features, types, const_entries, const_blob, instr_bytes, symbols, mutable_symbols, instrs, dictionary })
597}
598
/// Splits a packed 16-bit version into `(major, minor, patch)`.
///
/// Bit layout, most significant first: 3 bits major, 5 bits minor,
/// 8 bits patch.
pub fn decode_version_from_u16(v: u16) -> (u16, u16, u16) {
  const MAJOR_SHIFT: u32 = 13;
  const MINOR_SHIFT: u32 = 8;
  const MAJOR_MASK: u16 = 0x07;
  const MINOR_MASK: u16 = 0x1F;
  const PATCH_MASK: u16 = 0xFF;

  (
    (v >> MAJOR_SHIFT) & MAJOR_MASK,
    (v >> MINOR_SHIFT) & MINOR_MASK,
    v & PATCH_MASK,
  )
}
605
/// One record of the constant table: describes where a constant's payload
/// lives in the constant blob and how to interpret it.
///
/// Encoded as 24 bytes, little-endian, in field order.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParsedConstEntry {
  /// Index of the constant's type in the program's type table.
  pub type_id: u32,
  /// Encoding discriminant; compared against `ConstEncoding` values when decoding.
  pub enc: u8,
  /// Required alignment of `offset`; zero is rejected when decoding.
  pub align: u8,
  /// Read and written verbatim; not interpreted by the code in this file.
  pub flags: u8,
  /// Read and written verbatim; not interpreted by the code in this file.
  pub reserved: u8,
  /// Byte offset of the payload within the constant blob.
  pub offset: u64,
  /// Payload length in bytes.
  pub length: u64,
}
617
618
619impl ParsedConstEntry {
620  pub fn write_to<W: Write>(&self, w: &mut W) -> MResult<()> {
621    // type_id (u32)
622    w.write_u32::<LittleEndian>(self.type_id)
623      .map_err(|e| MechError {
624        file: file!().to_string(),
625        tokens: vec![],
626        msg: e.to_string(),
627        id: line!(),
628        kind: MechErrorKind::GenericError(e.to_string()),
629      })?;
630    // enc, align, flags, reserved (u8 each)
631    w.write_u8(self.enc).map_err(|e| MechError {
632      file: file!().to_string(),
633      tokens: vec![],
634      msg: e.to_string(),
635      id: line!(),
636      kind: MechErrorKind::GenericError(e.to_string()),
637    })?;
638    w.write_u8(self.align).map_err(|e| MechError {
639      file: file!().to_string(),
640      tokens: vec![],
641      msg: e.to_string(),
642      id: line!(),
643      kind: MechErrorKind::GenericError(e.to_string()),
644    })?;
645    w.write_u8(self.flags).map_err(|e| MechError {
646      file: file!().to_string(),
647      tokens: vec![],
648      msg: e.to_string(),
649      id: line!(),
650      kind: MechErrorKind::GenericError(e.to_string()),
651    })?;
652    w.write_u8(self.reserved).map_err(|e| MechError {
653      file: file!().to_string(),
654      tokens: vec![],
655      msg: e.to_string(),
656      id: line!(),
657      kind: MechErrorKind::GenericError(e.to_string()),
658    })?;
659    // offset (u64)
660    w.write_u64::<LittleEndian>(self.offset).map_err(|e| MechError {
661      file: file!().to_string(),
662      tokens: vec![],
663      msg: e.to_string(),
664      id: line!(),
665      kind: MechErrorKind::GenericError(e.to_string()),
666    })?;
667    // length (u64)
668    w.write_u64::<LittleEndian>(self.length).map_err(|e| MechError {
669      file: file!().to_string(),
670      tokens: vec![],
671      msg: e.to_string(),
672      id: line!(),
673      kind: MechErrorKind::GenericError(e.to_string()),
674    })?;
675
676    Ok(())
677  }
678}
679
680fn parse_const_entries(mut cur: Cursor<&[u8]>, count: usize) -> io::Result<Vec<ParsedConstEntry>> {
681  let mut out = Vec::with_capacity(count);
682  for _ in 0..count {
683    let type_id = cur.read_u32::<LittleEndian>()?;
684    let enc = cur.read_u8()?;
685    let align = cur.read_u8()?;
686    let flags = cur.read_u8()?;
687    let reserved = cur.read_u8()?;
688    let offset = cur.read_u64::<LittleEndian>()?;
689    let length = cur.read_u64::<LittleEndian>()?;
690    out.push(ParsedConstEntry { type_id, enc, align, flags, reserved, offset, length });
691  }
692  Ok(out)
693}
694
695pub fn verify_crc_trailer_seek<R: Read + Seek>(r: &mut R, total_len: u64) -> MResult<()> {
696  if total_len < 4 {
697    return Err(MechError {
698      file: file!().to_string(),
699      tokens: vec![],
700      msg: "File too short to contain CRC trailer".to_string(),
701      id: line!(),
702      kind: MechErrorKind::GenericError("File too short".to_string()),
703    });
704  }
705
706  // Read expected CRC from the last 4 bytes
707  r.seek(SeekFrom::Start(total_len - 4))?;
708  let expected_crc = r.read_u32::<LittleEndian>()?;
709
710  // Compute CRC over the prefix (everything except the last 4 bytes).
711  r.seek(SeekFrom::Start(0))?;
712  let payload_len = (total_len - 4) as usize;
713  let mut buf = vec![0u8; payload_len];
714  r.read_exact(&mut buf)?;
715
716  let file_crc = crc32fast::hash(&buf);
717  if file_crc != expected_crc {
718    Err(MechError {
719      file: file!().to_string(),
720      tokens: vec![],
721      msg: format!("CRC mismatch: expected {}, got {}", expected_crc, file_crc),
722      id: line!(),
723      kind: MechErrorKind::GenericError("CRC mismatch".to_string()),
724    })
725  } else {
726    Ok(())
727  }
728}
729
/// A single bytecode instruction in structured form.
///
/// The per-variant notes describe the byte layout produced by
/// `DecodedInstr::write_to`; all multi-byte integers are little-endian.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum DecodedInstr {
  /// Opcode byte, then `dst` and `const_id` (u32 each).
  ConstLoad {
    dst: u32,
    const_id: u32,
  },
  /// Opcode byte, `fxn_id` (u64), then `dst` (u32).
  NullOp {
    fxn_id: u64,
    dst: u32,
  },
  /// Opcode byte, `fxn_id` (u64), then `dst` and `src` (u32 each).
  UnOp {
    fxn_id: u64,
    dst: u32,
    src: u32,
  },
  /// Opcode byte, `fxn_id` (u64), then `dst`, `lhs`, `rhs` (u32 each).
  BinOp {
    fxn_id: u64,
    dst: u32,
    lhs: u32,
    rhs: u32,
  },
  /// Opcode byte, `fxn_id` (u64), then `dst`, `a`, `b`, `c` (u32 each).
  TernOp {
    fxn_id: u64,
    dst: u32,
    a: u32,
    b: u32,
    c: u32,
  },
  /// Opcode byte, `fxn_id` (u64), then `dst`, `a`, `b`, `c`, `d` (u32 each).
  QuadOp {
    fxn_id: u64,
    dst: u32,
    a: u32,
    b: u32,
    c: u32,
    d: u32,
  },
  /// Opcode byte, `fxn_id` (u64), `dst` (u32), the argument count (u32),
  /// then each entry of `args` (u32 each).
  VarArg {
    fxn_id: u64,
    dst: u32,
    args: Vec<u32>,
  },
  /// Opcode byte, then `src` (u32).
  Ret {
    src: u32,
  },
  /// Unknown opcode or dynamic form: the raw `opcode` byte followed by the
  /// bytes of `rest`, written back verbatim.
  Unknown {
    opcode: u8,
    rest: Vec<u8>,
  },
}
743
744impl DecodedInstr {
745 pub fn from_u8(num: u8) -> Option<DecodedInstr> {
746    match OpCode::from_u8(num) {
747      Some(OpCode::ConstLoad) => Some(DecodedInstr::ConstLoad { dst: 0, const_id: 0 }),
748      Some(OpCode::NullOp) => Some(DecodedInstr::NullOp { fxn_id: 0, dst: 0 }),
749      Some(OpCode::Unop) => Some(DecodedInstr::UnOp { fxn_id: 0, dst: 0, src: 0 }),
750      Some(OpCode::Binop) => Some(DecodedInstr::BinOp { fxn_id: 0, dst: 0, lhs: 0, rhs: 0 }),
751      Some(OpCode::Ternop) => Some(DecodedInstr::TernOp { fxn_id: 0, dst: 0, a: 0, b: 0, c: 0 }),
752      Some(OpCode::Quadop) => Some(DecodedInstr::QuadOp { fxn_id: 0, dst: 0, a: 0, b: 0, c: 0, d: 0 }),
753      Some(OpCode::VarArg) => Some(DecodedInstr::VarArg { fxn_id: 0, dst: 0, args: vec![] }),
754      Some(OpCode::Return) => Some(DecodedInstr::Ret { src: 0 }),
755      _ => None,
756    }
757  }
758}
759
760fn decode_instructions(mut cur: Cursor<&[u8]>) -> MResult<Vec<DecodedInstr>> {
761  let mut out = Vec::new();
762  while (cur.position() as usize) < cur.get_ref().len() {
763    // read opcode (u64)
764    let pos_before = cur.position();
765    // if remaining < 8, can't read opcode
766    let rem = cur.get_ref().len() - pos_before as usize;
767    if rem < 8 {
768      return Err(MechError {
769        file: file!().to_string(),
770        tokens: vec![],
771        msg: "Truncated instruction: cannot read opcode".to_string(),
772        id: line!(),
773        kind: MechErrorKind::GenericError("Truncated instruction".to_string()),
774      });
775    }
776    let opcode_byte = cur.read_u8()?;
777    match OpCode::from_u8(opcode_byte) {
778      Some(OpCode::ConstLoad) => {
779        // need 4+4 bytes
780        let dst = cur.read_u32::<LittleEndian>()?;
781        let const_id = cur.read_u32::<LittleEndian>()?;
782        out.push(DecodedInstr::ConstLoad { dst, const_id });
783      }
784      Some(OpCode::Return) => {
785        let src = cur.read_u32::<LittleEndian>()?;
786        out.push(DecodedInstr::Ret { src });
787      }
788      Some(OpCode::NullOp) => {
789        // need 8+4 bytes
790        let fxn_id = cur.read_u64::<LittleEndian>()?;
791        let dst = cur.read_u32::<LittleEndian>()?;
792        out.push(DecodedInstr::NullOp { fxn_id: fxn_id, dst });
793      }
794      Some(OpCode::Unop) => {
795        // need 8+4+4 bytes
796        let fxn_id = cur.read_u64::<LittleEndian>()?;
797        let dst = cur.read_u32::<LittleEndian>()?;
798        let src = cur.read_u32::<LittleEndian>()?;
799        out.push(DecodedInstr::UnOp { fxn_id: fxn_id, dst, src });
800      }
801      Some(OpCode::Binop) => {
802        // need 8+4+4+4 bytes
803        let fxn_id = cur.read_u64::<LittleEndian>()?;
804        let dst = cur.read_u32::<LittleEndian>()?;
805        let lhs = cur.read_u32::<LittleEndian>()?;
806        let rhs = cur.read_u32::<LittleEndian>()?;
807        out.push(DecodedInstr::BinOp { fxn_id: fxn_id, dst, lhs, rhs });
808      }
809      Some(OpCode::Ternop) => {
810        // need 8+4+4+4+4 bytes
811        let fxn_id = cur.read_u64::<LittleEndian>()?;
812        let dst = cur.read_u32::<LittleEndian>()?;
813        let a = cur.read_u32::<LittleEndian>()?;
814        let b = cur.read_u32::<LittleEndian>()?;
815        let c = cur.read_u32::<LittleEndian>()?;
816        out.push(DecodedInstr::TernOp { fxn_id: fxn_id, dst, a, b, c });
817      }
818      Some(OpCode::Quadop) => {
819        // need 8+4+4+4+4+4 bytes
820        let fxn_id = cur.read_u64::<LittleEndian>()?;
821        let dst = cur.read_u32::<LittleEndian>()?;
822        let a = cur.read_u32::<LittleEndian>()?;
823        let b = cur.read_u32::<LittleEndian>()?;
824        let c = cur.read_u32::<LittleEndian>()?;
825        let d = cur.read_u32::<LittleEndian>()?;
826        out.push(DecodedInstr::QuadOp { fxn_id: fxn_id, dst, a, b, c, d });
827      }
828      Some(OpCode::VarArg) => {
829        // need at least 8+4+4 bytes
830        let fxn_id = cur.read_u64::<LittleEndian>()?;
831        let dst = cur.read_u32::<LittleEndian>()?;
832        let arg_count = cur.read_u32::<LittleEndian>()? as usize;
833        let mut args = Vec::with_capacity(arg_count);
834        for _ in 0..arg_count {
835          let a = cur.read_u32::<LittleEndian>()?;
836          args.push(a);
837        }
838        out.push(DecodedInstr::VarArg { fxn_id: fxn_id, dst, args });
839      }
840      unknown => {
841        return Err(MechError {
842          file: file!().to_string(),
843          tokens: vec![],
844          msg: format!("Unknown opcode: {:?}", unknown),
845          id: line!(),
846          kind: MechErrorKind::GenericError("Unknown opcode".to_string()),
847        });
848      }
849    }
850  }
851  Ok(out)
852}
853
854impl DecodedInstr {
855  pub fn write_to<W: Write>(&self, w: &mut W) -> MResult<()> {
856    match self {
857      DecodedInstr::ConstLoad { dst, const_id } => {
858        w.write_u8(OpCode::ConstLoad as u8)?;
859        w.write_u32::<LittleEndian>(*dst)?;
860        w.write_u32::<LittleEndian>(*const_id)?;
861      }
862      DecodedInstr::NullOp { fxn_id, dst } => {
863        w.write_u8(OpCode::NullOp as u8)?;
864        w.write_u64::<LittleEndian>(*fxn_id)?;
865        w.write_u32::<LittleEndian>(*dst)?;
866      }
867      DecodedInstr::UnOp { fxn_id, dst, src } => {
868        w.write_u8(OpCode::Unop as u8)?;
869        w.write_u64::<LittleEndian>(*fxn_id)?;
870        w.write_u32::<LittleEndian>(*dst)?;
871        w.write_u32::<LittleEndian>(*src)?;
872      }
873      DecodedInstr::BinOp { fxn_id, dst, lhs, rhs } => {
874        w.write_u8(OpCode::Binop as u8)?;
875        w.write_u64::<LittleEndian>(*fxn_id)?;
876        w.write_u32::<LittleEndian>(*dst)?;
877        w.write_u32::<LittleEndian>(*lhs)?;
878        w.write_u32::<LittleEndian>(*rhs)?;
879      }
880      DecodedInstr::TernOp { fxn_id, dst, a, b, c } => {
881        w.write_u8(OpCode::Ternop as u8)?;
882        w.write_u64::<LittleEndian>(*fxn_id)?;
883        w.write_u32::<LittleEndian>(*dst)?;
884        w.write_u32::<LittleEndian>(*a)?;
885        w.write_u32::<LittleEndian>(*b)?;
886        w.write_u32::<LittleEndian>(*c)?;
887      }
888      DecodedInstr::QuadOp { fxn_id, dst, a, b, c, d } => {
889        w.write_u8(OpCode::Quadop as u8)?;
890        w.write_u64::<LittleEndian>(*fxn_id)?;
891        w.write_u32::<LittleEndian>(*dst)?;
892        w.write_u32::<LittleEndian>(*a)?;
893        w.write_u32::<LittleEndian>(*b)?;
894        w.write_u32::<LittleEndian>(*c)?;
895        w.write_u32::<LittleEndian>(*d)?;
896      }
897      DecodedInstr::VarArg { fxn_id, dst, args } => {
898        w.write_u8(OpCode::VarArg as u8)?;
899        w.write_u64::<LittleEndian>(*fxn_id)?;
900        w.write_u32::<LittleEndian>(*dst)?;
901        w.write_u32::<LittleEndian>(args.len() as u32)?;
902        for a in args {
903          w.write_u32::<LittleEndian>(*a)?;
904        }
905      }
906      DecodedInstr::Ret { src } => {
907        w.write_u8(OpCode::Return as u8)?;
908        w.write_u32::<LittleEndian>(*src)?;
909      }
910      DecodedInstr::Unknown { opcode, rest } => {
911        w.write_u8(*opcode)?;
912        w.write_all(rest)?;
913      }
914    }
915    Ok(())
916  }
917}