mech_core/program/
program.rs

1use crate::*;
2use super::*;
3use byteorder::{LittleEndian, WriteBytesExt, ReadBytesExt};
4use std::io::Write;
5use std::io::{self, SeekFrom, Seek, Cursor};
6#[cfg(not(feature = "no_std"))]
7use std::fs::File;
8#[cfg(not(feature = "no_std"))]
9use std::path::Path;
10#[cfg(feature = "matrix")]
11use crate::matrix::Matrix;
12
/// In-memory form of a bytecode program image, one field per section.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParsedProgram {
  /// Decoded file header; carries the offsets/lengths the loader uses to find each section.
  pub header: ByteCodeHeader,
  /// Feature values, stored in the image as a u32 count followed by u64 values.
  pub features: Vec<u64>,
  /// Type table; constant entries refer to it through `type_id`.
  pub types: TypeSection,
  /// Constant table entries describing slices of `const_blob`.
  pub const_entries: Vec<ParsedConstEntry>,
  /// Raw constant data addressed by each entry's `offset`/`length`.
  pub const_blob: Vec<u8>,
  /// Raw, still-encoded instruction stream.
  pub instr_bytes: Vec<u8>,
  /// Symbol id -> register.
  pub symbols: HashMap<u64, Register>,
  /// Ids of the symbols whose entry carries the mutable flag.
  pub mutable_symbols: HashSet<u64>,
  /// Instructions decoded from `instr_bytes`.
  pub instrs: Vec<DecodedInstr>,
  /// Id -> name dictionary.
  pub dictionary: HashMap<u64, String>,
}
27
28impl ParsedProgram {
29
30 pub fn to_bytes(&self) -> MResult<Vec<u8>> {
31    let mut buf = Cursor::new(Vec::<u8>::new());
32
33    // 1. Header
34    self.header.write_to(&mut buf)?;
35
36    // 2. Features
37    buf.write_u32::<LittleEndian>(self.features.len() as u32)?;
38    for f in &self.features {
39      buf.write_u64::<LittleEndian>(*f)?;
40    }
41
42    // 3. Types
43    self.types.write_to(&mut buf)?;
44
45    // 4. Const entries
46    for entry in &self.const_entries {
47      entry.write_to(&mut buf)?;
48    }
49
50    // 5. Const blob
51    if !self.const_blob.is_empty() {
52      buf.write_all(&self.const_blob)?;
53    }
54
55    // 6. Symbols
56    for (id, reg) in &self.symbols {
57      let mutable = self.mutable_symbols.contains(id);
58      let entry = SymbolEntry::new(*id, mutable, *reg);
59      entry.write_to(&mut buf)?;
60    }
61
62    // 7. Instructions
63    for ins in &self.instrs {
64      ins.write_to(&mut buf)?;
65    }
66
67    // 8. Dictionary
68    for (id, name) in &self.dictionary {
69      let dict_entry = DictEntry::new(*id, name);
70      dict_entry.write_to(&mut buf)?;
71    }
72
73    // 9. CRC32 trailer
74    let bytes_so_far = buf.get_ref().as_slice();
75    let checksum = crc32fast::hash(bytes_so_far);
76    buf.write_u32::<LittleEndian>(checksum)?;
77
78    Ok(buf.into_inner())
79  }
80
81  pub fn from_bytes(bytes: &[u8]) -> MResult<ParsedProgram> {
82    load_program_from_bytes(bytes)
83  }
84
85  pub fn validate(&self) -> MResult<()> {
86    // Check magic number
87    if !self.header.validate_magic(b"MECH") {
88      return Err(
89        MechError2::new(
90          InvalidMagicNumberError,
91          None,
92        ).with_compiler_loc()
93      );
94    }
95
96    // Check version number
97    if self.header.version != 1 {
98      return Err(
99        MechError2::new(
100          UnsupportedBytecodeVersionError,
101          None,
102        ).with_compiler_loc()
103      );
104    }
105
106    // Check mech version
107    if self.header.mech_ver != parse_version_to_u16(env!("CARGO_PKG_VERSION")).unwrap() {
108      return Err(
109        MechError2::new(
110          IncompatibleMechVersionError,
111          None,
112        ).with_compiler_loc()
113      );
114    }
115
116    Ok(())
117  }
118
119  pub fn decode_const_entries(&self) -> MResult<Vec<Value>> {
120    let mut out = Vec::with_capacity(self.const_entries.len());
121    let blob_len = self.const_blob.len() as u64;
122
123    for const_entry in &self.const_entries {
124      // Encoding check
125      if const_entry.enc != ConstEncoding::Inline as u8 {
126        return Err(
127          MechError2::new(
128            UnsupportedConstantEncodingError,
129            None,
130          ).with_compiler_loc()
131        );
132      }
133
134      // Bounds check #1
135      if const_entry.offset.checked_add(const_entry.length).is_none() {
136        return Err(
137          MechError2::new(
138            ConstantEntryOutOfBoundsError,
139            None,
140          ).with_compiler_loc()
141        );
142      }
143
144      // Bounds check #2
145      let end = const_entry.offset + const_entry.length;
146      if end > blob_len {
147        return Err(
148          MechError2::new(
149            ConstantEntryOutOfBoundsError,
150            None,
151          ).with_compiler_loc()
152        );
153      }
154
155      // Alignment check
156      if !check_alignment(const_entry.offset, const_entry.align) {
157        return Err(
158          MechError2::new(
159            ConstantEntryAlignmentError,
160            None,
161          ).with_compiler_loc()
162        );
163      }
164
165      // Copy bytes out (we clone into Vec<u8> to own data)
166      let start = const_entry.offset as usize;
167      let len = const_entry.length as usize;
168      let data = self.const_blob[start .. start + len].to_vec();
169
170      // get the type from the id
171      let ty = &self.types.entries[const_entry.type_id as usize];
172
173      let val: Value = match ty.tag {
174        #[cfg(feature = "bool")]
175        TypeTag::Bool => {
176          if data.len() != 1 {
177            return Err(MechError2::new(ConstantWrongSizeError {expected: 1,found: data.len(),type_name: "Bool",},None,).with_compiler_loc());
178          }
179          let value = data[0] != 0;
180          Value::Bool(Ref::new(value))
181        }
182        #[cfg(feature = "string")]
183        TypeTag::String => {
184          if data.len() < 4 {
185            return Err(MechError2::new(ConstantWrongSizeError {expected: 4, found: data.len(), type_name: "String",}, None,).with_compiler_loc());
186          }
187          let s = String::from_le(&data);
188          Value::String(Ref::new(s))
189        }
190        #[cfg(feature = "u8")]
191        TypeTag::U8 => {
192          if data.len() != 1 { 
193            return Err(MechError2::new(ConstantWrongSizeError { expected: 1, found: data.len(), type_name: "U8" }, None).with_compiler_loc()); 
194          }
195          let value = data[0];
196          Value::U8(Ref::new(value))
197        },
198        #[cfg(feature = "u16")]
199        TypeTag::U16 => {
200          if data.len() != 2 {
201            return Err(MechError2::new(ConstantWrongSizeError { expected: 2, found: data.len(), type_name: "U16" }, None).with_compiler_loc());
202          }
203          let value = u16::from_le_bytes(data.try_into().unwrap());
204          Value::U16(Ref::new(value))
205        },
206        #[cfg(feature = "u32")]
207        TypeTag::U32 => {
208          if data.len() != 4 {
209            return Err(MechError2::new(ConstantWrongSizeError { expected: 4, found: data.len(), type_name: "U32" }, None).with_compiler_loc());
210          }
211          let value = u32::from_le_bytes(data.try_into().unwrap());
212          Value::U32(Ref::new(value))
213        },
214        #[cfg(feature = "u64")]
215        TypeTag::U64 => {
216          if data.len() != 8 {
217            return Err(MechError2::new(ConstantWrongSizeError { expected: 8, found: data.len(), type_name: "U64" }, None).with_compiler_loc());
218          }
219          let value = u64::from_le_bytes(data.try_into().unwrap());
220          Value::U64(Ref::new(value))
221        },
222        #[cfg(feature = "u128")]
223        TypeTag::U128 => {
224          if data.len() != 16 {
225            return Err(MechError2::new(ConstantWrongSizeError { expected: 16, found: data.len(), type_name: "U128" }, None).with_compiler_loc());
226          }
227          let value = u128::from_le_bytes(data.try_into().unwrap());
228          Value::U128(Ref::new(value))
229        },
230        #[cfg(feature = "i8")]
231        TypeTag::I8 => {
232          if data.len() != 1 {
233            return Err(MechError2::new(ConstantWrongSizeError { expected: 1, found: data.len(), type_name: "I8" }, None).with_compiler_loc());
234          }
235          let value = data[0] as i8;
236          Value::I8(Ref::new(value))
237        },
238        #[cfg(feature = "i16")]
239        TypeTag::I16 => {
240          if data.len() != 2 {
241            return Err(MechError2::new(ConstantWrongSizeError { expected: 2, found: data.len(), type_name: "I16" }, None).with_compiler_loc());
242          }
243          let value = i16::from_le_bytes(data.try_into().unwrap());
244          Value::I16(Ref::new(value))
245        },
246        #[cfg(feature = "i32")]
247        TypeTag::I32 => {
248          if data.len() != 4 {
249            return Err(MechError2::new(ConstantWrongSizeError { expected: 4, found: data.len(), type_name: "I32" }, None).with_compiler_loc());
250          }
251          let value = i32::from_le_bytes(data.try_into().unwrap());
252          Value::I32(Ref::new(value))
253        },
254        #[cfg(feature = "i64")]
255        TypeTag::I64 => {
256          if data.len() != 8 {
257            return Err(MechError2::new(ConstantWrongSizeError { expected: 8, found: data.len(), type_name: "I64" }, None).with_compiler_loc());
258          }
259          let value = i64::from_le_bytes(data.try_into().unwrap());
260          Value::I64(Ref::new(value))
261        },
262        #[cfg(feature = "i128")]
263        TypeTag::I128 => {
264          if data.len() != 16 {
265            return Err(MechError2::new(ConstantWrongSizeError { expected: 16, found: data.len(), type_name: "i128" }, None).with_compiler_loc());
266          }
267          let value = i128::from_le_bytes(data.try_into().unwrap());
268          Value::I128(Ref::new(value))
269        },
270        #[cfg(feature = "f32")]
271        TypeTag::F32 => {
272          if data.len() != 4 {
273            return Err(MechError2::new(ConstantWrongSizeError { expected: 4, found: data.len(), type_name: "f32" }, None).with_compiler_loc());
274          }
275          let value = f32::from_le_bytes(data.try_into().unwrap());
276          Value::F32(Ref::new(value))
277        },
278        #[cfg(feature = "f64")]
279        TypeTag::F64 => {
280          if data.len() != 8 {
281            return Err(MechError2::new(ConstantWrongSizeError { expected: 8, found: data.len(), type_name: "f64" }, None).with_compiler_loc());
282          }
283          let value = f64::from_le_bytes(data.try_into().unwrap());
284          Value::F64(Ref::new(value))
285        },
286        #[cfg(feature = "complex")]
287        TypeTag::C64 => {
288          if data.len() != 16 {
289            return Err(MechError2::new(ConstantWrongSizeError { expected: 16, found: data.len(), type_name: "c64" }, None).with_compiler_loc());
290          }
291          let real = f64::from_le_bytes(data[0..8].try_into().unwrap());
292          let imag = f64::from_le_bytes(data[8..16].try_into().unwrap());
293          Value::C64(Ref::new(C64::new(real, imag)))
294        },
295        #[cfg(feature = "rational")]
296        TypeTag::R64 => {
297          if data.len() != 16 {
298            return Err(MechError2::new(ConstantWrongSizeError { expected: 16, found: data.len(), type_name: "r64" }, None).with_compiler_loc());
299          }
300          let numer = i64::from_le_bytes(data[0..8].try_into().unwrap());
301          let denom = i64::from_le_bytes(data[8..16].try_into().unwrap());
302          Value::R64(Ref::new(R64::new(numer, denom)))
303        },
304        #[cfg(all(feature = "matrix", feature = "string"))]
305        TypeTag::MatrixString => {
306          if data.len() < 8 {
307            return Err(MechError2::new(ConstantTooShortError { type_name: "[string]" }, None).with_compiler_loc());
308          }
309          let matrix = Matrix::<String>::from_le(&data);
310          Value::MatrixString(matrix)
311        }
312        #[cfg(all(feature = "matrix", feature = "bool"))]
313        TypeTag::MatrixBool => {
314          if data.len() < 1 {
315            return Err(MechError2::new(ConstantTooShortError { type_name: "[bool]" }, None).with_compiler_loc());
316          }
317          let matrix = Matrix::<bool>::from_le(&data);
318          Value::MatrixBool(matrix)
319        }
320        #[cfg(all(feature = "matrix", feature = "u8"))]
321        TypeTag::MatrixU8 => {
322          if data.len() < 1 {
323            return Err(MechError2::new(ConstantTooShortError { type_name: "[u8]" }, None).with_compiler_loc());
324          }
325          let matrix = Matrix::<u8>::from_le(&data);
326          Value::MatrixU8(matrix)
327        }
328        #[cfg(all(feature = "matrix", feature = "i8"))]
329        TypeTag::MatrixI8 => {
330          if data.len() < 1 {
331            return Err(MechError2::new(ConstantTooShortError { type_name: "[i8]" }, None).with_compiler_loc());
332          }
333          let matrix = Matrix::<i8>::from_le(&data);
334          Value::MatrixI8(matrix)
335        }
336        #[cfg(all(feature = "matrix", feature = "f32"))]
337        TypeTag::MatrixF32 => {
338          if data.len() < 4 {
339            return Err(MechError2::new(ConstantTooShortError { type_name: "[f32]" }, None).with_compiler_loc());
340          }
341          let matrix = Matrix::<f32>::from_le(&data);
342          Value::MatrixF32(matrix)
343        }
344        #[cfg(all(feature = "matrix", feature = "f64"))]
345        TypeTag::MatrixF64 => {
346          if data.len() < 8 {
347            return Err(MechError2::new(ConstantTooShortError { type_name: "[f64]" }, None).with_compiler_loc());
348          }
349          let matrix = Matrix::<f64>::from_le(&data);
350          Value::MatrixF64(matrix)
351        }
352        #[cfg(all(feature = "matrix", feature = "u16"))]
353        TypeTag::MatrixU16 => {
354          if data.len() < 2 {
355            return Err(MechError2::new(ConstantTooShortError { type_name: "[u16]" }, None).with_compiler_loc());
356          }
357          let matrix = Matrix::<u16>::from_le(&data);
358          Value::MatrixU16(matrix)
359        }
360        #[cfg(all(feature = "matrix", feature = "u32"))]
361        TypeTag::MatrixU32 => {
362          if data.len() < 4 {
363            return Err(MechError2::new(ConstantTooShortError { type_name: "[u32]" }, None).with_compiler_loc());
364          }
365          let matrix = Matrix::<u32>::from_le(&data);
366          Value::MatrixU32(matrix)
367        }
368        #[cfg(all(feature = "matrix", feature = "u64"))]
369        TypeTag::MatrixU64 => {
370          if data.len() < 8 {
371            return Err(MechError2::new(ConstantTooShortError { type_name: "[u64]" }, None).with_compiler_loc());
372          }
373          let matrix = Matrix::<u64>::from_le(&data);
374          Value::MatrixU64(matrix)
375        }
376        #[cfg(all(feature = "matrix", feature = "u128"))]
377        TypeTag::MatrixU128 => {
378          if data.len() < 16 {
379            return Err(MechError2::new(ConstantTooShortError { type_name: "[u128]" }, None).with_compiler_loc());
380          }
381          let matrix = Matrix::<u128>::from_le(&data);
382          Value::MatrixU128(matrix)
383        }
384        #[cfg(all(feature = "matrix", feature = "i16"))]
385        TypeTag::MatrixI16 => {
386          if data.len() < 2 {
387            return Err(MechError2::new(ConstantTooShortError { type_name: "[i16]" }, None).with_compiler_loc());
388          }
389          let matrix = Matrix::<i16>::from_le(&data);
390          Value::MatrixI16(matrix)
391        }
392        #[cfg(all(feature = "matrix", feature = "i32"))]
393        TypeTag::MatrixI32 => {
394          if data.len() < 4 {
395            return Err(MechError2::new(ConstantTooShortError { type_name: "[i32]" }, None).with_compiler_loc());
396          }
397          let matrix = Matrix::<i32>::from_le(&data);
398          Value::MatrixI32(matrix)
399        }
400        #[cfg(all(feature = "matrix", feature = "i64"))]
401        TypeTag::MatrixI64 => {
402          if data.len() < 8 {
403            return Err(MechError2::new(ConstantTooShortError { type_name: "[i64]" }, None).with_compiler_loc());
404          }
405          let matrix = Matrix::<i64>::from_le(&data);
406          Value::MatrixI64(matrix)
407        },
408        #[cfg(all(feature = "matrix", feature = "i128"))]
409        TypeTag::MatrixI128 => {
410          if data.len() < 8 {
411            return Err(MechError2::new(ConstantTooShortError { type_name: "[i128]" }, None).with_compiler_loc());
412          }
413          let matrix = Matrix::<i128>::from_le(&data);
414          Value::MatrixI128(matrix)
415        },
416        #[cfg(all(feature = "matrix", feature = "c64"))]
417        TypeTag::MatrixC64 => {
418          if data.len() < 8 {
419            return Err(MechError2::new(ConstantTooShortError { type_name: "[c64]" }, None).with_compiler_loc());
420          }
421          let matrix = Matrix::<C64>::from_le(&data);
422          Value::MatrixC64(matrix)
423        },
424        #[cfg(all(feature = "matrix", feature = "r64"))]
425        TypeTag::MatrixR64 => {
426          if data.len() < 8 {
427            return Err(MechError2::new(ConstantTooShortError { type_name: "[r64]" }, None).with_compiler_loc());
428          }
429          let matrix = Matrix::<R64>::from_le(&data);
430          Value::MatrixR64(matrix)
431        },
432        #[cfg(feature = "matrix")]
433        TypeTag::MatrixIndex => {
434          if data.len() < 8 {
435            return Err(MechError2::new(ConstantTooShortError { type_name: "[ix]" }, None).with_compiler_loc());
436          }
437          let matrix = Matrix::<usize>::from_le(&data);
438          Value::MatrixIndex(matrix)
439        },
440        TypeTag::Index => {
441          if data.len() != 8 {
442            return Err(MechError2::new(ConstantWrongSizeError { expected: 8, found: data.len(), type_name: "Index" }, None).with_compiler_loc());
443          }
444          let value = u64::from_le_bytes(data.try_into().unwrap()) as usize;
445          Value::Index(Ref::new(value))
446        },
447        #[cfg(feature = "set")]
448        TypeTag::Set => {
449          if data.len() < 4 {
450            return Err(MechError2::new(ConstantTooShortError { type_name: "set" }, None).with_compiler_loc());
451          }
452          let set = MechSet::from_le(&data);
453          Value::Set(Ref::new(set))
454        },
455        #[cfg(feature = "table")]
456        TypeTag::Table => {
457          if data.len() < 8 {
458            return Err(MechError2::new(ConstantTooShortError { type_name: "table" }, None).with_compiler_loc());
459          }
460          let table = MechTable::from_le(&data);
461          Value::Table(Ref::new(table))
462        }
463        _ => {
464          return Err(
465            MechError2::new(
466              UnsupportedConstantTypeError { type_tag: ty.tag },
467              None,
468            )
469            .with_compiler_loc()
470          );
471        }    
472      };
473      out.push(val);
474    }
475    Ok(out)
476  }
477}
478
/// Reports whether `offset` is a multiple of `align`.
///
/// An alignment of zero is never satisfied.
fn check_alignment(offset: u64, align: u8) -> bool {
  match align {
    0 => false,
    nonzero => offset % u64::from(nonzero) == 0,
  }
}
485
486// Load Program
487// ----------------------------------------------------------------------------
488
489#[cfg(not(feature = "no_std"))]
490pub fn load_program_from_file(path: impl AsRef<Path>) -> MResult<ParsedProgram> {
491  let path = path.as_ref();
492  let mut f = File::open(path)?;
493
494  // total length for bounds checks
495  let total_len = f.metadata()?.len();
496
497  // Verify CRC trailer first; ensures fully readable, too.
498  verify_crc_trailer_seek(&mut f, total_len)?;
499
500  // Parse from the start
501  f.seek(SeekFrom::Start(0))?;
502  load_program_from_reader(&mut f, total_len)
503}
504
505pub fn load_program_from_bytes(bytes: &[u8]) -> MResult<ParsedProgram> {
506  let total_len = bytes.len() as u64;
507
508  let mut cur = Cursor::new(bytes);
509  verify_crc_trailer_seek(&mut cur, total_len)?;
510
511  // Parse from the start
512  cur.seek(SeekFrom::Start(0))?;
513  load_program_from_reader(&mut cur, total_len)
514}
515
516fn load_program_from_reader<R: Read + Seek>(r: &mut R, total_len: u64) -> MResult<ParsedProgram> {
517  r.seek(SeekFrom::Start(0))?;
518  let mut header_buf = vec![0u8; ByteCodeHeader::HEADER_SIZE];
519  r.read_exact(&mut header_buf)?;
520
521  // 1) read header blob
522  let mut header_cursor = Cursor::new(&header_buf[..]);
523  let header = ByteCodeHeader::read_from(&mut header_cursor)?;
524
525  // quick magic check
526  if !header.validate_magic(b"MECH") {
527    return Err(
528      MechError2::new(InvalidMagicNumberError, None)
529        .with_compiler_loc()
530    );
531  }
532
533  // 2. read features
534  let mut features = Vec::new();
535  if header.feature_off != 0 && header.feature_off + 4 <= total_len.saturating_sub(4) {
536    r.seek(SeekFrom::Start(header.feature_off))?;
537    let c = r.read_u32::<LittleEndian>()? as usize;
538    for _ in 0..c {
539      let v = r.read_u64::<LittleEndian>()?;
540      features.push(v);
541    }
542  }
543
544  // 3. read types
545  let mut types = TypeSection::new();
546  if header.types_off != 0 && header.types_off + 4 <= total_len.saturating_sub(4) {
547    r.seek(SeekFrom::Start(header.types_off))?;
548    let types_count = r.read_u32::<LittleEndian>()? as usize;
549    for _ in 0..types_count {
550      let tag = r.read_u16::<LittleEndian>()?;
551      let _reserved = r.read_u16::<LittleEndian>()?; // reserved, always 0
552      let _version = r.read_u32::<LittleEndian>()?; // version, always 1
553      let bytes_len = r.read_u32::<LittleEndian>()? as usize;
554      let mut bytes = vec![0u8; bytes_len];
555      r.read_exact(&mut bytes)?;
556      if let Some(tag) = TypeTag::from_u16(tag) {
557        types.entries.push(TypeEntry { tag, bytes });
558      } else {
559        return Err(
560        MechError2::new(UnknownConstantTypeError { tag }, None)
561          .with_compiler_loc()
562        );
563      }
564    }
565  }
566
567  // 4. read const table
568  let mut const_entries = Vec::new();
569  if header.const_tbl_off != 0 && header.const_tbl_len > 0 {
570    r.seek(SeekFrom::Start(header.const_tbl_off))?;
571    let mut tbl_bytes = vec![0u8; header.const_tbl_len as usize];
572    r.read_exact(&mut tbl_bytes)?;
573    let cur = Cursor::new(&tbl_bytes[..]);
574    const_entries = parse_const_entries(cur, header.const_count as usize)?;
575  }
576
577  // read const blob
578  let mut const_blob = vec![];
579  if header.const_blob_off != 0 && header.const_blob_len > 0 {
580    r.seek(SeekFrom::Start(header.const_blob_off))?;
581    const_blob.resize(header.const_blob_len as usize, 0);
582    r.read_exact(&mut const_blob)?;
583  }
584
585  // 5. read symbols
586  let mut symbols = HashMap::new();
587  let mut mutable_symbols = HashSet::new();
588  if header.symbols_off != 0 && header.symbols_len > 0 {
589    r.seek(SeekFrom::Start(header.symbols_off))?;
590    let mut symbols_bytes = vec![0u8; header.symbols_len as usize];
591    r.read_exact(&mut symbols_bytes)?;
592    let mut cur = Cursor::new(&symbols_bytes[..]);
593    for _ in 0..(header.symbols_len / 12) {
594      let id = cur.read_u64::<LittleEndian>()?;
595      let mutable = cur.read_u8()? != 0;
596      let reg = cur.read_u32::<LittleEndian>()?;
597      symbols.insert(id, reg);
598      if mutable {
599        mutable_symbols.insert(id);
600      }
601    }
602  }
603
604  // 6. read instr bytes
605  let mut instr_bytes = vec![];
606  if header.instr_off != 0 && header.instr_len > 0 {
607    r.seek(SeekFrom::Start(header.instr_off))?;
608    instr_bytes.resize(header.instr_len as usize, 0);
609    r.read_exact(&mut instr_bytes)?;
610  }
611
612  // 7. read dictionary
613  let mut dictionary = HashMap::new();
614  if header.dict_off != 0 && header.dict_len > 0 {
615    r.seek(SeekFrom::Start(header.dict_off))?;
616    let mut dict_bytes = vec![0u8; header.dict_len as usize];
617    r.read_exact(&mut dict_bytes)?;
618    let mut cur = Cursor::new(&dict_bytes[..]);
619    while cur.position() < dict_bytes.len() as u64 {
620      let id = cur.read_u64::<LittleEndian>()?;
621      let name_len = cur.read_u32::<LittleEndian>()? as usize;
622      let mut name_bytes = vec![0u8; name_len];
623      cur.read_exact(&mut name_bytes)?;
624      let name = String::from_utf8(name_bytes).map_err(|_| 
625          MechError2::new(InvalidUtf8InDictError, None)
626            .with_compiler_loc()
627      )?;
628      dictionary.insert(id, name);
629    }
630  }
631
632  // decode instructions
633  let instrs = decode_instructions(Cursor::new(&instr_bytes[..]))?;
634  
635  Ok(ParsedProgram { header, features, types, const_entries, const_blob, instr_bytes, symbols, mutable_symbols, instrs, dictionary })
636}
637
/// Splits a packed 16-bit version into `(major, minor, patch)`.
///
/// Bit layout, most significant first: 3 bits major, 5 bits minor,
/// 8 bits patch.
pub fn decode_version_from_u16(v: u16) -> (u16, u16, u16) {
  const MAJOR_MASK: u16 = 0x07;
  const MINOR_MASK: u16 = 0x1F;
  const PATCH_MASK: u16 = 0x00FF;

  (
    (v >> 13) & MAJOR_MASK,
    (v >> 8) & MINOR_MASK,
    v & PATCH_MASK,
  )
}
644
/// One fixed-width record of the constant table.
///
/// Fields are declared in the order they are read from and written to the
/// image (all little-endian).
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParsedConstEntry {
  /// Index into the program's type table.
  pub type_id: u32,
  /// Encoding tag; compared against `ConstEncoding::Inline` when decoding.
  pub enc: u8,
  /// Required alignment of `offset`; zero is rejected when decoding.
  pub align: u8,
  /// Flag byte; carried through unchanged by the code in this file.
  pub flags: u8,
  /// Reserved byte; carried through unchanged by the code in this file.
  pub reserved: u8,
  /// Byte offset of the constant's data within the constant blob.
  pub offset: u64,
  /// Length in bytes of the constant's data.
  pub length: u64,
}
656
657
658impl ParsedConstEntry {
659  pub fn write_to<W: Write>(&self, w: &mut W) -> MResult<()> {
660    // type_id (u32)
661    w.write_u32::<LittleEndian>(self.type_id)
662      .map_err(|e| MechError2::new(ConstEntryWriteIoError { source: format!("{}", e) }, None).with_compiler_loc())?;
663    // enc, align, flags, reserved (u8 each)
664    w.write_u8(self.enc)
665      .map_err(|e| MechError2::new(ConstEntryWriteIoError { source: format!("{}", e) }, None).with_compiler_loc())?;
666    w.write_u8(self.align)
667      .map_err(|e| MechError2::new(ConstEntryWriteIoError { source: format!("{}", e) }, None).with_compiler_loc())?;
668    w.write_u8(self.flags)
669      .map_err(|e| MechError2::new(ConstEntryWriteIoError { source: format!("{}", e) }, None).with_compiler_loc())?;
670    w.write_u8(self.reserved)
671      .map_err(|e| MechError2::new(ConstEntryWriteIoError { source: format!("{}", e) }, None).with_compiler_loc())?;
672    // offset (u64)
673    w.write_u64::<LittleEndian>(self.offset)
674      .map_err(|e| MechError2::new(ConstEntryWriteIoError { source: format!("{}", e) }, None).with_compiler_loc())?;
675    // length (u64)
676    w.write_u64::<LittleEndian>(self.length)
677      .map_err(|e| MechError2::new(ConstEntryWriteIoError { source: format!("{}", e) }, None).with_compiler_loc())?;
678    Ok(())
679  }
680}
681
682fn parse_const_entries(mut cur: Cursor<&[u8]>, count: usize) -> io::Result<Vec<ParsedConstEntry>> {
683  let mut out = Vec::with_capacity(count);
684  for _ in 0..count {
685    let type_id = cur.read_u32::<LittleEndian>()?;
686    let enc = cur.read_u8()?;
687    let align = cur.read_u8()?;
688    let flags = cur.read_u8()?;
689    let reserved = cur.read_u8()?;
690    let offset = cur.read_u64::<LittleEndian>()?;
691    let length = cur.read_u64::<LittleEndian>()?;
692    out.push(ParsedConstEntry { type_id, enc, align, flags, reserved, offset, length });
693  }
694  Ok(out)
695}
696
697pub fn verify_crc_trailer_seek<R: Read + Seek>(r: &mut R, total_len: u64) -> MResult<()> {
698  if total_len < 4 {
699    return Err(MechError2::new(
700      FileTooShortError { total_len, expected_len: 4 },
701      None
702    ).with_compiler_loc());
703  }
704
705  r.seek(SeekFrom::Start(total_len - 4))?;
706  let expected_crc = r.read_u32::<LittleEndian>()?;
707
708  r.seek(SeekFrom::Start(0))?;
709  let payload_len = (total_len - 4) as usize;
710  let mut buf = vec![0u8; payload_len];
711  r.read_exact(&mut buf)?;
712
713  let file_crc = crc32fast::hash(&buf);
714  if file_crc != expected_crc {
715    Err(MechError2::new(
716      CrcMismatchError { expected: expected_crc, found: file_crc },
717      None
718    ).with_compiler_loc())
719  } else {
720    Ok(())
721  }
722}
723
/// A single decoded bytecode instruction.
///
/// `dst`, `src`, `lhs`, `rhs`, `a`..`d` and `args` are u32 operands;
/// `fxn_id` is a u64 function identifier.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum DecodedInstr {
  /// Operands: destination and a constant id.
  ConstLoad { dst: u32, const_id: u32 },
  /// Function with no source operands.
  NullOp {fxn_id: u64, dst: u32 },
  /// Function with one source operand.
  UnOp { fxn_id: u64, dst: u32, src: u32 },
  /// Function with two source operands.
  BinOp { fxn_id: u64, dst: u32, lhs: u32, rhs: u32 },
  /// Function with three source operands.
  TernOp { fxn_id: u64, dst: u32, a: u32, b: u32, c: u32 },
  /// Function with four source operands.
  QuadOp { fxn_id: u64, dst: u32, a: u32, b: u32, c: u32, d: u32 },
  /// Function with a variable number of source operands.
  VarArg { fxn_id: u64, dst: u32, args: Vec<u32> },
  /// Return instruction with a single source operand.
  Ret { src: u32 },
  /// Raw opcode byte plus any remaining bytes.
  Unknown { opcode: u8, rest: Vec<u8> }, // unknown opcode or dynamic form
}
737
738impl DecodedInstr {
739 pub fn from_u8(num: u8) -> Option<DecodedInstr> {
740    match OpCode::from_u8(num) {
741      Some(OpCode::ConstLoad) => Some(DecodedInstr::ConstLoad { dst: 0, const_id: 0 }),
742      Some(OpCode::NullOp) => Some(DecodedInstr::NullOp { fxn_id: 0, dst: 0 }),
743      Some(OpCode::Unop) => Some(DecodedInstr::UnOp { fxn_id: 0, dst: 0, src: 0 }),
744      Some(OpCode::Binop) => Some(DecodedInstr::BinOp { fxn_id: 0, dst: 0, lhs: 0, rhs: 0 }),
745      Some(OpCode::Ternop) => Some(DecodedInstr::TernOp { fxn_id: 0, dst: 0, a: 0, b: 0, c: 0 }),
746      Some(OpCode::Quadop) => Some(DecodedInstr::QuadOp { fxn_id: 0, dst: 0, a: 0, b: 0, c: 0, d: 0 }),
747      Some(OpCode::VarArg) => Some(DecodedInstr::VarArg { fxn_id: 0, dst: 0, args: vec![] }),
748      Some(OpCode::Return) => Some(DecodedInstr::Ret { src: 0 }),
749      _ => None,
750    }
751  }
752}
753
754fn decode_instructions(mut cur: Cursor<&[u8]>) -> MResult<Vec<DecodedInstr>> {
755  let mut out = Vec::new();
756  while (cur.position() as usize) < cur.get_ref().len() {
757    // read opcode (u64)
758    let pos_before = cur.position();
759    // if remaining < 8, can't read opcode
760    let rem = cur.get_ref().len() - pos_before as usize;
761    if rem < 8 {
762      return Err(MechError2::new(
763        TruncatedInstructionError,
764        None
765      ).with_compiler_loc());
766    }
767    let opcode_byte = cur.read_u8()?;
768    match OpCode::from_u8(opcode_byte) {
769      Some(OpCode::ConstLoad) => {
770        // need 4+4 bytes
771        let dst = cur.read_u32::<LittleEndian>()?;
772        let const_id = cur.read_u32::<LittleEndian>()?;
773        out.push(DecodedInstr::ConstLoad { dst, const_id });
774      }
775      Some(OpCode::Return) => {
776        let src = cur.read_u32::<LittleEndian>()?;
777        out.push(DecodedInstr::Ret { src });
778      }
779      Some(OpCode::NullOp) => {
780        // need 8+4 bytes
781        let fxn_id = cur.read_u64::<LittleEndian>()?;
782        let dst = cur.read_u32::<LittleEndian>()?;
783        out.push(DecodedInstr::NullOp { fxn_id: fxn_id, dst });
784      }
785      Some(OpCode::Unop) => {
786        // need 8+4+4 bytes
787        let fxn_id = cur.read_u64::<LittleEndian>()?;
788        let dst = cur.read_u32::<LittleEndian>()?;
789        let src = cur.read_u32::<LittleEndian>()?;
790        out.push(DecodedInstr::UnOp { fxn_id: fxn_id, dst, src });
791      }
792      Some(OpCode::Binop) => {
793        // need 8+4+4+4 bytes
794        let fxn_id = cur.read_u64::<LittleEndian>()?;
795        let dst = cur.read_u32::<LittleEndian>()?;
796        let lhs = cur.read_u32::<LittleEndian>()?;
797        let rhs = cur.read_u32::<LittleEndian>()?;
798        out.push(DecodedInstr::BinOp { fxn_id: fxn_id, dst, lhs, rhs });
799      }
800      Some(OpCode::Ternop) => {
801        // need 8+4+4+4+4 bytes
802        let fxn_id = cur.read_u64::<LittleEndian>()?;
803        let dst = cur.read_u32::<LittleEndian>()?;
804        let a = cur.read_u32::<LittleEndian>()?;
805        let b = cur.read_u32::<LittleEndian>()?;
806        let c = cur.read_u32::<LittleEndian>()?;
807        out.push(DecodedInstr::TernOp { fxn_id: fxn_id, dst, a, b, c });
808      }
809      Some(OpCode::Quadop) => {
810        // need 8+4+4+4+4+4 bytes
811        let fxn_id = cur.read_u64::<LittleEndian>()?;
812        let dst = cur.read_u32::<LittleEndian>()?;
813        let a = cur.read_u32::<LittleEndian>()?;
814        let b = cur.read_u32::<LittleEndian>()?;
815        let c = cur.read_u32::<LittleEndian>()?;
816        let d = cur.read_u32::<LittleEndian>()?;
817        out.push(DecodedInstr::QuadOp { fxn_id: fxn_id, dst, a, b, c, d });
818      }
819      Some(OpCode::VarArg) => {
820        // need at least 8+4+4 bytes
821        let fxn_id = cur.read_u64::<LittleEndian>()?;
822        let dst = cur.read_u32::<LittleEndian>()?;
823        let arg_count = cur.read_u32::<LittleEndian>()? as usize;
824        let mut args = Vec::with_capacity(arg_count);
825        for _ in 0..arg_count {
826          let a = cur.read_u32::<LittleEndian>()?;
827          args.push(a);
828        }
829        out.push(DecodedInstr::VarArg { fxn_id: fxn_id, dst, args });
830      }
831      Some(unknown) => {
832        return Err(MechError2::new(
833          UnknownOpcodeError { opcode: unknown },
834          None
835        ).with_compiler_loc());
836      }
837      None => {
838        return Err(MechError2::new(
839          InvalidOpcodeError { opcode: opcode_byte },
840          None
841        ).with_compiler_loc());
842      }
843    }
844  }
845  Ok(out)
846}
847
848impl DecodedInstr {
849  pub fn write_to<W: Write>(&self, w: &mut W) -> MResult<()> {
850    match self {
851      DecodedInstr::ConstLoad { dst, const_id } => {
852        w.write_u8(OpCode::ConstLoad as u8)?;
853        w.write_u32::<LittleEndian>(*dst)?;
854        w.write_u32::<LittleEndian>(*const_id)?;
855      }
856      DecodedInstr::NullOp { fxn_id, dst } => {
857        w.write_u8(OpCode::NullOp as u8)?;
858        w.write_u64::<LittleEndian>(*fxn_id)?;
859        w.write_u32::<LittleEndian>(*dst)?;
860      }
861      DecodedInstr::UnOp { fxn_id, dst, src } => {
862        w.write_u8(OpCode::Unop as u8)?;
863        w.write_u64::<LittleEndian>(*fxn_id)?;
864        w.write_u32::<LittleEndian>(*dst)?;
865        w.write_u32::<LittleEndian>(*src)?;
866      }
867      DecodedInstr::BinOp { fxn_id, dst, lhs, rhs } => {
868        w.write_u8(OpCode::Binop as u8)?;
869        w.write_u64::<LittleEndian>(*fxn_id)?;
870        w.write_u32::<LittleEndian>(*dst)?;
871        w.write_u32::<LittleEndian>(*lhs)?;
872        w.write_u32::<LittleEndian>(*rhs)?;
873      }
874      DecodedInstr::TernOp { fxn_id, dst, a, b, c } => {
875        w.write_u8(OpCode::Ternop as u8)?;
876        w.write_u64::<LittleEndian>(*fxn_id)?;
877        w.write_u32::<LittleEndian>(*dst)?;
878        w.write_u32::<LittleEndian>(*a)?;
879        w.write_u32::<LittleEndian>(*b)?;
880        w.write_u32::<LittleEndian>(*c)?;
881      }
882      DecodedInstr::QuadOp { fxn_id, dst, a, b, c, d } => {
883        w.write_u8(OpCode::Quadop as u8)?;
884        w.write_u64::<LittleEndian>(*fxn_id)?;
885        w.write_u32::<LittleEndian>(*dst)?;
886        w.write_u32::<LittleEndian>(*a)?;
887        w.write_u32::<LittleEndian>(*b)?;
888        w.write_u32::<LittleEndian>(*c)?;
889        w.write_u32::<LittleEndian>(*d)?;
890      }
891      DecodedInstr::VarArg { fxn_id, dst, args } => {
892        w.write_u8(OpCode::VarArg as u8)?;
893        w.write_u64::<LittleEndian>(*fxn_id)?;
894        w.write_u32::<LittleEndian>(*dst)?;
895        w.write_u32::<LittleEndian>(args.len() as u32)?;
896        for a in args {
897          w.write_u32::<LittleEndian>(*a)?;
898        }
899      }
900      DecodedInstr::Ret { src } => {
901        w.write_u8(OpCode::Return as u8)?;
902        w.write_u32::<LittleEndian>(*src)?;
903      }
904      DecodedInstr::Unknown { opcode, rest } => {
905        w.write_u8(*opcode)?;
906        w.write_all(rest)?;
907      }
908    }
909    Ok(())
910  }
911}
912
913
914#[derive(Debug, Clone)]
915pub struct UnsupportedBytecodeVersionError;
916impl MechErrorKind2 for UnsupportedBytecodeVersionError {
917  fn name(&self) -> &str { "UnsupportedBytecodeVersion" }
918  fn message(&self) -> String { "Unsupported bytecode version".to_string() }
919}
920
921#[derive(Debug, Clone)]
922pub struct IncompatibleMechVersionError;
923impl MechErrorKind2 for IncompatibleMechVersionError {
924  fn name(&self) -> &str { "IncompatibleMechVersion" }
925  fn message(&self) -> String { "Incompatible Mech version".to_string() }
926}
927
928#[derive(Debug, Clone)]
929pub struct UnsupportedConstantEncodingError;
930impl MechErrorKind2 for UnsupportedConstantEncodingError {
931  fn name(&self) -> &str { "UnsupportedConstantEncoding" }
932  fn message(&self) -> String { "Unsupported constant encoding".to_string() }
933}
934
935#[derive(Debug, Clone)]
936pub struct ConstantEntryOutOfBoundsError;
937impl MechErrorKind2 for ConstantEntryOutOfBoundsError {
938  fn name(&self) -> &str { "ConstantEntryOutOfBounds" }
939  fn message(&self) -> String { "Constant entry out of bounds".to_string() }
940}
941
942#[derive(Debug, Clone)]
943pub struct ConstantEntryAlignmentError;
944impl MechErrorKind2 for ConstantEntryAlignmentError {
945  fn name(&self) -> &str { "ConstantEntryAlignmentError" }
946  fn message(&self) -> String { "Constant entry alignment error".to_string() }
947}
948
949#[derive(Debug, Clone)]
950pub struct ConstantWrongSizeError {
951  pub expected: usize,
952  pub found: usize,
953  pub type_name: &'static str,
954}
955impl MechErrorKind2 for ConstantWrongSizeError {
956  fn name(&self) -> &str { "ConstantWrongSize" }
957  fn message(&self) -> String {
958    format!(
959      "{} constant wrong size: expected {}, found {}",
960      self.type_name, self.expected, self.found
961    )
962  }
963}
964
965#[derive(Debug, Clone)]
966pub struct ConstantTooShortError {
967  pub type_name: &'static str,
968}
969impl MechErrorKind2 for ConstantTooShortError {
970  fn name(&self) -> &str { "ConstantTooShort" }
971  fn message(&self) -> String {
972    format!("{} constant too short", self.type_name)
973  }
974}
975
976#[derive(Debug, Clone)]
977pub struct UnsupportedConstantTypeError {
978  pub type_tag: TypeTag,
979}
980impl MechErrorKind2 for UnsupportedConstantTypeError {
981  fn name(&self) -> &str { "UnsupportedConstantType" }
982
983  fn message(&self) -> String {
984    format!("Unsupported constant type {:?}", self.type_tag)
985  }
986}
987
988#[derive(Debug, Clone)]
989pub struct CrcMismatchError {
990  pub expected: u32,
991  pub found: u32,
992}
993impl MechErrorKind2 for CrcMismatchError {
994  fn name(&self) -> &str { "CrcMismatch" }
995
996  fn message(&self) -> String {
997    format!("CRC mismatch: expected {}, found {}", self.expected, self.found)
998  }
999}
1000
1001#[derive(Debug, Clone)]
1002pub struct TruncatedInstructionError;
1003impl MechErrorKind2 for TruncatedInstructionError {
1004  fn name(&self) -> &str { "TruncatedInstruction" }
1005  fn message(&self) -> String { "Truncated instruction: cannot read full opcode or operands".to_string() }
1006}
1007
1008#[derive(Debug, Clone)]
1009pub struct UnknownOpcodeError {
1010  pub opcode: OpCode,
1011}
1012impl MechErrorKind2 for UnknownOpcodeError {
1013  fn name(&self) -> &str { "UnknownOpcode" }
1014  fn message(&self) -> String { format!("Unknown opcode: {}", self.opcode) }
1015}
1016
1017#[derive(Debug, Clone)]
1018pub struct FileTooShortError {
1019  pub total_len: u64,
1020  pub expected_len: u64,
1021}
1022impl MechErrorKind2 for FileTooShortError {
1023  fn name(&self) -> &str { "FileTooShort" }
1024  fn message(&self) -> String {
1025    format!(
1026      "File too short: expected at least {}, got {}",
1027      self.expected_len, self.total_len
1028    )
1029  }
1030}
1031
1032#[derive(Debug, Clone)]
1033pub struct InvalidOpcodeError {
1034  pub opcode: u8,
1035}
1036impl MechErrorKind2 for InvalidOpcodeError {
1037  fn name(&self) -> &str { "InvalidOpcode" }
1038  fn message(&self) -> String { format!("Invalid opcode byte: {}", self.opcode) }
1039}
1040
1041#[derive(Debug, Clone)]
1042pub struct UnknownConstantTypeError {
1043  pub tag: u16,
1044}
1045impl MechErrorKind2 for UnknownConstantTypeError {
1046  fn name(&self) -> &str { "UnknownConstantType" }
1047
1048  fn message(&self) -> String {
1049    format!("Unknown constant type: {}", self.tag)
1050  }
1051}
1052
1053#[derive(Debug, Clone)]
1054pub struct InvalidUtf8InDictError;
1055impl MechErrorKind2 for InvalidUtf8InDictError {
1056  fn name(&self) -> &str { "InvalidUtf8InDict" }
1057  fn message(&self) -> String { "Invalid UTF-8 in dictionary entry".to_string() }
1058}
1059
1060#[derive(Debug, Clone)]
1061pub struct ConstEntryWriteIoError {
1062  pub source: String,
1063}
1064impl MechErrorKind2 for ConstEntryWriteIoError {
1065  fn name(&self) -> &str { "ConstEntryWriteIoError" }
1066  fn message(&self) -> String { format!("Failed to write constant entry: {}", self.source) }
1067}