1use hashbrown::HashMap;
2use sema_core::{intern, resolve, SemaError, Span, Spur, Value, ValueView};
3
4use crate::chunk::{Chunk, ExceptionEntry, Function, UpvalueDesc};
5use crate::compiler::CompileResult;
6use crate::opcodes::Op;
7
8pub struct StringTableBuilder {
10 strings: Vec<String>,
11 index: HashMap<String, u32>,
12}
13
14impl Default for StringTableBuilder {
15 fn default() -> Self {
16 Self::new()
17 }
18}
19
20impl StringTableBuilder {
21 pub fn new() -> Self {
22 let mut b = StringTableBuilder {
23 strings: Vec::new(),
24 index: HashMap::new(),
25 };
26 b.intern_str(""); b
28 }
29
30 pub fn intern_str(&mut self, s: &str) -> u32 {
31 if let Some(&idx) = self.index.get(s) {
32 return idx;
33 }
34 let idx = self.strings.len() as u32;
35 self.strings.push(s.to_string());
36 self.index.insert(s.to_string(), idx);
37 idx
38 }
39
40 pub fn intern_spur(&mut self, spur: Spur) -> u32 {
41 let s = resolve(spur);
42 self.intern_str(&s)
43 }
44
45 pub fn finish(self) -> Vec<String> {
46 self.strings
47 }
48}
49
50pub fn build_remap_table(table: &[String]) -> Vec<Spur> {
54 table.iter().map(|s| intern(s)).collect()
55}
56
// One-byte tags that prefix every serialized constant value.

/// Tag for nil (no payload).
const VAL_NIL: u8 = 0x00;
/// Tag for a boolean (one payload byte: 0 or 1).
const VAL_BOOL: u8 = 0x01;
/// Tag for a 64-bit integer (little-endian payload).
const VAL_INT: u8 = 0x02;
/// Tag for a 64-bit float (little-endian payload).
const VAL_FLOAT: u8 = 0x03;
/// Tag for a string (u32 string-table index).
const VAL_STRING: u8 = 0x04;
/// Tag for a symbol (u32 string-table index).
const VAL_SYMBOL: u8 = 0x05;
/// Tag for a keyword (u32 string-table index).
const VAL_KEYWORD: u8 = 0x06;
/// Tag for a character (u32 code point).
const VAL_CHAR: u8 = 0x07;
/// Tag for a list (u16 element count, then the elements).
const VAL_LIST: u8 = 0x08;
/// Tag for a vector (u16 element count, then the elements).
const VAL_VECTOR: u8 = 0x09;
/// Tag for a map (u16 pair count, then key/value pairs).
const VAL_MAP: u8 = 0x0a;
/// Tag for a hashmap (u16 pair count, then key/value pairs).
const VAL_HASHMAP: u8 = 0x0b;
/// Tag for a bytevector (u32 byte length, then the raw bytes).
const VAL_BYTEVECTOR: u8 = 0x0c;

/// Deepest nesting level accepted when deserializing a value.
const MAX_VALUE_DEPTH: usize = 128;
74
75fn checked_u16(n: usize, what: &str) -> Result<u16, SemaError> {
78 u16::try_from(n).map_err(|_| SemaError::eval(format!("{what} exceeds u16::MAX ({n})")))
79}
80
81fn checked_u32(n: usize, what: &str) -> Result<u32, SemaError> {
82 u32::try_from(n).map_err(|_| SemaError::eval(format!("{what} exceeds u32::MAX ({n})")))
83}
84
85pub fn serialize_value(
88 val: &Value,
89 buf: &mut Vec<u8>,
90 stb: &mut StringTableBuilder,
91) -> Result<(), SemaError> {
92 match val.view() {
93 ValueView::Nil => buf.push(VAL_NIL),
94 ValueView::Bool(b) => {
95 buf.push(VAL_BOOL);
96 buf.push(if b { 1 } else { 0 });
97 }
98 ValueView::Int(n) => {
99 buf.push(VAL_INT);
100 buf.extend_from_slice(&n.to_le_bytes());
101 }
102 ValueView::Float(f) => {
103 buf.push(VAL_FLOAT);
104 buf.extend_from_slice(&f.to_le_bytes());
105 }
106 ValueView::String(s) => {
107 buf.push(VAL_STRING);
108 let idx = stb.intern_str(&s);
109 buf.extend_from_slice(&idx.to_le_bytes());
110 }
111 ValueView::Symbol(spur) => {
112 buf.push(VAL_SYMBOL);
113 let idx = stb.intern_spur(spur);
114 buf.extend_from_slice(&idx.to_le_bytes());
115 }
116 ValueView::Keyword(spur) => {
117 buf.push(VAL_KEYWORD);
118 let idx = stb.intern_spur(spur);
119 buf.extend_from_slice(&idx.to_le_bytes());
120 }
121 ValueView::Char(c) => {
122 buf.push(VAL_CHAR);
123 buf.extend_from_slice(&(c as u32).to_le_bytes());
124 }
125 ValueView::List(items) => {
126 let len = checked_u16(items.len(), "list length")?;
127 buf.push(VAL_LIST);
128 buf.extend_from_slice(&len.to_le_bytes());
129 for item in items.iter() {
130 serialize_value(item, buf, stb)?;
131 }
132 }
133 ValueView::Vector(items) => {
134 let len = checked_u16(items.len(), "vector length")?;
135 buf.push(VAL_VECTOR);
136 buf.extend_from_slice(&len.to_le_bytes());
137 for item in items.iter() {
138 serialize_value(item, buf, stb)?;
139 }
140 }
141 ValueView::Map(map) => {
142 let len = checked_u16(map.len(), "map length")?;
143 buf.push(VAL_MAP);
144 buf.extend_from_slice(&len.to_le_bytes());
145 for (k, v) in map.iter() {
146 serialize_value(k, buf, stb)?;
147 serialize_value(v, buf, stb)?;
148 }
149 }
150 ValueView::HashMap(map) => {
151 let len = checked_u16(map.len(), "hashmap length")?;
152 buf.push(VAL_HASHMAP);
153 buf.extend_from_slice(&len.to_le_bytes());
154 for (k, v) in map.iter() {
155 serialize_value(k, buf, stb)?;
156 serialize_value(v, buf, stb)?;
157 }
158 }
159 ValueView::Bytevector(bv) => {
160 let len = checked_u32(bv.len(), "bytevector length")?;
161 buf.push(VAL_BYTEVECTOR);
162 buf.extend_from_slice(&len.to_le_bytes());
163 buf.extend_from_slice(&bv);
164 }
165 _ => {
167 return Err(SemaError::eval(format!(
168 "cannot serialize {} to bytecode constant pool",
169 val.type_name()
170 )));
171 }
172 }
173 Ok(())
174}
175
176fn read_u8(buf: &[u8], cursor: &mut usize) -> Result<u8, SemaError> {
179 if *cursor >= buf.len() {
180 return Err(SemaError::eval("unexpected end of bytecode data"));
181 }
182 let v = buf[*cursor];
183 *cursor += 1;
184 Ok(v)
185}
186
187fn read_u16_le(buf: &[u8], cursor: &mut usize) -> Result<u16, SemaError> {
188 if *cursor + 2 > buf.len() {
189 return Err(SemaError::eval("unexpected end of bytecode data"));
190 }
191 let v = u16::from_le_bytes([buf[*cursor], buf[*cursor + 1]]);
192 *cursor += 2;
193 Ok(v)
194}
195
196fn read_u32_le(buf: &[u8], cursor: &mut usize) -> Result<u32, SemaError> {
197 if *cursor + 4 > buf.len() {
198 return Err(SemaError::eval("unexpected end of bytecode data"));
199 }
200 let v = u32::from_le_bytes([
201 buf[*cursor],
202 buf[*cursor + 1],
203 buf[*cursor + 2],
204 buf[*cursor + 3],
205 ]);
206 *cursor += 4;
207 Ok(v)
208}
209
210fn read_i64_le(buf: &[u8], cursor: &mut usize) -> Result<i64, SemaError> {
211 if *cursor + 8 > buf.len() {
212 return Err(SemaError::eval("unexpected end of bytecode data"));
213 }
214 let v = i64::from_le_bytes(buf[*cursor..*cursor + 8].try_into().unwrap());
215 *cursor += 8;
216 Ok(v)
217}
218
219fn read_f64_le(buf: &[u8], cursor: &mut usize) -> Result<f64, SemaError> {
220 if *cursor + 8 > buf.len() {
221 return Err(SemaError::eval("unexpected end of bytecode data"));
222 }
223 let v = f64::from_le_bytes(buf[*cursor..*cursor + 8].try_into().unwrap());
224 *cursor += 8;
225 Ok(v)
226}
227
228fn read_bytes(buf: &[u8], cursor: &mut usize, len: usize) -> Result<Vec<u8>, SemaError> {
229 if *cursor + len > buf.len() {
230 return Err(SemaError::eval("unexpected end of bytecode data"));
231 }
232 let v = buf[*cursor..*cursor + len].to_vec();
233 *cursor += len;
234 Ok(v)
235}
236
237pub fn deserialize_value(
238 buf: &[u8],
239 cursor: &mut usize,
240 table: &[String],
241 remap: &[Spur],
242) -> Result<Value, SemaError> {
243 deserialize_value_inner(buf, cursor, table, remap, 0)
244}
245
246fn deserialize_value_inner(
247 buf: &[u8],
248 cursor: &mut usize,
249 table: &[String],
250 remap: &[Spur],
251 depth: usize,
252) -> Result<Value, SemaError> {
253 if depth > MAX_VALUE_DEPTH {
254 return Err(SemaError::eval(format!(
255 "value nesting depth exceeds maximum ({MAX_VALUE_DEPTH})"
256 )));
257 }
258 let tag = read_u8(buf, cursor)?;
259 match tag {
260 VAL_NIL => Ok(Value::nil()),
261 VAL_BOOL => {
262 let b = read_u8(buf, cursor)?;
263 match b {
264 0 => Ok(Value::bool(false)),
265 1 => Ok(Value::bool(true)),
266 _ => Err(SemaError::eval(format!(
267 "invalid bool payload in bytecode: 0x{b:02x}"
268 ))),
269 }
270 }
271 VAL_INT => {
272 let n = read_i64_le(buf, cursor)?;
273 Ok(Value::int(n))
274 }
275 VAL_FLOAT => {
276 let f = read_f64_le(buf, cursor)?;
277 Ok(Value::float(f))
278 }
279 VAL_STRING => {
280 let idx = read_u32_le(buf, cursor)? as usize;
281 if idx >= table.len() {
282 return Err(SemaError::eval(format!(
283 "string table index {idx} out of range (table has {} entries)",
284 table.len()
285 )));
286 }
287 Ok(Value::string(&table[idx]))
288 }
289 VAL_SYMBOL => {
290 let idx = read_u32_le(buf, cursor)? as usize;
291 if idx >= remap.len() {
292 return Err(SemaError::eval(format!(
293 "string table index {idx} out of range for symbol remap"
294 )));
295 }
296 Ok(Value::symbol_from_spur(remap[idx]))
297 }
298 VAL_KEYWORD => {
299 let idx = read_u32_le(buf, cursor)? as usize;
300 if idx >= remap.len() {
301 return Err(SemaError::eval(format!(
302 "string table index {idx} out of range for keyword remap"
303 )));
304 }
305 Ok(Value::keyword_from_spur(remap[idx]))
306 }
307 VAL_CHAR => {
308 let cp = read_u32_le(buf, cursor)?;
309 let c = char::from_u32(cp)
310 .ok_or_else(|| SemaError::eval(format!("invalid unicode code point: {cp}")))?;
311 Ok(Value::char(c))
312 }
313 VAL_LIST => {
314 let count = read_u16_le(buf, cursor)? as usize;
315 let mut items = Vec::with_capacity(count);
316 for _ in 0..count {
317 items.push(deserialize_value_inner(
318 buf,
319 cursor,
320 table,
321 remap,
322 depth + 1,
323 )?);
324 }
325 Ok(Value::list(items))
326 }
327 VAL_VECTOR => {
328 let count = read_u16_le(buf, cursor)? as usize;
329 let mut items = Vec::with_capacity(count);
330 for _ in 0..count {
331 items.push(deserialize_value_inner(
332 buf,
333 cursor,
334 table,
335 remap,
336 depth + 1,
337 )?);
338 }
339 Ok(Value::vector(items))
340 }
341 VAL_MAP => {
342 let n_pairs = read_u16_le(buf, cursor)? as usize;
343 let mut map = std::collections::BTreeMap::new();
344 for _ in 0..n_pairs {
345 let k = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
346 let v = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
347 map.insert(k, v);
348 }
349 Ok(Value::map(map))
350 }
351 VAL_HASHMAP => {
352 let n_pairs = read_u16_le(buf, cursor)? as usize;
353 let mut entries = Vec::with_capacity(n_pairs);
354 for _ in 0..n_pairs {
355 let k = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
356 let v = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
357 entries.push((k, v));
358 }
359 Ok(Value::hashmap(entries))
360 }
361 VAL_BYTEVECTOR => {
362 let len = read_u32_le(buf, cursor)? as usize;
363 let data = read_bytes(buf, cursor, len)?;
364 Ok(Value::bytevector(data))
365 }
366 _ => Err(SemaError::eval(format!(
367 "unknown value tag in bytecode: 0x{tag:02x}"
368 ))),
369 }
370}
371
372pub fn serialize_chunk(
375 chunk: &Chunk,
376 buf: &mut Vec<u8>,
377 stb: &mut StringTableBuilder,
378) -> Result<(), SemaError> {
379 let remapped_code = remap_spurs_to_indices(&chunk.code, stb)?;
381 let code_len = checked_u32(remapped_code.len(), "bytecode length")?;
382 buf.extend_from_slice(&code_len.to_le_bytes());
383 buf.extend_from_slice(&remapped_code);
384
385 let n_consts = checked_u16(chunk.consts.len(), "constant pool size")?;
387 buf.extend_from_slice(&n_consts.to_le_bytes());
388 for val in &chunk.consts {
389 serialize_value(val, buf, stb)?;
390 }
391
392 let n_spans = checked_u32(chunk.spans.len(), "span count")?;
394 buf.extend_from_slice(&n_spans.to_le_bytes());
395 for &(pc, ref span) in &chunk.spans {
396 buf.extend_from_slice(&pc.to_le_bytes());
397 let line = checked_u32(span.line, "span line")?;
398 let col = checked_u32(span.col, "span col")?;
399 let end_line = checked_u32(span.end_line, "span end_line")?;
400 let end_col = checked_u32(span.end_col, "span end_col")?;
401 buf.extend_from_slice(&line.to_le_bytes());
402 buf.extend_from_slice(&col.to_le_bytes());
403 buf.extend_from_slice(&end_line.to_le_bytes());
404 buf.extend_from_slice(&end_col.to_le_bytes());
405 }
406
407 buf.extend_from_slice(&chunk.max_stack.to_le_bytes());
409 buf.extend_from_slice(&chunk.n_locals.to_le_bytes());
410
411 let n_exceptions = checked_u16(chunk.exception_table.len(), "exception table size")?;
413 buf.extend_from_slice(&n_exceptions.to_le_bytes());
414 for entry in &chunk.exception_table {
415 buf.extend_from_slice(&entry.try_start.to_le_bytes());
416 buf.extend_from_slice(&entry.try_end.to_le_bytes());
417 buf.extend_from_slice(&entry.handler_pc.to_le_bytes());
418 buf.extend_from_slice(&entry.stack_depth.to_le_bytes());
419 buf.extend_from_slice(&entry.catch_slot.to_le_bytes());
420 }
421
422 Ok(())
423}
424
425pub fn deserialize_chunk(
426 buf: &[u8],
427 cursor: &mut usize,
428 table: &[String],
429 remap: &[Spur],
430) -> Result<Chunk, SemaError> {
431 let code_len = read_u32_le(buf, cursor)? as usize;
433 let remaining = buf.len().saturating_sub(*cursor);
434 if code_len > remaining {
435 return Err(SemaError::eval(format!(
436 "bytecode code_len ({code_len}) exceeds remaining data ({remaining})"
437 )));
438 }
439 let mut code = read_bytes(buf, cursor, code_len)?;
440 remap_indices_to_spurs(&mut code, remap)?;
441
442 let n_consts = read_u16_le(buf, cursor)? as usize;
444 let mut consts = Vec::with_capacity(n_consts);
445 for _ in 0..n_consts {
446 consts.push(deserialize_value(buf, cursor, table, remap)?);
447 }
448
449 let n_spans = read_u32_le(buf, cursor)? as usize;
451 let span_remaining = buf.len().saturating_sub(*cursor);
452 if n_spans
453 .checked_mul(20)
454 .is_none_or(|need| need > span_remaining)
455 {
456 return Err(SemaError::eval(format!(
457 "span count ({n_spans}) exceeds remaining data ({span_remaining} bytes)"
458 )));
459 }
460 let mut spans = Vec::with_capacity(n_spans);
461 for _ in 0..n_spans {
462 let pc = read_u32_le(buf, cursor)?;
463 let line = read_u32_le(buf, cursor)? as usize;
464 let col = read_u32_le(buf, cursor)? as usize;
465 let end_line = read_u32_le(buf, cursor)? as usize;
466 let end_col = read_u32_le(buf, cursor)? as usize;
467 spans.push((pc, Span::new(line, col, end_line, end_col)));
468 }
469
470 let max_stack = read_u16_le(buf, cursor)?;
472 let n_locals = read_u16_le(buf, cursor)?;
473
474 let n_exceptions = read_u16_le(buf, cursor)? as usize;
476 let mut exception_table = Vec::with_capacity(n_exceptions);
477 for _ in 0..n_exceptions {
478 let try_start = read_u32_le(buf, cursor)?;
479 let try_end = read_u32_le(buf, cursor)?;
480 let handler_pc = read_u32_le(buf, cursor)?;
481 let stack_depth = read_u16_le(buf, cursor)?;
482 let catch_slot = read_u16_le(buf, cursor)?;
483 exception_table.push(ExceptionEntry {
484 try_start,
485 try_end,
486 handler_pc,
487 stack_depth,
488 catch_slot,
489 });
490 }
491
492 Ok(Chunk {
493 code,
494 consts,
495 spans,
496 max_stack,
497 n_locals,
498 exception_table,
499 })
500}
501
/// Sentinel written in place of a string-table index for a function that has
/// no name.
const ANONYMOUS_NAME: u32 = u32::MAX;
505
506pub fn serialize_function(
507 func: &Function,
508 buf: &mut Vec<u8>,
509 stb: &mut StringTableBuilder,
510) -> Result<(), SemaError> {
511 match func.name {
513 Some(spur) => {
514 let idx = stb.intern_spur(spur);
515 buf.extend_from_slice(&idx.to_le_bytes());
516 }
517 None => buf.extend_from_slice(&ANONYMOUS_NAME.to_le_bytes()),
518 }
519
520 buf.extend_from_slice(&func.arity.to_le_bytes());
522
523 buf.push(if func.has_rest { 1 } else { 0 });
525
526 let n_upvalues = checked_u16(func.upvalue_descs.len(), "upvalue descriptor count")?;
528 buf.extend_from_slice(&n_upvalues.to_le_bytes());
529 for desc in &func.upvalue_descs {
530 match desc {
531 UpvalueDesc::ParentLocal(idx) => {
532 buf.push(0);
533 buf.extend_from_slice(&idx.to_le_bytes());
534 }
535 UpvalueDesc::ParentUpvalue(idx) => {
536 buf.push(1);
537 buf.extend_from_slice(&idx.to_le_bytes());
538 }
539 }
540 }
541
542 serialize_chunk(&func.chunk, buf, stb)?;
544
545 let n_local_names = checked_u16(func.local_names.len(), "local name count")?;
547 buf.extend_from_slice(&n_local_names.to_le_bytes());
548 for &(slot, spur) in &func.local_names {
549 buf.extend_from_slice(&slot.to_le_bytes());
550 let idx = stb.intern_spur(spur);
551 buf.extend_from_slice(&idx.to_le_bytes());
552 }
553
554 Ok(())
555}
556
557pub fn deserialize_function(
558 buf: &[u8],
559 cursor: &mut usize,
560 table: &[String],
561 remap: &[Spur],
562) -> Result<Function, SemaError> {
563 let name_idx = read_u32_le(buf, cursor)?;
565 let name = if name_idx == ANONYMOUS_NAME {
566 None
567 } else {
568 let idx = name_idx as usize;
569 if idx >= remap.len() {
570 return Err(SemaError::eval(format!(
571 "function name string table index {idx} out of range"
572 )));
573 }
574 Some(remap[idx])
575 };
576
577 let arity = read_u16_le(buf, cursor)?;
579
580 let has_rest_byte = read_u8(buf, cursor)?;
582 let has_rest = match has_rest_byte {
583 0 => false,
584 1 => true,
585 _ => {
586 return Err(SemaError::eval(format!(
587 "invalid has_rest byte: 0x{has_rest_byte:02x}"
588 )));
589 }
590 };
591
592 let n_upvalues = read_u16_le(buf, cursor)? as usize;
594 let mut upvalue_descs = Vec::with_capacity(n_upvalues);
595 for _ in 0..n_upvalues {
596 let kind = read_u8(buf, cursor)?;
597 let index = read_u16_le(buf, cursor)?;
598 match kind {
599 0 => upvalue_descs.push(UpvalueDesc::ParentLocal(index)),
600 1 => upvalue_descs.push(UpvalueDesc::ParentUpvalue(index)),
601 _ => {
602 return Err(SemaError::eval(format!(
603 "invalid upvalue kind: 0x{kind:02x}"
604 )));
605 }
606 }
607 }
608
609 let chunk = deserialize_chunk(buf, cursor, table, remap)?;
611
612 let n_local_names = read_u16_le(buf, cursor)? as usize;
614 let mut local_names = Vec::with_capacity(n_local_names);
615 for _ in 0..n_local_names {
616 let slot = read_u16_le(buf, cursor)?;
617 let name_idx = read_u32_le(buf, cursor)? as usize;
618 if name_idx >= remap.len() {
619 return Err(SemaError::eval(format!(
620 "local name string table index {name_idx} out of range"
621 )));
622 }
623 local_names.push((slot, remap[name_idx]));
624 }
625
626 Ok(Function {
627 name,
628 chunk,
629 upvalue_descs,
630 arity,
631 has_rest,
632 local_names,
633 })
634}
635
636fn spur_to_u32(spur: Spur) -> u32 {
639 spur.into_inner().get()
640}
641
642fn u32_to_spur(bits: u32) -> Spur {
643 use lasso::Key;
644 let idx = bits
645 .checked_sub(1)
646 .expect("invalid Spur bits: 0 is not valid");
647 Spur::try_from_usize(idx as usize).expect("invalid Spur bits")
648}
649
650fn advance_pc(code: &[u8], pc: usize) -> Result<(Op, usize), SemaError> {
652 let Some(op) = Op::from_u8(code[pc]) else {
653 return Err(SemaError::eval(format!(
654 "invalid opcode 0x{:02x} at pc {pc}",
655 code[pc]
656 )));
657 };
658 let next = match op {
659 Op::LoadGlobal | Op::StoreGlobal | Op::DefineGlobal => pc + 5, Op::CallGlobal => pc + 7, Op::Jump | Op::JumpIfFalse | Op::JumpIfTrue => pc + 5, Op::CallNative => pc + 5, Op::MakeClosure => {
664 if pc + 5 > code.len() {
665 return Err(SemaError::eval(format!(
666 "truncated MakeClosure operands at pc {pc}"
667 )));
668 }
669 let n_upvalues = u16::from_le_bytes([code[pc + 3], code[pc + 4]]) as usize;
670 pc + 5 + n_upvalues * 4
671 }
672 Op::Const
673 | Op::LoadLocal
674 | Op::StoreLocal
675 | Op::LoadUpvalue
676 | Op::StoreUpvalue
677 | Op::Call
678 | Op::TailCall
679 | Op::MakeList
680 | Op::MakeVector
681 | Op::MakeMap
682 | Op::MakeHashMap => pc + 3, _ => pc + 1, };
685 if next > code.len() {
686 return Err(SemaError::eval(format!(
687 "truncated operand for {:?} at pc {pc} (need {} bytes, have {})",
688 op,
689 next - pc,
690 code.len() - pc
691 )));
692 }
693 Ok((op, next))
694}
695
696pub fn remap_spurs_to_indices(
699 code: &[u8],
700 stb: &mut StringTableBuilder,
701) -> Result<Vec<u8>, SemaError> {
702 let mut out = code.to_vec();
703 let mut pc = 0;
704 while pc < out.len() {
705 let (op, next) = advance_pc(&out, pc)?;
706 if matches!(
707 op,
708 Op::LoadGlobal | Op::StoreGlobal | Op::DefineGlobal | Op::CallGlobal
709 ) {
710 let spur_bits =
711 u32::from_le_bytes([out[pc + 1], out[pc + 2], out[pc + 3], out[pc + 4]]);
712 let spur = u32_to_spur(spur_bits);
713 let s = resolve(spur);
714 let idx = stb.intern_str(&s);
715 let bytes = idx.to_le_bytes();
716 out[pc + 1] = bytes[0];
717 out[pc + 2] = bytes[1];
718 out[pc + 3] = bytes[2];
719 out[pc + 4] = bytes[3];
720 }
721 pc = next;
722 }
723 Ok(out)
724}
725
726pub fn remap_indices_to_spurs(code: &mut [u8], remap: &[Spur]) -> Result<(), SemaError> {
728 let mut pc = 0;
729 while pc < code.len() {
730 let (op, next) = advance_pc(code, pc)?;
731 if matches!(
732 op,
733 Op::LoadGlobal | Op::StoreGlobal | Op::DefineGlobal | Op::CallGlobal
734 ) {
735 let idx = u32::from_le_bytes([code[pc + 1], code[pc + 2], code[pc + 3], code[pc + 4]])
736 as usize;
737 if idx >= remap.len() {
738 return Err(SemaError::eval(format!(
739 "global spur remap index {idx} out of range at pc {pc}"
740 )));
741 }
742 let spur_bits = spur_to_u32(remap[idx]);
743 let bytes = spur_bits.to_le_bytes();
744 code[pc + 1] = bytes[0];
745 code[pc + 2] = bytes[1];
746 code[pc + 3] = bytes[2];
747 code[pc + 4] = bytes[3];
748 }
749 pc = next;
750 }
751 Ok(())
752}
753
/// First four bytes of every bytecode file: a NUL byte followed by `SEM`.
const MAGIC: [u8; 4] = *b"\0SEM";
/// Container format version written to, and required in, the file header.
const FORMAT_VERSION: u16 = 1;
/// Section type id of the string table.
const SECTION_STRING_TABLE: u16 = 1;
/// Section type id of the function table.
const SECTION_FUNCTION_TABLE: u16 = 2;
/// Section type id of the main chunk.
const SECTION_MAIN_CHUNK: u16 = 3;
761
762pub fn serialize_to_bytes(result: &CompileResult, source_hash: u32) -> Result<Vec<u8>, SemaError> {
766 let mut stb = StringTableBuilder::new();
767
768 let mut func_payload = Vec::new();
774 let n_funcs = checked_u32(result.functions.len(), "function count")?;
775 func_payload.extend_from_slice(&n_funcs.to_le_bytes());
776 for func in &result.functions {
777 serialize_function(func, &mut func_payload, &mut stb)?;
778 }
779
780 let mut chunk_payload = Vec::new();
782 serialize_chunk(&result.chunk, &mut chunk_payload, &mut stb)?;
783
784 let string_table = stb.finish();
786 let mut strtab_payload = Vec::new();
787 let n_strings = checked_u32(string_table.len(), "string table size")?;
788 strtab_payload.extend_from_slice(&n_strings.to_le_bytes());
789 for s in &string_table {
790 let bytes = s.as_bytes();
791 let len = checked_u32(bytes.len(), "string length")?;
792 strtab_payload.extend_from_slice(&len.to_le_bytes());
793 strtab_payload.extend_from_slice(bytes);
794 }
795
796 let n_sections: u16 = 3; let mut out = Vec::new();
799
800 out.extend_from_slice(&MAGIC);
802 out.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
803 out.extend_from_slice(&0u16.to_le_bytes()); let (major, minor, patch) = parse_sema_version();
806 out.extend_from_slice(&major.to_le_bytes());
807 out.extend_from_slice(&minor.to_le_bytes());
808 out.extend_from_slice(&patch.to_le_bytes());
809 out.extend_from_slice(&n_sections.to_le_bytes());
810 out.extend_from_slice(&source_hash.to_le_bytes());
811 out.extend_from_slice(&0u32.to_le_bytes()); write_section(&mut out, SECTION_STRING_TABLE, &strtab_payload)?;
815 write_section(&mut out, SECTION_FUNCTION_TABLE, &func_payload)?;
817 write_section(&mut out, SECTION_MAIN_CHUNK, &chunk_payload)?;
819
820 Ok(out)
821}
822
823fn write_section(out: &mut Vec<u8>, section_type: u16, payload: &[u8]) -> Result<(), SemaError> {
824 let len = checked_u32(payload.len(), "section payload length")?;
825 out.extend_from_slice(§ion_type.to_le_bytes());
826 out.extend_from_slice(&len.to_le_bytes());
827 out.extend_from_slice(payload);
828 Ok(())
829}
830
831fn parse_sema_version() -> (u16, u16, u16) {
832 let version = env!("CARGO_PKG_VERSION");
833 let parts: Vec<&str> = version.split('.').collect();
834 let major = parts.first().and_then(|s| s.parse().ok()).unwrap_or(0);
835 let minor = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0);
836 let patch = parts.get(2).and_then(|s| s.parse().ok()).unwrap_or(0);
837 (major, minor, patch)
838}
839
840fn validate_bytecode(result: &CompileResult) -> Result<(), SemaError> {
842 validate_chunk_bytecode(&result.chunk, result.functions.len(), 0, "main chunk")?;
843 for (i, func) in result.functions.iter().enumerate() {
844 let label = format!("function {i}");
845 let n_upvalues = func.upvalue_descs.len();
846 validate_chunk_bytecode(&func.chunk, result.functions.len(), n_upvalues, &label)?;
847 }
848 Ok(())
849}
850
851fn validate_chunk_bytecode(
852 chunk: &Chunk,
853 n_functions: usize,
854 n_upvalues: usize,
855 label: &str,
856) -> Result<(), SemaError> {
857 let code = &chunk.code;
858 let n_locals = chunk.n_locals as usize;
859 let mut pc = 0;
860 while pc < code.len() {
861 let (op, next) = advance_pc(code, pc)?;
862 match op {
863 Op::Const => {
864 let idx = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
865 if idx >= chunk.consts.len() {
866 return Err(SemaError::eval(format!(
867 "in {label}: Const index {idx} out of range (pool has {} entries) at pc {pc}",
868 chunk.consts.len()
869 )));
870 }
871 }
872 Op::MakeClosure => {
873 let func_id = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
874 if func_id >= n_functions {
875 return Err(SemaError::eval(format!(
876 "in {label}: MakeClosure func_id {func_id} out of range ({n_functions} functions) at pc {pc}",
877 )));
878 }
879 }
880 Op::LoadLocal | Op::StoreLocal => {
881 let slot = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
882 if slot >= n_locals {
883 return Err(SemaError::eval(format!(
884 "in {label}: local slot {slot} out of range (n_locals={n_locals}) at pc {pc}",
885 )));
886 }
887 }
888 Op::LoadUpvalue | Op::StoreUpvalue => {
889 let slot = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
890 if slot >= n_upvalues {
891 return Err(SemaError::eval(format!(
892 "in {label}: upvalue slot {slot} out of range (n_upvalues={n_upvalues}) at pc {pc}",
893 )));
894 }
895 }
896 _ => {}
897 }
898 pc = next;
899 }
900 Ok(())
901}
902
903pub fn deserialize_from_bytes(bytes: &[u8]) -> Result<CompileResult, SemaError> {
905 if bytes.len() < 24 {
906 return Err(SemaError::eval(
907 "bytecode file too short (< 24 bytes header)",
908 ));
909 }
910
911 if bytes[0..4] != MAGIC {
913 return Err(SemaError::eval(
914 "invalid bytecode magic number (expected \\x00SEM)",
915 ));
916 }
917 let format_version = u16::from_le_bytes([bytes[4], bytes[5]]);
918 if format_version != FORMAT_VERSION {
919 return Err(SemaError::eval(format!(
920 "unsupported bytecode format version {format_version} (expected {FORMAT_VERSION}). Recompile from source."
921 )));
922 }
923 let reserved = u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]);
924 if reserved != 0 {
925 return Err(SemaError::eval(format!(
926 "non-zero reserved header field (0x{reserved:08x}); file may be from a newer Sema version"
927 )));
928 }
929 let n_sections = u16::from_le_bytes([bytes[14], bytes[15]]) as usize;
930
931 let mut cursor = 24;
933 let mut string_table: Option<Vec<String>> = None;
934 let mut func_table_data: Option<(usize, usize)> = None; let mut main_chunk_data: Option<(usize, usize)> = None;
936
937 for _ in 0..n_sections {
938 if cursor + 6 > bytes.len() {
939 return Err(SemaError::eval(
940 "unexpected end of bytecode file in section header",
941 ));
942 }
943 let section_type = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]);
944 let section_len = u32::from_le_bytes([
945 bytes[cursor + 2],
946 bytes[cursor + 3],
947 bytes[cursor + 4],
948 bytes[cursor + 5],
949 ]) as usize;
950 cursor += 6;
951
952 if cursor + section_len > bytes.len() {
953 return Err(SemaError::eval(format!(
954 "section 0x{section_type:04x} claims {section_len} bytes but only {} remain",
955 bytes.len() - cursor
956 )));
957 }
958
959 match section_type {
960 0x01 => {
961 let section_data = &bytes[cursor..cursor + section_len];
963 let mut sc = 0usize;
964 let count = read_u32_le(section_data, &mut sc)? as usize;
965 let remaining_after_count = section_len.saturating_sub(sc);
968 if count > remaining_after_count / 4 {
969 return Err(SemaError::eval(format!(
970 "string table count ({count}) exceeds section capacity"
971 )));
972 }
973 let mut table = Vec::with_capacity(count);
974 for _ in 0..count {
975 let len = read_u32_le(section_data, &mut sc)? as usize;
976 if sc + len > section_len {
977 return Err(SemaError::eval("string table entry extends past section"));
978 }
979 let s = std::str::from_utf8(§ion_data[sc..sc + len]).map_err(|e| {
980 SemaError::eval(format!("invalid UTF-8 in string table: {e}"))
981 })?;
982 table.push(s.to_string());
983 sc += len;
984 }
985 string_table = Some(table);
986 }
987 0x02 => {
988 func_table_data = Some((cursor, section_len));
989 }
990 0x03 => {
991 main_chunk_data = Some((cursor, section_len));
992 }
993 _ => {
994 }
996 }
997 cursor += section_len;
998 }
999
1000 let table = string_table
1002 .ok_or_else(|| SemaError::eval("bytecode file missing string table section"))?;
1003 if table.is_empty() || !table[0].is_empty() {
1004 return Err(SemaError::eval(
1005 "string table index 0 must be the empty string",
1006 ));
1007 }
1008 let (func_start, func_len) = func_table_data
1009 .ok_or_else(|| SemaError::eval("bytecode file missing function table section"))?;
1010 let (chunk_start, chunk_len) = main_chunk_data
1011 .ok_or_else(|| SemaError::eval("bytecode file missing main chunk section"))?;
1012
1013 let remap = build_remap_table(&table);
1014
1015 let func_section = &bytes[func_start..func_start + func_len];
1017 let mut fc = 0;
1018 let n_funcs = read_u32_le(func_section, &mut fc)? as usize;
1019 if n_funcs > func_len / 4 {
1021 return Err(SemaError::eval(format!(
1022 "function count ({n_funcs}) exceeds section capacity"
1023 )));
1024 }
1025 let mut functions = Vec::with_capacity(n_funcs);
1026 for _ in 0..n_funcs {
1027 functions.push(deserialize_function(func_section, &mut fc, &table, &remap)?);
1028 }
1029 if fc != func_len {
1030 return Err(SemaError::eval(format!(
1031 "function table section has {} unconsumed trailing bytes",
1032 func_len - fc
1033 )));
1034 }
1035
1036 let chunk_section = &bytes[chunk_start..chunk_start + chunk_len];
1038 let mut cc = 0;
1039 let chunk = deserialize_chunk(chunk_section, &mut cc, &table, &remap)?;
1040 if cc != chunk_len {
1041 return Err(SemaError::eval(format!(
1042 "main chunk section has {} unconsumed trailing bytes",
1043 chunk_len - cc
1044 )));
1045 }
1046
1047 let result = CompileResult { chunk, functions };
1048 validate_bytecode(&result)?;
1049 Ok(result)
1050}
1051
1052pub fn is_bytecode_file(bytes: &[u8]) -> bool {
1054 bytes.len() >= 4 && bytes[0..4] == MAGIC
1055}
1056
1057#[cfg(test)]
1058mod tests {
1059 use super::*;
1060 use sema_core::intern;
1061
1062 #[test]
1063 fn test_string_table_builder() {
1064 let mut builder = StringTableBuilder::new();
1065 assert_eq!(builder.intern_str(""), 0);
1067 let idx_hello = builder.intern_str("hello");
1068 let idx_world = builder.intern_str("world");
1069 let idx_hello2 = builder.intern_str("hello");
1070 assert_eq!(idx_hello, idx_hello2); assert_ne!(idx_hello, idx_world);
1072
1073 let table = builder.finish();
1074 assert_eq!(table.len(), 3); assert_eq!(table[0], "");
1076 assert_eq!(table[idx_hello as usize], "hello");
1077 assert_eq!(table[idx_world as usize], "world");
1078 }
1079
1080 #[test]
1081 fn test_string_table_spur_interning() {
1082 let mut builder = StringTableBuilder::new();
1083 let spur = intern("my-var");
1084 let idx = builder.intern_spur(spur);
1085 assert!(idx > 0);
1086 let idx2 = builder.intern_spur(spur);
1087 assert_eq!(idx, idx2);
1088 }
1089
1090 #[test]
1091 fn test_chunk_roundtrip() {
1092 use crate::emit::Emitter;
1093 use crate::opcodes::Op;
1094
1095 let mut e = Emitter::new();
1096 e.emit_const(Value::int(42));
1097 e.emit_const(Value::string("hello"));
1098 e.emit_op(Op::Add);
1099 e.emit_op(Op::Return);
1100 let mut chunk = e.into_chunk();
1101 chunk.n_locals = 2;
1102 chunk.max_stack = 4;
1103
1104 let mut buf = Vec::new();
1105 let mut stb = StringTableBuilder::new();
1106 serialize_chunk(&chunk, &mut buf, &mut stb).unwrap();
1107
1108 let table = stb.finish();
1109 let remap = build_remap_table(&table);
1110 let mut cursor = 0;
1111 let chunk2 = deserialize_chunk(&buf, &mut cursor, &table, &remap).unwrap();
1112
1113 assert_eq!(chunk2.code, chunk.code);
1114 assert_eq!(chunk2.consts.len(), chunk.consts.len());
1115 assert_eq!(chunk2.n_locals, 2);
1116 assert_eq!(chunk2.max_stack, 4);
1117 }
1118
1119 #[test]
1122 fn test_serialize_float_nan() {
1123 let mut buf = Vec::new();
1124 let mut stb = StringTableBuilder::new();
1125 serialize_value(&Value::float(f64::NAN), &mut buf, &mut stb).unwrap();
1126
1127 let table = stb.finish();
1128 let remap = build_remap_table(&table);
1129 let mut cursor = 0;
1130 let v = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1131 assert!(v.as_float().unwrap().is_nan());
1132 }
1133
/// Negative zero keeps its sign bit (and exact bit pattern) across a round trip.
#[test]
fn test_serialize_float_neg_zero() {
    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_value(&Value::float(-0.0), &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    let decoded = deserialize_value(&bytes, &mut pos, &strings, &spurs)
        .unwrap()
        .as_float()
        .unwrap();

    // `-0.0 == 0.0` is true, so the sign and the raw bits are checked explicitly.
    assert!(decoded.is_sign_negative());
    assert_eq!(decoded.to_bits(), (-0.0f64).to_bits());
}
1149
/// Both infinities round-trip, read back in the order they were written.
#[test]
fn test_serialize_float_infinities() {
    let infinities = [f64::INFINITY, f64::NEG_INFINITY];

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    for &inf in &infinities {
        serialize_value(&Value::float(inf), &mut bytes, &mut stb).unwrap();
    }

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    for &inf in &infinities {
        let decoded = deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap();
        assert_eq!(decoded.as_float(), Some(inf));
    }
}
1165
/// Boundary integers (`i64::MIN`, `i64::MAX`, zero, and -1) round-trip exactly.
#[test]
fn test_serialize_int_extremes() {
    let samples = [i64::MIN, i64::MAX, 0, -1];

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    for &n in &samples {
        serialize_value(&Value::int(n), &mut bytes, &mut stb).unwrap();
    }

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    // Values come back in the same order they were appended to the buffer.
    for &n in &samples {
        assert_eq!(
            deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap(),
            Value::int(n)
        );
    }
}
1197
/// Every collection kind round-trips when it is empty.
#[test]
fn test_serialize_empty_collections() {
    let empties = [
        Value::list(vec![]),
        Value::vector(vec![]),
        Value::map(std::collections::BTreeMap::new()),
        Value::hashmap(vec![]),
        Value::bytevector(vec![]),
    ];

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    for value in &empties {
        serialize_value(value, &mut bytes, &mut stb).unwrap();
    }

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    // Pulls the next value off the shared buffer, advancing `pos`.
    let mut next = || deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap();

    let list = next();
    assert_eq!(list.as_list().unwrap().len(), 0);
    let vector = next();
    assert_eq!(vector.as_vector().unwrap().len(), 0);
    let map = next();
    assert_eq!(map.as_map_rc().unwrap().len(), 0);
    let hashmap = next();
    assert_eq!(hashmap.as_hashmap_rc().unwrap().len(), 0);
    let bytevector = next();
    assert_eq!(bytevector.as_bytevector().unwrap().len(), 0);
}
1231
/// A vector of lists (mixing ints, a string, and a symbol) round-trips to an
/// equal value.
#[test]
fn test_serialize_nested_collections() {
    let ints = Value::list(vec![Value::int(1), Value::int(2)]);
    let names = Value::list(vec![Value::string("a"), Value::symbol("b")]);
    let original = Value::vector(vec![ints, names]);

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_value(&original, &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    let decoded = deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap();
    assert_eq!(decoded, original);
}
1252
/// Characters round-trip, both ASCII and one outside the Basic Multilingual Plane.
#[test]
fn test_serialize_char() {
    let samples = ['A', '🦀'];

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    for &c in &samples {
        serialize_value(&Value::char(c), &mut bytes, &mut stb).unwrap();
    }

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    for &c in &samples {
        assert_eq!(
            deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap(),
            Value::char(c)
        );
    }
}
1274
/// A bytevector with assorted byte values (including 0 and 255) round-trips.
#[test]
fn test_serialize_bytevector() {
    let data = vec![0u8, 1, 2, 255, 128, 64];
    let original = Value::bytevector(data.clone());

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_value(&original, &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    let decoded = deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap();
    assert_eq!(decoded.as_bytevector().unwrap(), &data);
}
1290
/// A `VAL_BOOL` tag followed by the byte 0x02 must be rejected.
#[test]
fn test_deserialize_invalid_bool() {
    let bytes = vec![VAL_BOOL, 0x02];
    let strings: Vec<String> = Vec::new();
    let spurs: Vec<Spur> = Vec::new();

    let mut pos = 0;
    assert!(deserialize_value(&bytes, &mut pos, &strings, &spurs).is_err());
}
1302
/// A `VAL_CHAR` payload holding a surrogate code point must be rejected.
#[test]
fn test_deserialize_invalid_char() {
    // 0xD800 is the first UTF-16 surrogate, which no Rust `char` can hold.
    let surrogate: u32 = 0xD800;
    let mut bytes = vec![VAL_CHAR];
    bytes.extend_from_slice(&surrogate.to_le_bytes());

    let strings: Vec<String> = Vec::new();
    let spurs: Vec<Spur> = Vec::new();
    let mut pos = 0;
    assert!(deserialize_value(&bytes, &mut pos, &strings, &spurs).is_err());
}
1314
/// A tag byte of 0xFF must produce an error.
#[test]
fn test_deserialize_unknown_tag() {
    let bytes = vec![0xFF];
    let strings: Vec<String> = Vec::new();
    let spurs: Vec<Spur> = Vec::new();

    let mut pos = 0;
    assert!(deserialize_value(&bytes, &mut pos, &strings, &spurs).is_err());
}
1324
/// A `VAL_INT` tag followed by only three payload bytes must be rejected.
#[test]
fn test_deserialize_truncated_data() {
    let bytes = vec![VAL_INT, 0x01, 0x02, 0x03];
    let strings: Vec<String> = Vec::new();
    let spurs: Vec<Spur> = Vec::new();

    let mut pos = 0;
    assert!(deserialize_value(&bytes, &mut pos, &strings, &spurs).is_err());
}
1335
/// A `VAL_STRING` referring to index 99 of a one-entry table must be rejected.
#[test]
fn test_deserialize_string_index_out_of_range() {
    let missing_index: u32 = 99;
    let mut bytes = vec![VAL_STRING];
    bytes.extend_from_slice(&missing_index.to_le_bytes());

    // The table holds a single entry, the empty string.
    let strings = vec!["".to_string()];
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    assert!(deserialize_value(&bytes, &mut pos, &strings, &spurs).is_err());
}
1346
/// Serializing a lambda value must fail with an error.
#[test]
fn test_serialize_runtime_only_type_rejected() {
    use sema_core::{Env, Lambda};

    let closure = Lambda {
        params: vec![],
        rest_param: None,
        body: vec![],
        env: Env::new(),
        name: None,
    };
    let runtime_value = Value::lambda(closure);

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    assert!(serialize_value(&runtime_value, &mut bytes, &mut stb).is_err());
}
1364
/// The exception table round-trips with every field of every entry intact.
#[test]
fn test_chunk_roundtrip_with_exceptions() {
    use crate::chunk::ExceptionEntry;
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let mut e = Emitter::new();
    e.emit_op(Op::Nil);
    e.emit_op(Op::Return);
    let mut chunk = e.into_chunk();
    chunk.exception_table = vec![
        ExceptionEntry {
            try_start: 0,
            try_end: 10,
            handler_pc: 20,
            stack_depth: 3,
            catch_slot: 5,
        },
        ExceptionEntry {
            try_start: 100,
            try_end: 200,
            handler_pc: 300,
            stack_depth: 0,
            catch_slot: 7,
        },
    ];

    let mut buf = Vec::new();
    let mut stb = StringTableBuilder::new();
    serialize_chunk(&chunk, &mut buf, &mut stb).unwrap();

    let table = stb.finish();
    let remap = build_remap_table(&table);
    let mut cursor = 0;
    let chunk2 = deserialize_chunk(&buf, &mut cursor, &table, &remap).unwrap();

    assert_eq!(chunk2.exception_table.len(), 2);
    // Check all five fields of both entries. Previously the second entry's
    // `try_end`, `stack_depth`, and `catch_slot` were never verified, so a
    // decoder bug affecting only those fields would have gone unnoticed.
    let pairs = chunk2.exception_table.iter().zip(&chunk.exception_table);
    for (i, (got, want)) in pairs.enumerate() {
        assert_eq!(got.try_start, want.try_start, "entry {i}: try_start");
        assert_eq!(got.try_end, want.try_end, "entry {i}: try_end");
        assert_eq!(got.handler_pc, want.handler_pc, "entry {i}: handler_pc");
        assert_eq!(got.stack_depth, want.stack_depth, "entry {i}: stack_depth");
        assert_eq!(got.catch_slot, want.catch_slot, "entry {i}: catch_slot");
    }
}
1412
/// Span entries (a point span and a multi-line span) round-trip field by field.
#[test]
fn test_chunk_roundtrip_with_spans() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let mut emitter = Emitter::new();
    emitter.emit_op(Op::Nil);
    emitter.emit_op(Op::Return);
    let mut original = emitter.into_chunk();
    original.spans = vec![(0, Span::point(1, 5)), (1, Span::new(2, 10, 3, 15))];

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_chunk(&original, &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    let decoded = deserialize_chunk(&bytes, &mut pos, &strings, &spurs).unwrap();

    assert_eq!(decoded.spans.len(), 2);

    // First entry: the point span, whose end equals its start.
    let (pc0, point) = &decoded.spans[0];
    assert_eq!(*pc0, 0);
    assert_eq!(point.line, 1);
    assert_eq!(point.col, 5);
    assert_eq!(point.end_line, 1);
    assert_eq!(point.end_col, 5);

    // Second entry: the span built from four explicit coordinates.
    let (pc1, range) = &decoded.spans[1];
    assert_eq!(*pc1, 1);
    assert_eq!(range.line, 2);
    assert_eq!(range.col, 10);
    assert_eq!(range.end_line, 3);
    assert_eq!(range.end_col, 15);
}
1445
/// A chunk whose leading u32 is 100 but which supplies only four further
/// bytes must be rejected.
#[test]
fn test_chunk_deserialize_truncated() {
    let declared_len: u32 = 100;
    let mut bytes = Vec::new();
    bytes.extend_from_slice(&declared_len.to_le_bytes());
    bytes.extend_from_slice(&[0u8; 4]);

    let strings: Vec<String> = Vec::new();
    let spurs: Vec<Spur> = Vec::new();
    let mut pos = 0;
    assert!(deserialize_chunk(&bytes, &mut pos, &strings, &spurs).is_err());
}
1459
/// The operand of a `LoadGlobal` still resolves to the same name after a
/// serialize/deserialize round trip.
#[test]
fn test_spur_remapping_in_bytecode() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let global = intern("my-global");
    let mut emitter = Emitter::new();
    emitter.emit_op(Op::LoadGlobal);
    emitter.emit_u32(spur_to_u32(global));
    emitter.emit_op(Op::Return);
    let original = emitter.into_chunk();

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_chunk(&original, &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    let decoded = deserialize_chunk(&bytes, &mut pos, &strings, &spurs).unwrap();

    // Byte 0 is the opcode; bytes 1..5 are its little-endian u32 operand.
    let mut operand = [0u8; 4];
    operand.copy_from_slice(&decoded.code[1..5]);
    let restored = u32_to_spur(u32::from_le_bytes(operand));
    assert_eq!(sema_core::resolve(restored), "my-global");
}
1493
/// Two different global operands in one chunk each resolve to their own name
/// after a round trip.
#[test]
fn test_spur_remapping_multiple_globals() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let alpha = intern("alpha");
    let beta = intern("beta");
    let mut emitter = Emitter::new();
    emitter.emit_op(Op::LoadGlobal);
    emitter.emit_u32(spur_to_u32(alpha));
    emitter.emit_op(Op::DefineGlobal);
    emitter.emit_u32(spur_to_u32(beta));
    emitter.emit_op(Op::Return);
    let original = emitter.into_chunk();

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_chunk(&original, &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    let decoded = deserialize_chunk(&bytes, &mut pos, &strings, &spurs).unwrap();

    // Resolves the little-endian u32 operand that starts at byte `start`.
    let name_at = |start: usize| {
        let mut operand = [0u8; 4];
        operand.copy_from_slice(&decoded.code[start..start + 4]);
        sema_core::resolve(u32_to_spur(u32::from_le_bytes(operand)))
    };

    // Operands sit right after each one-byte opcode: offsets 1 and 6.
    assert_eq!(name_at(1), "alpha");
    assert_eq!(name_at(6), "beta");
}
1535
/// A named function with upvalues, a rest parameter, and local names
/// round-trips with all of its metadata intact.
#[test]
fn test_function_roundtrip() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let mut e = Emitter::new();
    e.emit_op(Op::LoadLocal0);
    e.emit_op(Op::Return);
    let chunk = e.into_chunk();

    let func = Function {
        name: Some(intern("my-func")),
        chunk,
        upvalue_descs: vec![UpvalueDesc::ParentLocal(0), UpvalueDesc::ParentUpvalue(1)],
        arity: 2,
        has_rest: true,
        local_names: vec![(0, intern("x")), (1, intern("y"))],
    };

    let mut buf = Vec::new();
    let mut stb = StringTableBuilder::new();
    serialize_function(&func, &mut buf, &mut stb).unwrap();

    let table = stb.finish();
    let remap = build_remap_table(&table);
    let mut cursor = 0;
    let func2 = deserialize_function(&buf, &mut cursor, &table, &remap).unwrap();

    assert_eq!(func2.arity, 2);
    assert!(func2.has_rest);

    // Verify the descriptors themselves, not only how many there are: a
    // decoder that mixed up the variant or the index would keep the length.
    assert_eq!(func2.upvalue_descs.len(), 2);
    assert!(matches!(
        func2.upvalue_descs[0],
        UpvalueDesc::ParentLocal(0)
    ));
    assert!(matches!(
        func2.upvalue_descs[1],
        UpvalueDesc::ParentUpvalue(1)
    ));

    assert!(func2.name.is_some());
    assert_eq!(sema_core::resolve(func2.name.unwrap()), "my-func");

    // Local names carry a slot number as well as a name; check both halves.
    assert_eq!(func2.local_names.len(), 2);
    assert_eq!(func2.local_names[0].0, 0);
    assert_eq!(sema_core::resolve(func2.local_names[0].1), "x");
    assert_eq!(func2.local_names[1].0, 1);
    assert_eq!(sema_core::resolve(func2.local_names[1].1), "y");
}
1575
/// A function with no name, no upvalues, and no locals round-trips and stays
/// anonymous.
#[test]
fn test_function_roundtrip_anonymous() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let mut emitter = Emitter::new();
    emitter.emit_op(Op::Return);

    let original = Function {
        name: None,
        chunk: emitter.into_chunk(),
        upvalue_descs: vec![],
        arity: 0,
        has_rest: false,
        local_names: vec![],
    };

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_function(&original, &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    let decoded = deserialize_function(&bytes, &mut pos, &strings, &spurs).unwrap();

    assert!(decoded.name.is_none());
    assert_eq!(decoded.arity, 0);
    assert!(!decoded.has_rest);
    assert_eq!(decoded.upvalue_descs.len(), 0);
}
1608
/// A minimal program serializes to a file starting with the `\x00SEM` magic
/// and deserializes back with one constant and no functions.
#[test]
fn test_full_file_roundtrip() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let mut emitter = Emitter::new();
    emitter.emit_const(Value::int(42));
    emitter.emit_op(Op::Return);
    let program = CompileResult {
        chunk: emitter.into_chunk(),
        functions: vec![],
    };

    let encoded = serialize_to_bytes(&program, 0).unwrap();
    assert_eq!(&encoded[0..4], b"\x00SEM");

    let decoded = deserialize_from_bytes(&encoded).unwrap();
    assert_eq!(decoded.chunk.consts.len(), 1);
    assert_eq!(decoded.functions.len(), 0);
}
1632
/// A program with one function in its function table round-trips, keeping
/// the function's arity and name.
#[test]
fn test_full_file_with_functions() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    // The callee: a one-argument function named `add-one`.
    let mut body = Emitter::new();
    body.emit_op(Op::LoadLocal0);
    body.emit_op(Op::Return);
    let add_one = Function {
        name: Some(intern("add-one")),
        chunk: body.into_chunk(),
        upvalue_descs: vec![],
        arity: 1,
        has_rest: false,
        local_names: vec![(0, intern("x"))],
    };

    // The main chunk: `MakeClosure` with two zero u16 operands
    // (presumably func id and upvalue count; confirm against the opcode table).
    let mut main = Emitter::new();
    main.emit_op(Op::MakeClosure);
    main.emit_u16(0);
    main.emit_u16(0);
    main.emit_op(Op::Return);

    let program = CompileResult {
        chunk: main.into_chunk(),
        functions: vec![add_one],
    };

    let encoded = serialize_to_bytes(&program, 0xDEAD_BEEF).unwrap();
    let decoded = deserialize_from_bytes(&encoded).unwrap();

    assert_eq!(decoded.functions.len(), 1);
    let restored = &decoded.functions[0];
    assert_eq!(restored.arity, 1);
    assert_eq!(sema_core::resolve(restored.name.unwrap()), "add-one");
}
1674
/// `is_bytecode_file` accepts input starting with the magic and rejects
/// source text, empty input, and a truncated magic.
#[test]
fn test_magic_detection() {
    let cases: [(&[u8], bool); 4] = [
        (b"\x00SEM\x01\x00", true),
        (b"(define x 1)", false),
        (b"", false),
        // Only three of the four magic bytes.
        (b"\x00SE", false),
    ];

    for (input, expected) in cases.iter() {
        assert_eq!(is_bytecode_file(input), *expected);
    }
}
1682
/// A 24-byte buffer that starts with `NOPE` must be rejected.
#[test]
fn test_deserialize_bad_magic() {
    let mut header = vec![0u8; 24];
    header[..4].copy_from_slice(b"NOPE");

    assert!(deserialize_from_bytes(&header).is_err());
}
1690
/// A header with the right magic but version 99 must be rejected.
#[test]
fn test_deserialize_bad_version() {
    let unsupported_version: u16 = 99;

    let mut header = vec![0u8; 24];
    header[..4].copy_from_slice(&[0x00, b'S', b'E', b'M']);
    header[4..6].copy_from_slice(&unsupported_version.to_le_bytes());

    assert!(deserialize_from_bytes(&header).is_err());
}
1699
/// A header with a non-zero byte at offset 20 must be rejected, with an
/// error message that mentions "reserved".
#[test]
fn test_deserialize_rejects_nonzero_reserved() {
    let mut header = vec![0u8; 24];
    header[..4].copy_from_slice(&MAGIC);
    header[4..6].copy_from_slice(&FORMAT_VERSION.to_le_bytes());
    header[20] = 0xFF;

    match deserialize_from_bytes(&header) {
        Ok(_) => panic!("should reject non-zero reserved field"),
        Err(err) => assert!(err.to_string().contains("reserved")),
    }
}
1712
/// Three bytes of input (one short of the four-byte magic) must be rejected.
#[test]
fn test_deserialize_too_short() {
    let stub = [0x00, b'S', b'E'];
    assert!(deserialize_from_bytes(&stub).is_err());
}
1718
/// A program mixing global definitions, global loads, and symbol/keyword
/// constants round-trips through the full file format with every global
/// operand and every constant restored.
#[test]
fn test_full_file_roundtrip_with_globals() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let spur_x = intern("my-var");
    let spur_print = intern("println");
    let mut e = Emitter::new();
    e.emit_const(Value::int(42));
    e.emit_op(Op::DefineGlobal);
    e.emit_u32(spur_to_u32(spur_x));
    e.emit_op(Op::LoadGlobal);
    e.emit_u32(spur_to_u32(spur_print));
    e.emit_op(Op::LoadGlobal);
    e.emit_u32(spur_to_u32(spur_x));
    e.emit_const(Value::symbol("test-sym"));
    e.emit_const(Value::keyword("test-kw"));
    e.emit_op(Op::Return);
    let chunk = e.into_chunk();

    let result = CompileResult {
        chunk,
        functions: vec![],
    };

    let bytes = serialize_to_bytes(&result, 0).unwrap();
    let result2 = deserialize_from_bytes(&bytes).unwrap();

    let code = &result2.chunk.code;
    // Resolves the little-endian u32 operand following the opcode at `pc`.
    let global_name = |pc: usize| {
        let bits = u32::from_le_bytes([code[pc + 1], code[pc + 2], code[pc + 3], code[pc + 4]]);
        sema_core::resolve(u32_to_spur(bits))
    };

    let mut found_define = false;
    let mut found_load_print = false;
    let mut found_load_var = false;
    let mut pc = 0;
    while pc < code.len() {
        let (op, next) = advance_pc(code, pc).unwrap();
        match op {
            Op::DefineGlobal => {
                assert_eq!(global_name(pc), "my-var");
                found_define = true;
            }
            Op::LoadGlobal => {
                let name = global_name(pc);
                if name == "println" {
                    found_load_print = true;
                } else if name == "my-var" {
                    // The second LoadGlobal was emitted but previously never checked.
                    found_load_var = true;
                }
            }
            _ => {}
        }
        pc = next;
    }
    assert!(found_define, "DefineGlobal 'my-var' not found");
    assert!(found_load_print, "LoadGlobal 'println' not found");
    assert!(found_load_var, "LoadGlobal 'my-var' not found");

    // Check the constants by value: a kind-only check would accept a symbol
    // or keyword that was remapped to the wrong name.
    assert_eq!(result2.chunk.consts.len(), 3);
    assert_eq!(result2.chunk.consts[0], Value::int(42));
    assert_eq!(result2.chunk.consts[1], Value::symbol("test-sym"));
    assert_eq!(result2.chunk.consts[2], Value::keyword("test-kw"));
}
1795
/// Both remapping directions return an error when a `LoadGlobal` is followed
/// by fewer than four operand bytes.
#[test]
fn test_truncated_global_operand_errors_not_panics() {
    // Spur -> index direction: two operand bytes.
    let mut stb = StringTableBuilder::new();
    let short_for_save = vec![Op::LoadGlobal as u8, 0x01, 0x00];
    assert!(remap_spurs_to_indices(&short_for_save, &mut stb).is_err());

    // Index -> spur direction: one operand byte.
    let spurs = vec![intern("x")];
    let mut short_for_load = vec![Op::LoadGlobal as u8, 0x01];
    assert!(remap_indices_to_spurs(&mut short_for_load, &spurs).is_err());
}
1810
/// A `MakeClosure` opcode followed by a single operand byte yields an error.
#[test]
fn test_truncated_make_closure_errors_not_panics() {
    let truncated = vec![Op::MakeClosure as u8, 0x00];
    let mut stb = StringTableBuilder::new();

    assert!(remap_spurs_to_indices(&truncated, &mut stb).is_err());
}
1819
/// A header that is otherwise valid but has zero at bytes 14..16 must be
/// rejected with an error mentioning "missing".
#[test]
fn test_missing_required_section_errors() {
    let mut bytes = vec![0u8; 24];
    // Use the crate's own constants rather than hard-coded copies, so a format
    // version bump cannot make this test fail on the version check instead of
    // the missing-section check it is meant to exercise.
    bytes[0..4].copy_from_slice(&MAGIC);
    bytes[4..6].copy_from_slice(&FORMAT_VERSION.to_le_bytes());
    // Bytes 14..16 appear to be the section count; zero means no sections.
    bytes[14..16].copy_from_slice(&0u16.to_le_bytes());

    let result = deserialize_from_bytes(&bytes);
    match &result {
        Err(e) => assert!(e.to_string().contains("missing"), "unexpected error: {e}"),
        Ok(_) => panic!("expected error for missing sections"),
    }
}
1833
/// Non-ASCII strings (CJK, emoji, accented Latin) are stored and retrieved
/// unchanged by the string table.
#[test]
fn test_string_table_unicode() {
    let samples = ["こんにちは", "🦀", "café"];

    let mut stb = StringTableBuilder::new();
    let indices: Vec<u32> = samples.iter().map(|s| stb.intern_str(s)).collect();

    let table = stb.finish();
    for (text, idx) in samples.iter().zip(indices) {
        assert_eq!(table[idx as usize], *text);
    }
}
1848
/// Each primitive value kind round-trips: nil, booleans, int, float, string,
/// symbol, and keyword.
#[test]
fn test_serialize_value_roundtrip_primitives() {
    let mut buf = Vec::new();
    let mut stb = StringTableBuilder::new();

    // 1.5 is exactly representable and, unlike the former `3.14`, does not
    // trip Clippy's `approx_constant` lint.
    let originals = [
        Value::nil(),
        Value::bool(true),
        Value::bool(false),
        Value::int(42),
        Value::float(1.5),
        Value::string("hello"),
        Value::symbol("foo"),
        Value::keyword("bar"),
    ];
    for value in &originals {
        serialize_value(value, &mut buf, &mut stb).unwrap();
    }

    let table = stb.finish();
    let remap = build_remap_table(&table);
    let mut cursor = 0;
    // Pulls the next value off the shared buffer, advancing `cursor`.
    let mut next = || deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();

    assert_eq!(next(), Value::nil());
    assert_eq!(next(), Value::bool(true));
    assert_eq!(next(), Value::bool(false));
    assert_eq!(next(), Value::int(42));
    assert_eq!(next().as_float(), Some(1.5));
    let s = next();
    assert_eq!(s.as_str().unwrap(), "hello");
    // Compare symbol and keyword by value so a wrong name is caught too; the
    // old `is_some()` checks only confirmed the value kind.
    assert_eq!(next(), Value::symbol("foo"));
    assert_eq!(next(), Value::keyword("bar"));
}
1891
/// A list of ints and a vector of strings, written back to back, both
/// round-trip to equal values.
#[test]
fn test_serialize_value_roundtrip_collections() {
    let numbers = Value::list(vec![Value::int(1), Value::int(2), Value::int(3)]);
    let letters = Value::vector(vec![Value::string("a"), Value::string("b")]);

    let mut stb = StringTableBuilder::new();
    let mut bytes = Vec::new();
    serialize_value(&numbers, &mut bytes, &mut stb).unwrap();
    serialize_value(&letters, &mut bytes, &mut stb).unwrap();

    let strings = stb.finish();
    let spurs = build_remap_table(&strings);
    let mut pos = 0;

    // Values come back in write order: the list first, then the vector.
    let numbers_back = deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap();
    assert_eq!(numbers_back, numbers);
    let letters_back = deserialize_value(&bytes, &mut pos, &strings, &spurs).unwrap();
    assert_eq!(letters_back, letters);
}
1913
/// `spur_to_u32` and `u32_to_spur` are inverses, and the raw bits of an
/// interned spur are never zero.
#[test]
fn test_spur_u32_conversion_safe() {
    let original = intern("test-var");

    let raw = spur_to_u32(original);
    assert_ne!(raw, 0, "Spur should never be zero (it's NonZeroU32)");

    let restored = u32_to_spur(raw);
    assert_eq!(original, restored);
    assert_eq!(sema_core::resolve(restored), "test-var");
}
1923
/// A freshly serialized minimal program deserializes without error.
#[test]
fn test_string_table_section_boundary() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    let mut emitter = Emitter::new();
    emitter.emit_const(Value::int(1));
    emitter.emit_op(Op::Return);
    let program = CompileResult {
        chunk: emitter.into_chunk(),
        functions: vec![],
    };

    let encoded = serialize_to_bytes(&program, 0).unwrap();
    assert!(deserialize_from_bytes(&encoded).is_ok());
}
1943
/// Deeply nested single-element lists must be rejected with an error that
/// mentions "depth".
#[test]
fn test_deserialize_value_depth_limit() {
    // Tie the nesting depth to the named limit so the test still exceeds it
    // if `MAX_VALUE_DEPTH` is ever raised.
    let depth = MAX_VALUE_DEPTH * 2;
    let mut buf = Vec::new();
    for _ in 0..depth {
        // A list tag followed by a u16 of 1, i.e. a list with one element.
        buf.push(VAL_LIST);
        buf.extend_from_slice(&1u16.to_le_bytes());
    }
    // The innermost element is nil.
    buf.push(VAL_NIL);

    let table = vec!["".to_string()];
    let remap = build_remap_table(&table);
    let mut cursor = 0;
    let result = deserialize_value(&buf, &mut cursor, &table, &remap);
    assert!(result.is_err(), "should reject deeply nested values");
    assert!(
        result.unwrap_err().to_string().contains("depth"),
        "error should mention depth limit"
    );
}
1965
/// `u32_to_spur(0)` must panic.
#[test]
fn test_u32_to_spur_rejects_zero() {
    let panicked = std::panic::catch_unwind(|| u32_to_spur(0)).is_err();
    assert!(
        panicked,
        "u32_to_spur(0) should panic (was UB before fix)"
    );
}
1974
/// A chunk whose leading u32 is `u32::MAX` with no further data must be rejected.
#[test]
fn test_deserialize_rejects_huge_code_len() {
    let bytes = u32::MAX.to_le_bytes().to_vec();

    let strings = vec!["".to_string()];
    let spurs = build_remap_table(&strings);
    let mut pos = 0;
    assert!(deserialize_chunk(&bytes, &mut pos, &strings, &spurs).is_err());
}
1987
/// A file whose only section (id 0x01) starts with a u32 of `u32::MAX` must
/// be rejected.
#[test]
fn test_deserialize_rejects_huge_string_count() {
    // Section payload: just the oversized leading u32.
    let mut section = Vec::new();
    section.extend_from_slice(&0xFFFFFFFFu32.to_le_bytes());

    let mut bytes = vec![0u8; 24];
    // Use the crate's constants so a version bump cannot turn this into an
    // accidental "bad version" test that passes for the wrong reason.
    bytes[0..4].copy_from_slice(&MAGIC);
    bytes[4..6].copy_from_slice(&FORMAT_VERSION.to_le_bytes());
    // Bytes 14..16 appear to be the section count; one section follows.
    bytes[14..16].copy_from_slice(&1u16.to_le_bytes());

    // Section framing: u16 id, u32 payload length, then the payload.
    bytes.extend_from_slice(&0x01u16.to_le_bytes());
    bytes.extend_from_slice(&(section.len() as u32).to_le_bytes());
    // Repaired: this argument had been garbled and no longer compiled.
    bytes.extend_from_slice(&section);

    let result = deserialize_from_bytes(&bytes);
    assert!(result.is_err());
}
2005
/// A bytevector whose u32 following the tag is `u32::MAX`, with no further
/// data, must be rejected.
#[test]
fn test_deserialize_rejects_huge_bytevector() {
    let mut buf = Vec::new();
    // Named tag instead of the raw 0x0C, matching the other tag-based tests.
    buf.push(VAL_BYTEVECTOR);
    buf.extend_from_slice(&0xFFFFFFFFu32.to_le_bytes());

    let table = vec!["".to_string()];
    let remap = build_remap_table(&table);
    let mut cursor = 0;
    let result = deserialize_value(&buf, &mut cursor, &table, &remap);
    assert!(result.is_err());
}
2017
/// A hand-built file whose string table holds "bad" at index 0 must be
/// rejected with "index 0 must be the empty string".
#[test]
fn test_deserialize_rejects_nonempty_string_zero() {
    /// Appends one section: u16 id, u32 payload length, then the payload.
    fn push_section(out: &mut Vec<u8>, id: u16, payload: &[u8]) {
        out.extend_from_slice(&id.to_le_bytes());
        out.extend_from_slice(&(payload.len() as u32).to_le_bytes());
        out.extend_from_slice(payload);
    }

    // 24-byte header. Magic and version come from the crate's constants so a
    // format version bump cannot make this fail on the version check instead.
    let mut bad_bytes = Vec::new();
    bad_bytes.extend_from_slice(&MAGIC);
    bad_bytes.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
    for _ in 0..4 {
        bad_bytes.extend_from_slice(&0u16.to_le_bytes());
    }
    // Offset 14 appears to be the section count; three sections follow.
    bad_bytes.extend_from_slice(&3u16.to_le_bytes());
    bad_bytes.extend_from_slice(&0u32.to_le_bytes());
    bad_bytes.extend_from_slice(&0u32.to_le_bytes());
    assert_eq!(bad_bytes.len(), 24);

    // String table: one entry of length 3, "bad", sitting at index 0.
    let mut strtab = Vec::new();
    strtab.extend_from_slice(&1u32.to_le_bytes());
    strtab.extend_from_slice(&3u32.to_le_bytes());
    strtab.extend_from_slice(b"bad");
    push_section(&mut bad_bytes, 0x01, &strtab);

    // Function table: zero functions.
    let mut functab = Vec::new();
    functab.extend_from_slice(&0u32.to_le_bytes());
    push_section(&mut bad_bytes, 0x02, &functab);

    // Main chunk: one byte of code (`Return`); all remaining fields are zero.
    let mut chunk_data = Vec::new();
    chunk_data.extend_from_slice(&1u32.to_le_bytes());
    chunk_data.push(Op::Return as u8);
    chunk_data.extend_from_slice(&0u16.to_le_bytes());
    chunk_data.extend_from_slice(&0u32.to_le_bytes());
    chunk_data.extend_from_slice(&0u16.to_le_bytes());
    chunk_data.extend_from_slice(&0u16.to_le_bytes());
    chunk_data.extend_from_slice(&0u16.to_le_bytes());
    push_section(&mut bad_bytes, 0x03, &chunk_data);

    let result = deserialize_from_bytes(&bad_bytes);
    assert!(
        result.is_err(),
        "should reject string table with non-empty index 0"
    );
    let err = result.err().unwrap();
    assert!(err.to_string().contains("index 0 must be the empty string"));
}
2070
/// A function-table section that has two extra bytes after its declared
/// contents must be rejected with an error mentioning trailing or unconsumed
/// bytes.
#[test]
fn test_deserialize_rejects_trailing_section_bytes() {
    use crate::emit::Emitter;
    use crate::opcodes::Op;

    /// Appends one section: u16 id, u32 payload length, then the payload.
    fn push_section(out: &mut Vec<u8>, id: u16, payload: &[u8]) {
        out.extend_from_slice(&id.to_le_bytes());
        out.extend_from_slice(&(payload.len() as u32).to_le_bytes());
        out.extend_from_slice(payload);
    }

    let mut stb = StringTableBuilder::new();

    // Function table: zero functions, followed by two stray bytes.
    let mut func_payload = Vec::new();
    func_payload.extend_from_slice(&0u32.to_le_bytes());
    func_payload.extend_from_slice(&[0xDE, 0xAD]);

    // A valid main chunk, produced by the real serializer.
    let mut chunk_payload = Vec::new();
    let mut e = Emitter::new();
    e.emit_op(Op::Nil);
    e.emit_op(Op::Return);
    let chunk = e.into_chunk();
    serialize_chunk(&chunk, &mut chunk_payload, &mut stb).unwrap();

    // String table: u32 count, then a u32 length and the bytes of each string.
    let string_table = stb.finish();
    let mut strtab_payload = Vec::new();
    strtab_payload.extend_from_slice(&(string_table.len() as u32).to_le_bytes());
    for s in &string_table {
        let sb = s.as_bytes();
        strtab_payload.extend_from_slice(&(sb.len() as u32).to_le_bytes());
        strtab_payload.extend_from_slice(sb);
    }

    // 24-byte header. Magic and version come from the crate's constants so a
    // format version bump cannot make this fail on the version check instead.
    let mut out = Vec::new();
    out.extend_from_slice(&MAGIC);
    out.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
    for _ in 0..4 {
        out.extend_from_slice(&0u16.to_le_bytes());
    }
    // Offset 14 appears to be the section count; three sections follow.
    out.extend_from_slice(&3u16.to_le_bytes());
    out.extend_from_slice(&0u32.to_le_bytes());
    out.extend_from_slice(&0u32.to_le_bytes());

    push_section(&mut out, 0x01, &strtab_payload);
    push_section(&mut out, 0x02, &func_payload);
    push_section(&mut out, 0x03, &chunk_payload);

    match deserialize_from_bytes(&out) {
        Ok(_) => panic!("should reject trailing bytes in function table section"),
        Err(e) => {
            let msg = e.to_string();
            assert!(
                msg.contains("trailing") || msg.contains("unconsumed"),
                "error should mention trailing/unconsumed bytes, got: {msg}"
            );
        }
    }
}
2132
/// A `Const` instruction whose operand points past the one-entry constant
/// pool must be rejected on load.
#[test]
fn test_validate_rejects_bad_const_index() {
    // Operand bytes 0x03 0x00 (index 3 if read as a little-endian u16).
    let code = vec![Op::Const as u8, 0x03, 0x00, Op::Return as u8];
    let program = CompileResult {
        chunk: Chunk {
            code,
            consts: vec![Value::int(1)],
            spans: vec![],
            max_stack: 1,
            n_locals: 0,
            exception_table: vec![],
        },
        functions: vec![],
    };

    let encoded = serialize_to_bytes(&program, 0).unwrap();
    assert!(
        deserialize_from_bytes(&encoded).is_err(),
        "should reject out-of-bounds const index"
    );
}
2154
/// A `MakeClosure` naming function 5 in a program with no functions must be
/// rejected on load.
#[test]
fn test_validate_rejects_bad_func_id() {
    use crate::emit::Emitter;

    let mut emitter = Emitter::new();
    emitter.emit_op(Op::MakeClosure);
    // First u16 operand is 5; second is 0 (presumably the upvalue count).
    emitter.emit_u16(5);
    emitter.emit_u16(0);
    emitter.emit_op(Op::Return);

    let program = CompileResult {
        chunk: emitter.into_chunk(),
        functions: vec![],
    };

    let encoded = serialize_to_bytes(&program, 0).unwrap();
    assert!(
        deserialize_from_bytes(&encoded).is_err(),
        "should reject out-of-bounds func_id in MakeClosure"
    );
}
2177}