Skip to main content

sema_vm/
serialize.rs

1use hashbrown::HashMap;
2use sema_core::{intern, resolve, SemaError, Span, Spur, Value, ValueView};
3
4use crate::chunk::{Chunk, ExceptionEntry, Function, UpvalueDesc};
5use crate::compiler::CompileResult;
6use crate::opcodes::Op;
7
8/// Builds a deduplicated string table for serialization.
9pub struct StringTableBuilder {
10    strings: Vec<String>,
11    index: HashMap<String, u32>,
12}
13
14impl Default for StringTableBuilder {
15    fn default() -> Self {
16        Self::new()
17    }
18}
19
20impl StringTableBuilder {
21    pub fn new() -> Self {
22        let mut b = StringTableBuilder {
23            strings: Vec::new(),
24            index: HashMap::new(),
25        };
26        b.intern_str(""); // index 0 = empty string
27        b
28    }
29
30    pub fn intern_str(&mut self, s: &str) -> u32 {
31        if let Some(&idx) = self.index.get(s) {
32            return idx;
33        }
34        let idx = self.strings.len() as u32;
35        self.strings.push(s.to_string());
36        self.index.insert(s.to_string(), idx);
37        idx
38    }
39
40    pub fn intern_spur(&mut self, spur: Spur) -> u32 {
41        let s = resolve(spur);
42        self.intern_str(&s)
43    }
44
45    pub fn finish(self) -> Vec<String> {
46        self.strings
47    }
48}
49
50// ── Spur remap table ──────────────────────────────────────────────
51
52/// Build a remap table: for each string table index, intern it to get a process-local Spur.
53pub fn build_remap_table(table: &[String]) -> Vec<Spur> {
54    table.iter().map(|s| intern(s)).collect()
55}
56
57// ── Value tag constants (bytecode format) ─────────────────────────
58
// One-byte type tags that prefix every serialized constant value.
// `serialize_value` writes them; `deserialize_value_inner` dispatches on them.
const VAL_NIL: u8 = 0x00; // no payload
const VAL_BOOL: u8 = 0x01; // one byte: 0 or 1
const VAL_INT: u8 = 0x02; // little-endian integer (read back as i64)
const VAL_FLOAT: u8 = 0x03; // little-endian float (read back as f64)
const VAL_STRING: u8 = 0x04; // u32 string table index
const VAL_SYMBOL: u8 = 0x05; // u32 string table index
const VAL_KEYWORD: u8 = 0x06; // u32 string table index
const VAL_CHAR: u8 = 0x07; // u32 Unicode code point
const VAL_LIST: u8 = 0x08; // u16 count, then that many values
const VAL_VECTOR: u8 = 0x09; // u16 count, then that many values
const VAL_MAP: u8 = 0x0A; // u16 pair count, then key/value pairs
const VAL_HASHMAP: u8 = 0x0B; // u16 pair count, then key/value pairs
const VAL_BYTEVECTOR: u8 = 0x0C; // u32 length, then raw bytes

// Nesting limit enforced by `deserialize_value_inner`: a value at a depth greater
// than this (top level is depth 0) is rejected.
const MAX_VALUE_DEPTH: usize = 128;
74
75// ── Checked conversions ───────────────────────────────────────────
76
77fn checked_u16(n: usize, what: &str) -> Result<u16, SemaError> {
78    u16::try_from(n).map_err(|_| SemaError::eval(format!("{what} exceeds u16::MAX ({n})")))
79}
80
81fn checked_u32(n: usize, what: &str) -> Result<u32, SemaError> {
82    u32::try_from(n).map_err(|_| SemaError::eval(format!("{what} exceeds u32::MAX ({n})")))
83}
84
85// ── Value serialization ───────────────────────────────────────────
86
87pub fn serialize_value(
88    val: &Value,
89    buf: &mut Vec<u8>,
90    stb: &mut StringTableBuilder,
91) -> Result<(), SemaError> {
92    match val.view() {
93        ValueView::Nil => buf.push(VAL_NIL),
94        ValueView::Bool(b) => {
95            buf.push(VAL_BOOL);
96            buf.push(if b { 1 } else { 0 });
97        }
98        ValueView::Int(n) => {
99            buf.push(VAL_INT);
100            buf.extend_from_slice(&n.to_le_bytes());
101        }
102        ValueView::Float(f) => {
103            buf.push(VAL_FLOAT);
104            buf.extend_from_slice(&f.to_le_bytes());
105        }
106        ValueView::String(s) => {
107            buf.push(VAL_STRING);
108            let idx = stb.intern_str(&s);
109            buf.extend_from_slice(&idx.to_le_bytes());
110        }
111        ValueView::Symbol(spur) => {
112            buf.push(VAL_SYMBOL);
113            let idx = stb.intern_spur(spur);
114            buf.extend_from_slice(&idx.to_le_bytes());
115        }
116        ValueView::Keyword(spur) => {
117            buf.push(VAL_KEYWORD);
118            let idx = stb.intern_spur(spur);
119            buf.extend_from_slice(&idx.to_le_bytes());
120        }
121        ValueView::Char(c) => {
122            buf.push(VAL_CHAR);
123            buf.extend_from_slice(&(c as u32).to_le_bytes());
124        }
125        ValueView::List(items) => {
126            let len = checked_u16(items.len(), "list length")?;
127            buf.push(VAL_LIST);
128            buf.extend_from_slice(&len.to_le_bytes());
129            for item in items.iter() {
130                serialize_value(item, buf, stb)?;
131            }
132        }
133        ValueView::Vector(items) => {
134            let len = checked_u16(items.len(), "vector length")?;
135            buf.push(VAL_VECTOR);
136            buf.extend_from_slice(&len.to_le_bytes());
137            for item in items.iter() {
138                serialize_value(item, buf, stb)?;
139            }
140        }
141        ValueView::Map(map) => {
142            let len = checked_u16(map.len(), "map length")?;
143            buf.push(VAL_MAP);
144            buf.extend_from_slice(&len.to_le_bytes());
145            for (k, v) in map.iter() {
146                serialize_value(k, buf, stb)?;
147                serialize_value(v, buf, stb)?;
148            }
149        }
150        ValueView::HashMap(map) => {
151            let len = checked_u16(map.len(), "hashmap length")?;
152            buf.push(VAL_HASHMAP);
153            buf.extend_from_slice(&len.to_le_bytes());
154            for (k, v) in map.iter() {
155                serialize_value(k, buf, stb)?;
156                serialize_value(v, buf, stb)?;
157            }
158        }
159        ValueView::Bytevector(bv) => {
160            let len = checked_u32(bv.len(), "bytevector length")?;
161            buf.push(VAL_BYTEVECTOR);
162            buf.extend_from_slice(&len.to_le_bytes());
163            buf.extend_from_slice(&bv);
164        }
165        // Runtime-only types cannot appear in bytecode constant pools
166        _ => {
167            return Err(SemaError::eval(format!(
168                "cannot serialize {} to bytecode constant pool",
169                val.type_name()
170            )));
171        }
172    }
173    Ok(())
174}
175
176// ── Value deserialization ─────────────────────────────────────────
177
178fn read_u8(buf: &[u8], cursor: &mut usize) -> Result<u8, SemaError> {
179    if *cursor >= buf.len() {
180        return Err(SemaError::eval("unexpected end of bytecode data"));
181    }
182    let v = buf[*cursor];
183    *cursor += 1;
184    Ok(v)
185}
186
187fn read_u16_le(buf: &[u8], cursor: &mut usize) -> Result<u16, SemaError> {
188    if *cursor + 2 > buf.len() {
189        return Err(SemaError::eval("unexpected end of bytecode data"));
190    }
191    let v = u16::from_le_bytes([buf[*cursor], buf[*cursor + 1]]);
192    *cursor += 2;
193    Ok(v)
194}
195
196fn read_u32_le(buf: &[u8], cursor: &mut usize) -> Result<u32, SemaError> {
197    if *cursor + 4 > buf.len() {
198        return Err(SemaError::eval("unexpected end of bytecode data"));
199    }
200    let v = u32::from_le_bytes([
201        buf[*cursor],
202        buf[*cursor + 1],
203        buf[*cursor + 2],
204        buf[*cursor + 3],
205    ]);
206    *cursor += 4;
207    Ok(v)
208}
209
210fn read_i64_le(buf: &[u8], cursor: &mut usize) -> Result<i64, SemaError> {
211    if *cursor + 8 > buf.len() {
212        return Err(SemaError::eval("unexpected end of bytecode data"));
213    }
214    let v = i64::from_le_bytes(buf[*cursor..*cursor + 8].try_into().unwrap());
215    *cursor += 8;
216    Ok(v)
217}
218
219fn read_f64_le(buf: &[u8], cursor: &mut usize) -> Result<f64, SemaError> {
220    if *cursor + 8 > buf.len() {
221        return Err(SemaError::eval("unexpected end of bytecode data"));
222    }
223    let v = f64::from_le_bytes(buf[*cursor..*cursor + 8].try_into().unwrap());
224    *cursor += 8;
225    Ok(v)
226}
227
228fn read_bytes(buf: &[u8], cursor: &mut usize, len: usize) -> Result<Vec<u8>, SemaError> {
229    if *cursor + len > buf.len() {
230        return Err(SemaError::eval("unexpected end of bytecode data"));
231    }
232    let v = buf[*cursor..*cursor + len].to_vec();
233    *cursor += len;
234    Ok(v)
235}
236
237pub fn deserialize_value(
238    buf: &[u8],
239    cursor: &mut usize,
240    table: &[String],
241    remap: &[Spur],
242) -> Result<Value, SemaError> {
243    deserialize_value_inner(buf, cursor, table, remap, 0)
244}
245
246fn deserialize_value_inner(
247    buf: &[u8],
248    cursor: &mut usize,
249    table: &[String],
250    remap: &[Spur],
251    depth: usize,
252) -> Result<Value, SemaError> {
253    if depth > MAX_VALUE_DEPTH {
254        return Err(SemaError::eval(format!(
255            "value nesting depth exceeds maximum ({MAX_VALUE_DEPTH})"
256        )));
257    }
258    let tag = read_u8(buf, cursor)?;
259    match tag {
260        VAL_NIL => Ok(Value::nil()),
261        VAL_BOOL => {
262            let b = read_u8(buf, cursor)?;
263            match b {
264                0 => Ok(Value::bool(false)),
265                1 => Ok(Value::bool(true)),
266                _ => Err(SemaError::eval(format!(
267                    "invalid bool payload in bytecode: 0x{b:02x}"
268                ))),
269            }
270        }
271        VAL_INT => {
272            let n = read_i64_le(buf, cursor)?;
273            Ok(Value::int(n))
274        }
275        VAL_FLOAT => {
276            let f = read_f64_le(buf, cursor)?;
277            Ok(Value::float(f))
278        }
279        VAL_STRING => {
280            let idx = read_u32_le(buf, cursor)? as usize;
281            if idx >= table.len() {
282                return Err(SemaError::eval(format!(
283                    "string table index {idx} out of range (table has {} entries)",
284                    table.len()
285                )));
286            }
287            Ok(Value::string(&table[idx]))
288        }
289        VAL_SYMBOL => {
290            let idx = read_u32_le(buf, cursor)? as usize;
291            if idx >= remap.len() {
292                return Err(SemaError::eval(format!(
293                    "string table index {idx} out of range for symbol remap"
294                )));
295            }
296            Ok(Value::symbol_from_spur(remap[idx]))
297        }
298        VAL_KEYWORD => {
299            let idx = read_u32_le(buf, cursor)? as usize;
300            if idx >= remap.len() {
301                return Err(SemaError::eval(format!(
302                    "string table index {idx} out of range for keyword remap"
303                )));
304            }
305            Ok(Value::keyword_from_spur(remap[idx]))
306        }
307        VAL_CHAR => {
308            let cp = read_u32_le(buf, cursor)?;
309            let c = char::from_u32(cp)
310                .ok_or_else(|| SemaError::eval(format!("invalid unicode code point: {cp}")))?;
311            Ok(Value::char(c))
312        }
313        VAL_LIST => {
314            let count = read_u16_le(buf, cursor)? as usize;
315            let mut items = Vec::with_capacity(count);
316            for _ in 0..count {
317                items.push(deserialize_value_inner(
318                    buf,
319                    cursor,
320                    table,
321                    remap,
322                    depth + 1,
323                )?);
324            }
325            Ok(Value::list(items))
326        }
327        VAL_VECTOR => {
328            let count = read_u16_le(buf, cursor)? as usize;
329            let mut items = Vec::with_capacity(count);
330            for _ in 0..count {
331                items.push(deserialize_value_inner(
332                    buf,
333                    cursor,
334                    table,
335                    remap,
336                    depth + 1,
337                )?);
338            }
339            Ok(Value::vector(items))
340        }
341        VAL_MAP => {
342            let n_pairs = read_u16_le(buf, cursor)? as usize;
343            let mut map = std::collections::BTreeMap::new();
344            for _ in 0..n_pairs {
345                let k = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
346                let v = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
347                map.insert(k, v);
348            }
349            Ok(Value::map(map))
350        }
351        VAL_HASHMAP => {
352            let n_pairs = read_u16_le(buf, cursor)? as usize;
353            let mut entries = Vec::with_capacity(n_pairs);
354            for _ in 0..n_pairs {
355                let k = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
356                let v = deserialize_value_inner(buf, cursor, table, remap, depth + 1)?;
357                entries.push((k, v));
358            }
359            Ok(Value::hashmap(entries))
360        }
361        VAL_BYTEVECTOR => {
362            let len = read_u32_le(buf, cursor)? as usize;
363            let data = read_bytes(buf, cursor, len)?;
364            Ok(Value::bytevector(data))
365        }
366        _ => Err(SemaError::eval(format!(
367            "unknown value tag in bytecode: 0x{tag:02x}"
368        ))),
369    }
370}
371
372// ── Chunk serialization ───────────────────────────────────────────
373
374pub fn serialize_chunk(
375    chunk: &Chunk,
376    buf: &mut Vec<u8>,
377    stb: &mut StringTableBuilder,
378) -> Result<(), SemaError> {
379    // code — remap Spur operands to string table indices before writing
380    let remapped_code = remap_spurs_to_indices(&chunk.code, stb)?;
381    let code_len = checked_u32(remapped_code.len(), "bytecode length")?;
382    buf.extend_from_slice(&code_len.to_le_bytes());
383    buf.extend_from_slice(&remapped_code);
384
385    // constants
386    let n_consts = checked_u16(chunk.consts.len(), "constant pool size")?;
387    buf.extend_from_slice(&n_consts.to_le_bytes());
388    for val in &chunk.consts {
389        serialize_value(val, buf, stb)?;
390    }
391
392    // spans: Vec<(u32, Span)> where Span { line, col, end_line, end_col }
393    let n_spans = checked_u32(chunk.spans.len(), "span count")?;
394    buf.extend_from_slice(&n_spans.to_le_bytes());
395    for &(pc, ref span) in &chunk.spans {
396        buf.extend_from_slice(&pc.to_le_bytes());
397        let line = checked_u32(span.line, "span line")?;
398        let col = checked_u32(span.col, "span col")?;
399        let end_line = checked_u32(span.end_line, "span end_line")?;
400        let end_col = checked_u32(span.end_col, "span end_col")?;
401        buf.extend_from_slice(&line.to_le_bytes());
402        buf.extend_from_slice(&col.to_le_bytes());
403        buf.extend_from_slice(&end_line.to_le_bytes());
404        buf.extend_from_slice(&end_col.to_le_bytes());
405    }
406
407    // max_stack, n_locals
408    buf.extend_from_slice(&chunk.max_stack.to_le_bytes());
409    buf.extend_from_slice(&chunk.n_locals.to_le_bytes());
410
411    // exception table
412    let n_exceptions = checked_u16(chunk.exception_table.len(), "exception table size")?;
413    buf.extend_from_slice(&n_exceptions.to_le_bytes());
414    for entry in &chunk.exception_table {
415        buf.extend_from_slice(&entry.try_start.to_le_bytes());
416        buf.extend_from_slice(&entry.try_end.to_le_bytes());
417        buf.extend_from_slice(&entry.handler_pc.to_le_bytes());
418        buf.extend_from_slice(&entry.stack_depth.to_le_bytes());
419        buf.extend_from_slice(&entry.catch_slot.to_le_bytes());
420    }
421
422    Ok(())
423}
424
425pub fn deserialize_chunk(
426    buf: &[u8],
427    cursor: &mut usize,
428    table: &[String],
429    remap: &[Spur],
430) -> Result<Chunk, SemaError> {
431    // code — remap string table indices back to process-local Spurs
432    let code_len = read_u32_le(buf, cursor)? as usize;
433    let remaining = buf.len().saturating_sub(*cursor);
434    if code_len > remaining {
435        return Err(SemaError::eval(format!(
436            "bytecode code_len ({code_len}) exceeds remaining data ({remaining})"
437        )));
438    }
439    let mut code = read_bytes(buf, cursor, code_len)?;
440    remap_indices_to_spurs(&mut code, remap)?;
441
442    // constants
443    let n_consts = read_u16_le(buf, cursor)? as usize;
444    let mut consts = Vec::with_capacity(n_consts);
445    for _ in 0..n_consts {
446        consts.push(deserialize_value(buf, cursor, table, remap)?);
447    }
448
449    // spans (each span = 20 bytes: u32 pc + u32 line + u32 col + u32 end_line + u32 end_col)
450    let n_spans = read_u32_le(buf, cursor)? as usize;
451    let span_remaining = buf.len().saturating_sub(*cursor);
452    if n_spans
453        .checked_mul(20)
454        .is_none_or(|need| need > span_remaining)
455    {
456        return Err(SemaError::eval(format!(
457            "span count ({n_spans}) exceeds remaining data ({span_remaining} bytes)"
458        )));
459    }
460    let mut spans = Vec::with_capacity(n_spans);
461    for _ in 0..n_spans {
462        let pc = read_u32_le(buf, cursor)?;
463        let line = read_u32_le(buf, cursor)? as usize;
464        let col = read_u32_le(buf, cursor)? as usize;
465        let end_line = read_u32_le(buf, cursor)? as usize;
466        let end_col = read_u32_le(buf, cursor)? as usize;
467        spans.push((pc, Span::new(line, col, end_line, end_col)));
468    }
469
470    // max_stack, n_locals
471    let max_stack = read_u16_le(buf, cursor)?;
472    let n_locals = read_u16_le(buf, cursor)?;
473
474    // exception table
475    let n_exceptions = read_u16_le(buf, cursor)? as usize;
476    let mut exception_table = Vec::with_capacity(n_exceptions);
477    for _ in 0..n_exceptions {
478        let try_start = read_u32_le(buf, cursor)?;
479        let try_end = read_u32_le(buf, cursor)?;
480        let handler_pc = read_u32_le(buf, cursor)?;
481        let stack_depth = read_u16_le(buf, cursor)?;
482        let catch_slot = read_u16_le(buf, cursor)?;
483        exception_table.push(ExceptionEntry {
484            try_start,
485            try_end,
486            handler_pc,
487            stack_depth,
488            catch_slot,
489        });
490    }
491
492    Ok(Chunk {
493        code,
494        consts,
495        spans,
496        max_stack,
497        n_locals,
498        exception_table,
499    })
500}
501
502// ── Function serialization ────────────────────────────────────────
503
// Sentinel written in place of a string table index when a function has no name;
// `deserialize_function` maps it back to `None`.
const ANONYMOUS_NAME: u32 = 0xFFFF_FFFF;
505
506pub fn serialize_function(
507    func: &Function,
508    buf: &mut Vec<u8>,
509    stb: &mut StringTableBuilder,
510) -> Result<(), SemaError> {
511    // name: u32 string table index (0xFFFFFFFF = anonymous)
512    match func.name {
513        Some(spur) => {
514            let idx = stb.intern_spur(spur);
515            buf.extend_from_slice(&idx.to_le_bytes());
516        }
517        None => buf.extend_from_slice(&ANONYMOUS_NAME.to_le_bytes()),
518    }
519
520    // arity: u16
521    buf.extend_from_slice(&func.arity.to_le_bytes());
522
523    // has_rest: u8
524    buf.push(if func.has_rest { 1 } else { 0 });
525
526    // upvalue descriptors
527    let n_upvalues = checked_u16(func.upvalue_descs.len(), "upvalue descriptor count")?;
528    buf.extend_from_slice(&n_upvalues.to_le_bytes());
529    for desc in &func.upvalue_descs {
530        match desc {
531            UpvalueDesc::ParentLocal(idx) => {
532                buf.push(0);
533                buf.extend_from_slice(&idx.to_le_bytes());
534            }
535            UpvalueDesc::ParentUpvalue(idx) => {
536                buf.push(1);
537                buf.extend_from_slice(&idx.to_le_bytes());
538            }
539        }
540    }
541
542    // chunk
543    serialize_chunk(&func.chunk, buf, stb)?;
544
545    // local_names: Vec<(u16, Spur)>
546    let n_local_names = checked_u16(func.local_names.len(), "local name count")?;
547    buf.extend_from_slice(&n_local_names.to_le_bytes());
548    for &(slot, spur) in &func.local_names {
549        buf.extend_from_slice(&slot.to_le_bytes());
550        let idx = stb.intern_spur(spur);
551        buf.extend_from_slice(&idx.to_le_bytes());
552    }
553
554    Ok(())
555}
556
557pub fn deserialize_function(
558    buf: &[u8],
559    cursor: &mut usize,
560    table: &[String],
561    remap: &[Spur],
562) -> Result<Function, SemaError> {
563    // name
564    let name_idx = read_u32_le(buf, cursor)?;
565    let name = if name_idx == ANONYMOUS_NAME {
566        None
567    } else {
568        let idx = name_idx as usize;
569        if idx >= remap.len() {
570            return Err(SemaError::eval(format!(
571                "function name string table index {idx} out of range"
572            )));
573        }
574        Some(remap[idx])
575    };
576
577    // arity
578    let arity = read_u16_le(buf, cursor)?;
579
580    // has_rest
581    let has_rest_byte = read_u8(buf, cursor)?;
582    let has_rest = match has_rest_byte {
583        0 => false,
584        1 => true,
585        _ => {
586            return Err(SemaError::eval(format!(
587                "invalid has_rest byte: 0x{has_rest_byte:02x}"
588            )));
589        }
590    };
591
592    // upvalue descriptors
593    let n_upvalues = read_u16_le(buf, cursor)? as usize;
594    let mut upvalue_descs = Vec::with_capacity(n_upvalues);
595    for _ in 0..n_upvalues {
596        let kind = read_u8(buf, cursor)?;
597        let index = read_u16_le(buf, cursor)?;
598        match kind {
599            0 => upvalue_descs.push(UpvalueDesc::ParentLocal(index)),
600            1 => upvalue_descs.push(UpvalueDesc::ParentUpvalue(index)),
601            _ => {
602                return Err(SemaError::eval(format!(
603                    "invalid upvalue kind: 0x{kind:02x}"
604                )));
605            }
606        }
607    }
608
609    // chunk
610    let chunk = deserialize_chunk(buf, cursor, table, remap)?;
611
612    // local_names
613    let n_local_names = read_u16_le(buf, cursor)? as usize;
614    let mut local_names = Vec::with_capacity(n_local_names);
615    for _ in 0..n_local_names {
616        let slot = read_u16_le(buf, cursor)?;
617        let name_idx = read_u32_le(buf, cursor)? as usize;
618        if name_idx >= remap.len() {
619            return Err(SemaError::eval(format!(
620                "local name string table index {name_idx} out of range"
621            )));
622        }
623        local_names.push((slot, remap[name_idx]));
624    }
625
626    Ok(Function {
627        name,
628        chunk,
629        upvalue_descs,
630        arity,
631        has_rest,
632        local_names,
633    })
634}
635
636// ── Spur remapping in bytecode ────────────────────────────────────
637
638fn spur_to_u32(spur: Spur) -> u32 {
639    spur.into_inner().get()
640}
641
642fn u32_to_spur(bits: u32) -> Spur {
643    use lasso::Key;
644    let idx = bits
645        .checked_sub(1)
646        .expect("invalid Spur bits: 0 is not valid");
647    Spur::try_from_usize(idx as usize).expect("invalid Spur bits")
648}
649
650/// Compute the next PC after the instruction at `code[pc]`, validating operand bounds.
651fn advance_pc(code: &[u8], pc: usize) -> Result<(Op, usize), SemaError> {
652    let Some(op) = Op::from_u8(code[pc]) else {
653        return Err(SemaError::eval(format!(
654            "invalid opcode 0x{:02x} at pc {pc}",
655            code[pc]
656        )));
657    };
658    let next = match op {
659        Op::LoadGlobal | Op::StoreGlobal | Op::DefineGlobal => pc + 5, // op + u32
660        Op::CallGlobal => pc + 7,                                      // op + u32 + u16
661        Op::Jump | Op::JumpIfFalse | Op::JumpIfTrue => pc + 5,         // op + i32
662        Op::CallNative => pc + 5,                                      // op + u16 + u16
663        Op::MakeClosure => {
664            if pc + 5 > code.len() {
665                return Err(SemaError::eval(format!(
666                    "truncated MakeClosure operands at pc {pc}"
667                )));
668            }
669            let n_upvalues = u16::from_le_bytes([code[pc + 3], code[pc + 4]]) as usize;
670            pc + 5 + n_upvalues * 4
671        }
672        Op::Const
673        | Op::LoadLocal
674        | Op::StoreLocal
675        | Op::LoadUpvalue
676        | Op::StoreUpvalue
677        | Op::Call
678        | Op::TailCall
679        | Op::MakeList
680        | Op::MakeVector
681        | Op::MakeMap
682        | Op::MakeHashMap => pc + 3, // op + u16
683        _ => pc + 1, // single-byte
684    };
685    if next > code.len() {
686        return Err(SemaError::eval(format!(
687            "truncated operand for {:?} at pc {pc} (need {} bytes, have {})",
688            op,
689            next - pc,
690            code.len() - pc
691        )));
692    }
693    Ok((op, next))
694}
695
696/// Walk bytecode and rewrite global opcodes: Spur u32 → string table index.
697/// Returns the rewritten code.
698pub fn remap_spurs_to_indices(
699    code: &[u8],
700    stb: &mut StringTableBuilder,
701) -> Result<Vec<u8>, SemaError> {
702    let mut out = code.to_vec();
703    let mut pc = 0;
704    while pc < out.len() {
705        let (op, next) = advance_pc(&out, pc)?;
706        if matches!(
707            op,
708            Op::LoadGlobal | Op::StoreGlobal | Op::DefineGlobal | Op::CallGlobal
709        ) {
710            let spur_bits =
711                u32::from_le_bytes([out[pc + 1], out[pc + 2], out[pc + 3], out[pc + 4]]);
712            let spur = u32_to_spur(spur_bits);
713            let s = resolve(spur);
714            let idx = stb.intern_str(&s);
715            let bytes = idx.to_le_bytes();
716            out[pc + 1] = bytes[0];
717            out[pc + 2] = bytes[1];
718            out[pc + 3] = bytes[2];
719            out[pc + 4] = bytes[3];
720        }
721        pc = next;
722    }
723    Ok(out)
724}
725
726/// Walk bytecode and rewrite global opcodes: string table index → process-local Spur u32.
727pub fn remap_indices_to_spurs(code: &mut [u8], remap: &[Spur]) -> Result<(), SemaError> {
728    let mut pc = 0;
729    while pc < code.len() {
730        let (op, next) = advance_pc(code, pc)?;
731        if matches!(
732            op,
733            Op::LoadGlobal | Op::StoreGlobal | Op::DefineGlobal | Op::CallGlobal
734        ) {
735            let idx = u32::from_le_bytes([code[pc + 1], code[pc + 2], code[pc + 3], code[pc + 4]])
736                as usize;
737            if idx >= remap.len() {
738                return Err(SemaError::eval(format!(
739                    "global spur remap index {idx} out of range at pc {pc}"
740                )));
741            }
742            let spur_bits = spur_to_u32(remap[idx]);
743            let bytes = spur_bits.to_le_bytes();
744            code[pc + 1] = bytes[0];
745            code[pc + 2] = bytes[1];
746            code[pc + 3] = bytes[2];
747            code[pc + 4] = bytes[3];
748        }
749        pc = next;
750    }
751    Ok(())
752}
753
754// ── File format constants ─────────────────────────────────────────
755
// First four bytes of every bytecode file; checked by `deserialize_from_bytes`.
const MAGIC: [u8; 4] = [0x00, b'S', b'E', b'M'];
// Written right after the magic; a file with a different version is rejected on load.
const FORMAT_VERSION: u16 = 1;
// Section type ids, written as the u16 that starts each section (see `write_section`).
const SECTION_STRING_TABLE: u16 = 0x01;
const SECTION_FUNCTION_TABLE: u16 = 0x02;
const SECTION_MAIN_CHUNK: u16 = 0x03;
761
762// ── Full file serialization ───────────────────────────────────────
763
764/// Serialize a CompileResult to the .semac binary format.
765pub fn serialize_to_bytes(result: &CompileResult, source_hash: u32) -> Result<Vec<u8>, SemaError> {
766    let mut stb = StringTableBuilder::new();
767
768    // Pre-serialize sections to get their bytes
769    // We need to serialize functions and main chunk first to populate the string table,
770    // then serialize the string table.
771
772    // Function table section payload
773    let mut func_payload = Vec::new();
774    let n_funcs = checked_u32(result.functions.len(), "function count")?;
775    func_payload.extend_from_slice(&n_funcs.to_le_bytes());
776    for func in &result.functions {
777        serialize_function(func, &mut func_payload, &mut stb)?;
778    }
779
780    // Main chunk section payload
781    let mut chunk_payload = Vec::new();
782    serialize_chunk(&result.chunk, &mut chunk_payload, &mut stb)?;
783
784    // Now build the string table section payload
785    let string_table = stb.finish();
786    let mut strtab_payload = Vec::new();
787    let n_strings = checked_u32(string_table.len(), "string table size")?;
788    strtab_payload.extend_from_slice(&n_strings.to_le_bytes());
789    for s in &string_table {
790        let bytes = s.as_bytes();
791        let len = checked_u32(bytes.len(), "string length")?;
792        strtab_payload.extend_from_slice(&len.to_le_bytes());
793        strtab_payload.extend_from_slice(bytes);
794    }
795
796    // Assemble the file
797    let n_sections: u16 = 3; // string table + function table + main chunk
798    let mut out = Vec::new();
799
800    // Header (24 bytes)
801    out.extend_from_slice(&MAGIC);
802    out.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
803    out.extend_from_slice(&0u16.to_le_bytes()); // flags
804                                                // Sema version — parse from Cargo.toml version at compile time
805    let (major, minor, patch) = parse_sema_version();
806    out.extend_from_slice(&major.to_le_bytes());
807    out.extend_from_slice(&minor.to_le_bytes());
808    out.extend_from_slice(&patch.to_le_bytes());
809    out.extend_from_slice(&n_sections.to_le_bytes());
810    out.extend_from_slice(&source_hash.to_le_bytes());
811    out.extend_from_slice(&0u32.to_le_bytes()); // reserved
812
813    // Section: String Table
814    write_section(&mut out, SECTION_STRING_TABLE, &strtab_payload)?;
815    // Section: Function Table
816    write_section(&mut out, SECTION_FUNCTION_TABLE, &func_payload)?;
817    // Section: Main Chunk
818    write_section(&mut out, SECTION_MAIN_CHUNK, &chunk_payload)?;
819
820    Ok(out)
821}
822
823fn write_section(out: &mut Vec<u8>, section_type: u16, payload: &[u8]) -> Result<(), SemaError> {
824    let len = checked_u32(payload.len(), "section payload length")?;
825    out.extend_from_slice(&section_type.to_le_bytes());
826    out.extend_from_slice(&len.to_le_bytes());
827    out.extend_from_slice(payload);
828    Ok(())
829}
830
831fn parse_sema_version() -> (u16, u16, u16) {
832    let version = env!("CARGO_PKG_VERSION");
833    let parts: Vec<&str> = version.split('.').collect();
834    let major = parts.first().and_then(|s| s.parse().ok()).unwrap_or(0);
835    let minor = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0);
836    let patch = parts.get(2).and_then(|s| s.parse().ok()).unwrap_or(0);
837    (major, minor, patch)
838}
839
840/// Validate bytecode operand bounds after deserialization.
841fn validate_bytecode(result: &CompileResult) -> Result<(), SemaError> {
842    validate_chunk_bytecode(&result.chunk, result.functions.len(), 0, "main chunk")?;
843    for (i, func) in result.functions.iter().enumerate() {
844        let label = format!("function {i}");
845        let n_upvalues = func.upvalue_descs.len();
846        validate_chunk_bytecode(&func.chunk, result.functions.len(), n_upvalues, &label)?;
847    }
848    Ok(())
849}
850
851fn validate_chunk_bytecode(
852    chunk: &Chunk,
853    n_functions: usize,
854    n_upvalues: usize,
855    label: &str,
856) -> Result<(), SemaError> {
857    let code = &chunk.code;
858    let n_locals = chunk.n_locals as usize;
859    let mut pc = 0;
860    while pc < code.len() {
861        let (op, next) = advance_pc(code, pc)?;
862        match op {
863            Op::Const => {
864                let idx = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
865                if idx >= chunk.consts.len() {
866                    return Err(SemaError::eval(format!(
867                        "in {label}: Const index {idx} out of range (pool has {} entries) at pc {pc}",
868                        chunk.consts.len()
869                    )));
870                }
871            }
872            Op::MakeClosure => {
873                let func_id = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
874                if func_id >= n_functions {
875                    return Err(SemaError::eval(format!(
876                        "in {label}: MakeClosure func_id {func_id} out of range ({n_functions} functions) at pc {pc}",
877                    )));
878                }
879            }
880            Op::LoadLocal | Op::StoreLocal => {
881                let slot = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
882                if slot >= n_locals {
883                    return Err(SemaError::eval(format!(
884                        "in {label}: local slot {slot} out of range (n_locals={n_locals}) at pc {pc}",
885                    )));
886                }
887            }
888            Op::LoadUpvalue | Op::StoreUpvalue => {
889                let slot = u16::from_le_bytes([code[pc + 1], code[pc + 2]]) as usize;
890                if slot >= n_upvalues {
891                    return Err(SemaError::eval(format!(
892                        "in {label}: upvalue slot {slot} out of range (n_upvalues={n_upvalues}) at pc {pc}",
893                    )));
894                }
895            }
896            _ => {}
897        }
898        pc = next;
899    }
900    Ok(())
901}
902
903/// Deserialize a .semac file from bytes into a CompileResult.
904pub fn deserialize_from_bytes(bytes: &[u8]) -> Result<CompileResult, SemaError> {
905    if bytes.len() < 24 {
906        return Err(SemaError::eval(
907            "bytecode file too short (< 24 bytes header)",
908        ));
909    }
910
911    // Validate header
912    if bytes[0..4] != MAGIC {
913        return Err(SemaError::eval(
914            "invalid bytecode magic number (expected \\x00SEM)",
915        ));
916    }
917    let format_version = u16::from_le_bytes([bytes[4], bytes[5]]);
918    if format_version != FORMAT_VERSION {
919        return Err(SemaError::eval(format!(
920            "unsupported bytecode format version {format_version} (expected {FORMAT_VERSION}). Recompile from source."
921        )));
922    }
923    let reserved = u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]);
924    if reserved != 0 {
925        return Err(SemaError::eval(format!(
926            "non-zero reserved header field (0x{reserved:08x}); file may be from a newer Sema version"
927        )));
928    }
929    let n_sections = u16::from_le_bytes([bytes[14], bytes[15]]) as usize;
930
931    // Read sections
932    let mut cursor = 24;
933    let mut string_table: Option<Vec<String>> = None;
934    let mut func_table_data: Option<(usize, usize)> = None; // (start, len) in bytes
935    let mut main_chunk_data: Option<(usize, usize)> = None;
936
937    for _ in 0..n_sections {
938        if cursor + 6 > bytes.len() {
939            return Err(SemaError::eval(
940                "unexpected end of bytecode file in section header",
941            ));
942        }
943        let section_type = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]);
944        let section_len = u32::from_le_bytes([
945            bytes[cursor + 2],
946            bytes[cursor + 3],
947            bytes[cursor + 4],
948            bytes[cursor + 5],
949        ]) as usize;
950        cursor += 6;
951
952        if cursor + section_len > bytes.len() {
953            return Err(SemaError::eval(format!(
954                "section 0x{section_type:04x} claims {section_len} bytes but only {} remain",
955                bytes.len() - cursor
956            )));
957        }
958
959        match section_type {
960            0x01 => {
961                // String Table — slice to section boundary
962                let section_data = &bytes[cursor..cursor + section_len];
963                let mut sc = 0usize;
964                let count = read_u32_le(section_data, &mut sc)? as usize;
965                // Each string needs at least 4 bytes for its length prefix;
966                // use remaining bytes after reading count
967                let remaining_after_count = section_len.saturating_sub(sc);
968                if count > remaining_after_count / 4 {
969                    return Err(SemaError::eval(format!(
970                        "string table count ({count}) exceeds section capacity"
971                    )));
972                }
973                let mut table = Vec::with_capacity(count);
974                for _ in 0..count {
975                    let len = read_u32_le(section_data, &mut sc)? as usize;
976                    if sc + len > section_len {
977                        return Err(SemaError::eval("string table entry extends past section"));
978                    }
979                    let s = std::str::from_utf8(&section_data[sc..sc + len]).map_err(|e| {
980                        SemaError::eval(format!("invalid UTF-8 in string table: {e}"))
981                    })?;
982                    table.push(s.to_string());
983                    sc += len;
984                }
985                string_table = Some(table);
986            }
987            0x02 => {
988                func_table_data = Some((cursor, section_len));
989            }
990            0x03 => {
991                main_chunk_data = Some((cursor, section_len));
992            }
993            _ => {
994                // Unknown section — skip for forward compatibility
995            }
996        }
997        cursor += section_len;
998    }
999
1000    // Validate required sections
1001    let table = string_table
1002        .ok_or_else(|| SemaError::eval("bytecode file missing string table section"))?;
1003    if table.is_empty() || !table[0].is_empty() {
1004        return Err(SemaError::eval(
1005            "string table index 0 must be the empty string",
1006        ));
1007    }
1008    let (func_start, func_len) = func_table_data
1009        .ok_or_else(|| SemaError::eval("bytecode file missing function table section"))?;
1010    let (chunk_start, chunk_len) = main_chunk_data
1011        .ok_or_else(|| SemaError::eval("bytecode file missing main chunk section"))?;
1012
1013    let remap = build_remap_table(&table);
1014
1015    // Deserialize function table (sliced to section boundary)
1016    let func_section = &bytes[func_start..func_start + func_len];
1017    let mut fc = 0;
1018    let n_funcs = read_u32_le(func_section, &mut fc)? as usize;
1019    // Each function needs at least several bytes; use 4 as minimum
1020    if n_funcs > func_len / 4 {
1021        return Err(SemaError::eval(format!(
1022            "function count ({n_funcs}) exceeds section capacity"
1023        )));
1024    }
1025    let mut functions = Vec::with_capacity(n_funcs);
1026    for _ in 0..n_funcs {
1027        functions.push(deserialize_function(func_section, &mut fc, &table, &remap)?);
1028    }
1029    if fc != func_len {
1030        return Err(SemaError::eval(format!(
1031            "function table section has {} unconsumed trailing bytes",
1032            func_len - fc
1033        )));
1034    }
1035
1036    // Deserialize main chunk (sliced to section boundary)
1037    let chunk_section = &bytes[chunk_start..chunk_start + chunk_len];
1038    let mut cc = 0;
1039    let chunk = deserialize_chunk(chunk_section, &mut cc, &table, &remap)?;
1040    if cc != chunk_len {
1041        return Err(SemaError::eval(format!(
1042            "main chunk section has {} unconsumed trailing bytes",
1043            chunk_len - cc
1044        )));
1045    }
1046
1047    let result = CompileResult { chunk, functions };
1048    validate_bytecode(&result)?;
1049    Ok(result)
1050}
1051
1052/// Check if a byte buffer starts with the .semac magic number.
1053pub fn is_bytecode_file(bytes: &[u8]) -> bool {
1054    bytes.len() >= 4 && bytes[0..4] == MAGIC
1055}
1056
1057#[cfg(test)]
1058mod tests {
1059    use super::*;
1060    use sema_core::intern;
1061
1062    #[test]
1063    fn test_string_table_builder() {
1064        let mut builder = StringTableBuilder::new();
1065        // Index 0 is always ""
1066        assert_eq!(builder.intern_str(""), 0);
1067        let idx_hello = builder.intern_str("hello");
1068        let idx_world = builder.intern_str("world");
1069        let idx_hello2 = builder.intern_str("hello");
1070        assert_eq!(idx_hello, idx_hello2); // deduplication
1071        assert_ne!(idx_hello, idx_world);
1072
1073        let table = builder.finish();
1074        assert_eq!(table.len(), 3); // "", "hello", "world"
1075        assert_eq!(table[0], "");
1076        assert_eq!(table[idx_hello as usize], "hello");
1077        assert_eq!(table[idx_world as usize], "world");
1078    }
1079
1080    #[test]
1081    fn test_string_table_spur_interning() {
1082        let mut builder = StringTableBuilder::new();
1083        let spur = intern("my-var");
1084        let idx = builder.intern_spur(spur);
1085        assert!(idx > 0);
1086        let idx2 = builder.intern_spur(spur);
1087        assert_eq!(idx, idx2);
1088    }
1089
1090    #[test]
1091    fn test_chunk_roundtrip() {
1092        use crate::emit::Emitter;
1093        use crate::opcodes::Op;
1094
1095        let mut e = Emitter::new();
1096        e.emit_const(Value::int(42));
1097        e.emit_const(Value::string("hello"));
1098        e.emit_op(Op::Add);
1099        e.emit_op(Op::Return);
1100        let mut chunk = e.into_chunk();
1101        chunk.n_locals = 2;
1102        chunk.max_stack = 4;
1103
1104        let mut buf = Vec::new();
1105        let mut stb = StringTableBuilder::new();
1106        serialize_chunk(&chunk, &mut buf, &mut stb).unwrap();
1107
1108        let table = stb.finish();
1109        let remap = build_remap_table(&table);
1110        let mut cursor = 0;
1111        let chunk2 = deserialize_chunk(&buf, &mut cursor, &table, &remap).unwrap();
1112
1113        assert_eq!(chunk2.code, chunk.code);
1114        assert_eq!(chunk2.consts.len(), chunk.consts.len());
1115        assert_eq!(chunk2.n_locals, 2);
1116        assert_eq!(chunk2.max_stack, 4);
1117    }
1118
1119    // ── Float edge cases ────────────────────────────────────────
1120
1121    #[test]
1122    fn test_serialize_float_nan() {
1123        let mut buf = Vec::new();
1124        let mut stb = StringTableBuilder::new();
1125        serialize_value(&Value::float(f64::NAN), &mut buf, &mut stb).unwrap();
1126
1127        let table = stb.finish();
1128        let remap = build_remap_table(&table);
1129        let mut cursor = 0;
1130        let v = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1131        assert!(v.as_float().unwrap().is_nan());
1132    }
1133
1134    #[test]
1135    fn test_serialize_float_neg_zero() {
1136        let mut buf = Vec::new();
1137        let mut stb = StringTableBuilder::new();
1138        let neg_zero = Value::float(-0.0);
1139        serialize_value(&neg_zero, &mut buf, &mut stb).unwrap();
1140
1141        let table = stb.finish();
1142        let remap = build_remap_table(&table);
1143        let mut cursor = 0;
1144        let v = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1145        let f = v.as_float().unwrap();
1146        assert!(f.is_sign_negative());
1147        assert_eq!(f.to_bits(), (-0.0f64).to_bits());
1148    }
1149
1150    #[test]
1151    fn test_serialize_float_infinities() {
1152        let mut buf = Vec::new();
1153        let mut stb = StringTableBuilder::new();
1154        serialize_value(&Value::float(f64::INFINITY), &mut buf, &mut stb).unwrap();
1155        serialize_value(&Value::float(f64::NEG_INFINITY), &mut buf, &mut stb).unwrap();
1156
1157        let table = stb.finish();
1158        let remap = build_remap_table(&table);
1159        let mut cursor = 0;
1160        let v1 = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1161        assert_eq!(v1.as_float(), Some(f64::INFINITY));
1162        let v2 = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1163        assert_eq!(v2.as_float(), Some(f64::NEG_INFINITY));
1164    }
1165
1166    // ── Int edge cases ───────────────────────────────────────────
1167
1168    #[test]
1169    fn test_serialize_int_extremes() {
1170        let mut buf = Vec::new();
1171        let mut stb = StringTableBuilder::new();
1172        serialize_value(&Value::int(i64::MIN), &mut buf, &mut stb).unwrap();
1173        serialize_value(&Value::int(i64::MAX), &mut buf, &mut stb).unwrap();
1174        serialize_value(&Value::int(0), &mut buf, &mut stb).unwrap();
1175        serialize_value(&Value::int(-1), &mut buf, &mut stb).unwrap();
1176
1177        let table = stb.finish();
1178        let remap = build_remap_table(&table);
1179        let mut cursor = 0;
1180        assert_eq!(
1181            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1182            Value::int(i64::MIN)
1183        );
1184        assert_eq!(
1185            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1186            Value::int(i64::MAX)
1187        );
1188        assert_eq!(
1189            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1190            Value::int(0)
1191        );
1192        assert_eq!(
1193            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1194            Value::int(-1)
1195        );
1196    }
1197
1198    // ── Empty collections ────────────────────────────────────────
1199
1200    #[test]
1201    fn test_serialize_empty_collections() {
1202        let mut buf = Vec::new();
1203        let mut stb = StringTableBuilder::new();
1204
1205        serialize_value(&Value::list(vec![]), &mut buf, &mut stb).unwrap();
1206        serialize_value(&Value::vector(vec![]), &mut buf, &mut stb).unwrap();
1207        serialize_value(
1208            &Value::map(std::collections::BTreeMap::new()),
1209            &mut buf,
1210            &mut stb,
1211        )
1212        .unwrap();
1213        serialize_value(&Value::hashmap(vec![]), &mut buf, &mut stb).unwrap();
1214        serialize_value(&Value::bytevector(vec![]), &mut buf, &mut stb).unwrap();
1215
1216        let table = stb.finish();
1217        let remap = build_remap_table(&table);
1218        let mut cursor = 0;
1219
1220        let l = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1221        assert_eq!(l.as_list().unwrap().len(), 0);
1222        let v = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1223        assert_eq!(v.as_vector().unwrap().len(), 0);
1224        let m = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1225        assert_eq!(m.as_map_rc().unwrap().len(), 0);
1226        let hm = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1227        assert_eq!(hm.as_hashmap_rc().unwrap().len(), 0);
1228        let bv = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1229        assert_eq!(bv.as_bytevector().unwrap().len(), 0);
1230    }
1231
1232    // ── Nested collections ───────────────────────────────────────
1233
1234    #[test]
1235    fn test_serialize_nested_collections() {
1236        let mut buf = Vec::new();
1237        let mut stb = StringTableBuilder::new();
1238
1239        // vector of lists
1240        let nested = Value::vector(vec![
1241            Value::list(vec![Value::int(1), Value::int(2)]),
1242            Value::list(vec![Value::string("a"), Value::symbol("b")]),
1243        ]);
1244        serialize_value(&nested, &mut buf, &mut stb).unwrap();
1245
1246        let table = stb.finish();
1247        let remap = build_remap_table(&table);
1248        let mut cursor = 0;
1249        let v = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1250        assert_eq!(v, nested);
1251    }
1252
1253    // ── Char roundtrip ───────────────────────────────────────────
1254
1255    #[test]
1256    fn test_serialize_char() {
1257        let mut buf = Vec::new();
1258        let mut stb = StringTableBuilder::new();
1259        serialize_value(&Value::char('A'), &mut buf, &mut stb).unwrap();
1260        serialize_value(&Value::char('🦀'), &mut buf, &mut stb).unwrap();
1261
1262        let table = stb.finish();
1263        let remap = build_remap_table(&table);
1264        let mut cursor = 0;
1265        assert_eq!(
1266            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1267            Value::char('A')
1268        );
1269        assert_eq!(
1270            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1271            Value::char('🦀')
1272        );
1273    }
1274
1275    // ── Bytevector roundtrip ─────────────────────────────────────
1276
1277    #[test]
1278    fn test_serialize_bytevector() {
1279        let mut buf = Vec::new();
1280        let mut stb = StringTableBuilder::new();
1281        let data = vec![0u8, 1, 2, 255, 128, 64];
1282        serialize_value(&Value::bytevector(data.clone()), &mut buf, &mut stb).unwrap();
1283
1284        let table = stb.finish();
1285        let remap = build_remap_table(&table);
1286        let mut cursor = 0;
1287        let v = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1288        assert_eq!(v.as_bytevector().unwrap(), &data);
1289    }
1290
1291    // ── Invalid data deserialization ─────────────────────────────
1292
1293    #[test]
1294    fn test_deserialize_invalid_bool() {
1295        let buf = vec![VAL_BOOL, 0x02]; // invalid: not 0 or 1
1296        let table: Vec<String> = vec![];
1297        let remap: Vec<Spur> = vec![];
1298        let mut cursor = 0;
1299        let result = deserialize_value(&buf, &mut cursor, &table, &remap);
1300        assert!(result.is_err());
1301    }
1302
1303    #[test]
1304    fn test_deserialize_invalid_char() {
1305        // 0xD800 is a surrogate — not a valid Unicode scalar value
1306        let mut buf = vec![VAL_CHAR];
1307        buf.extend_from_slice(&0xD800u32.to_le_bytes());
1308        let table: Vec<String> = vec![];
1309        let remap: Vec<Spur> = vec![];
1310        let mut cursor = 0;
1311        let result = deserialize_value(&buf, &mut cursor, &table, &remap);
1312        assert!(result.is_err());
1313    }
1314
1315    #[test]
1316    fn test_deserialize_unknown_tag() {
1317        let buf = vec![0xFF];
1318        let table: Vec<String> = vec![];
1319        let remap: Vec<Spur> = vec![];
1320        let mut cursor = 0;
1321        let result = deserialize_value(&buf, &mut cursor, &table, &remap);
1322        assert!(result.is_err());
1323    }
1324
1325    #[test]
1326    fn test_deserialize_truncated_data() {
1327        // Int tag but only 3 bytes of payload instead of 8
1328        let buf = vec![VAL_INT, 0x01, 0x02, 0x03];
1329        let table: Vec<String> = vec![];
1330        let remap: Vec<Spur> = vec![];
1331        let mut cursor = 0;
1332        let result = deserialize_value(&buf, &mut cursor, &table, &remap);
1333        assert!(result.is_err());
1334    }
1335
1336    #[test]
1337    fn test_deserialize_string_index_out_of_range() {
1338        let mut buf = vec![VAL_STRING];
1339        buf.extend_from_slice(&99u32.to_le_bytes()); // index 99, but table is smaller
1340        let table = vec!["".to_string()];
1341        let remap = build_remap_table(&table);
1342        let mut cursor = 0;
1343        let result = deserialize_value(&buf, &mut cursor, &table, &remap);
1344        assert!(result.is_err());
1345    }
1346
1347    // ── Runtime-only types rejected ──────────────────────────────
1348
1349    #[test]
1350    fn test_serialize_runtime_only_type_rejected() {
1351        use sema_core::{Env, Lambda};
1352        let lambda = Value::lambda(Lambda {
1353            params: vec![],
1354            rest_param: None,
1355            body: vec![],
1356            env: Env::new(),
1357            name: None,
1358        });
1359        let mut buf = Vec::new();
1360        let mut stb = StringTableBuilder::new();
1361        let result = serialize_value(&lambda, &mut buf, &mut stb);
1362        assert!(result.is_err());
1363    }
1364
1365    // ── Chunk edge cases ─────────────────────────────────────────
1366
1367    #[test]
1368    fn test_chunk_roundtrip_with_exceptions() {
1369        use crate::chunk::ExceptionEntry;
1370        use crate::emit::Emitter;
1371        use crate::opcodes::Op;
1372
1373        let mut e = Emitter::new();
1374        e.emit_op(Op::Nil);
1375        e.emit_op(Op::Return);
1376        let mut chunk = e.into_chunk();
1377        chunk.exception_table = vec![
1378            ExceptionEntry {
1379                try_start: 0,
1380                try_end: 10,
1381                handler_pc: 20,
1382                stack_depth: 3,
1383                catch_slot: 5,
1384            },
1385            ExceptionEntry {
1386                try_start: 100,
1387                try_end: 200,
1388                handler_pc: 300,
1389                stack_depth: 0,
1390                catch_slot: 7,
1391            },
1392        ];
1393
1394        let mut buf = Vec::new();
1395        let mut stb = StringTableBuilder::new();
1396        serialize_chunk(&chunk, &mut buf, &mut stb).unwrap();
1397
1398        let table = stb.finish();
1399        let remap = build_remap_table(&table);
1400        let mut cursor = 0;
1401        let chunk2 = deserialize_chunk(&buf, &mut cursor, &table, &remap).unwrap();
1402
1403        assert_eq!(chunk2.exception_table.len(), 2);
1404        assert_eq!(chunk2.exception_table[0].try_start, 0);
1405        assert_eq!(chunk2.exception_table[0].try_end, 10);
1406        assert_eq!(chunk2.exception_table[0].handler_pc, 20);
1407        assert_eq!(chunk2.exception_table[0].stack_depth, 3);
1408        assert_eq!(chunk2.exception_table[0].catch_slot, 5);
1409        assert_eq!(chunk2.exception_table[1].try_start, 100);
1410        assert_eq!(chunk2.exception_table[1].handler_pc, 300);
1411    }
1412
1413    #[test]
1414    fn test_chunk_roundtrip_with_spans() {
1415        use crate::emit::Emitter;
1416        use crate::opcodes::Op;
1417
1418        let mut e = Emitter::new();
1419        e.emit_op(Op::Nil);
1420        e.emit_op(Op::Return);
1421        let mut chunk = e.into_chunk();
1422        chunk.spans = vec![(0, Span::point(1, 5)), (1, Span::new(2, 10, 3, 15))];
1423
1424        let mut buf = Vec::new();
1425        let mut stb = StringTableBuilder::new();
1426        serialize_chunk(&chunk, &mut buf, &mut stb).unwrap();
1427
1428        let table = stb.finish();
1429        let remap = build_remap_table(&table);
1430        let mut cursor = 0;
1431        let chunk2 = deserialize_chunk(&buf, &mut cursor, &table, &remap).unwrap();
1432
1433        assert_eq!(chunk2.spans.len(), 2);
1434        assert_eq!(chunk2.spans[0].0, 0);
1435        assert_eq!(chunk2.spans[0].1.line, 1);
1436        assert_eq!(chunk2.spans[0].1.col, 5);
1437        assert_eq!(chunk2.spans[0].1.end_line, 1);
1438        assert_eq!(chunk2.spans[0].1.end_col, 5);
1439        assert_eq!(chunk2.spans[1].0, 1);
1440        assert_eq!(chunk2.spans[1].1.line, 2);
1441        assert_eq!(chunk2.spans[1].1.col, 10);
1442        assert_eq!(chunk2.spans[1].1.end_line, 3);
1443        assert_eq!(chunk2.spans[1].1.end_col, 15);
1444    }
1445
1446    #[test]
1447    fn test_chunk_deserialize_truncated() {
1448        // A chunk with code_len=100 but only a few bytes in the buffer
1449        let mut buf = Vec::new();
1450        buf.extend_from_slice(&100u32.to_le_bytes()); // claims 100 bytes of code
1451        buf.extend_from_slice(&[0u8; 4]); // only 4 bytes, not 100
1452
1453        let table: Vec<String> = vec![];
1454        let remap: Vec<Spur> = vec![];
1455        let mut cursor = 0;
1456        let result = deserialize_chunk(&buf, &mut cursor, &table, &remap);
1457        assert!(result.is_err());
1458    }
1459
1460    // ── Spur remapping ─────────────────────────────────────────
1461
1462    #[test]
1463    fn test_spur_remapping_in_bytecode() {
1464        use crate::emit::Emitter;
1465        use crate::opcodes::Op;
1466
1467        let spur = intern("my-global");
1468        let mut e = Emitter::new();
1469        e.emit_op(Op::LoadGlobal);
1470        e.emit_u32(spur_to_u32(spur));
1471        e.emit_op(Op::Return);
1472        let chunk = e.into_chunk();
1473
1474        let mut buf = Vec::new();
1475        let mut stb = StringTableBuilder::new();
1476        serialize_chunk(&chunk, &mut buf, &mut stb).unwrap();
1477
1478        // Deserialize — the spur in the deserialized bytecode should resolve to "my-global"
1479        let table = stb.finish();
1480        let remap = build_remap_table(&table);
1481        let mut cursor = 0;
1482        let chunk2 = deserialize_chunk(&buf, &mut cursor, &table, &remap).unwrap();
1483
1484        let spur2_bits = u32::from_le_bytes([
1485            chunk2.code[1],
1486            chunk2.code[2],
1487            chunk2.code[3],
1488            chunk2.code[4],
1489        ]);
1490        let spur2 = u32_to_spur(spur2_bits);
1491        assert_eq!(sema_core::resolve(spur2), "my-global");
1492    }
1493
1494    #[test]
1495    fn test_spur_remapping_multiple_globals() {
1496        use crate::emit::Emitter;
1497        use crate::opcodes::Op;
1498
1499        let spur_a = intern("alpha");
1500        let spur_b = intern("beta");
1501        let mut e = Emitter::new();
1502        e.emit_op(Op::LoadGlobal);
1503        e.emit_u32(spur_to_u32(spur_a));
1504        e.emit_op(Op::DefineGlobal);
1505        e.emit_u32(spur_to_u32(spur_b));
1506        e.emit_op(Op::Return);
1507        let chunk = e.into_chunk();
1508
1509        let mut buf = Vec::new();
1510        let mut stb = StringTableBuilder::new();
1511        serialize_chunk(&chunk, &mut buf, &mut stb).unwrap();
1512
1513        let table = stb.finish();
1514        let remap = build_remap_table(&table);
1515        let mut cursor = 0;
1516        let chunk2 = deserialize_chunk(&buf, &mut cursor, &table, &remap).unwrap();
1517
1518        // Check both globals resolved correctly
1519        let bits_a = u32::from_le_bytes([
1520            chunk2.code[1],
1521            chunk2.code[2],
1522            chunk2.code[3],
1523            chunk2.code[4],
1524        ]);
1525        assert_eq!(sema_core::resolve(u32_to_spur(bits_a)), "alpha");
1526
1527        let bits_b = u32::from_le_bytes([
1528            chunk2.code[6],
1529            chunk2.code[7],
1530            chunk2.code[8],
1531            chunk2.code[9],
1532        ]);
1533        assert_eq!(sema_core::resolve(u32_to_spur(bits_b)), "beta");
1534    }
1535
1536    // ── Function serialization ─────────────────────────────────
1537
1538    #[test]
1539    fn test_function_roundtrip() {
1540        use crate::emit::Emitter;
1541        use crate::opcodes::Op;
1542
1543        let mut e = Emitter::new();
1544        e.emit_op(Op::LoadLocal0);
1545        e.emit_op(Op::Return);
1546        let chunk = e.into_chunk();
1547
1548        let func = Function {
1549            name: Some(intern("my-func")),
1550            chunk,
1551            upvalue_descs: vec![UpvalueDesc::ParentLocal(0), UpvalueDesc::ParentUpvalue(1)],
1552            arity: 2,
1553            has_rest: true,
1554            local_names: vec![(0, intern("x")), (1, intern("y"))],
1555        };
1556
1557        let mut buf = Vec::new();
1558        let mut stb = StringTableBuilder::new();
1559        serialize_function(&func, &mut buf, &mut stb).unwrap();
1560
1561        let table = stb.finish();
1562        let remap = build_remap_table(&table);
1563        let mut cursor = 0;
1564        let func2 = deserialize_function(&buf, &mut cursor, &table, &remap).unwrap();
1565
1566        assert_eq!(func2.arity, 2);
1567        assert!(func2.has_rest);
1568        assert_eq!(func2.upvalue_descs.len(), 2);
1569        assert_eq!(func2.local_names.len(), 2);
1570        assert!(func2.name.is_some());
1571        assert_eq!(sema_core::resolve(func2.name.unwrap()), "my-func");
1572        assert_eq!(sema_core::resolve(func2.local_names[0].1), "x");
1573        assert_eq!(sema_core::resolve(func2.local_names[1].1), "y");
1574    }
1575
1576    #[test]
1577    fn test_function_roundtrip_anonymous() {
1578        use crate::emit::Emitter;
1579        use crate::opcodes::Op;
1580
1581        let mut e = Emitter::new();
1582        e.emit_op(Op::Return);
1583        let chunk = e.into_chunk();
1584
1585        let func = Function {
1586            name: None,
1587            chunk,
1588            upvalue_descs: vec![],
1589            arity: 0,
1590            has_rest: false,
1591            local_names: vec![],
1592        };
1593
1594        let mut buf = Vec::new();
1595        let mut stb = StringTableBuilder::new();
1596        serialize_function(&func, &mut buf, &mut stb).unwrap();
1597
1598        let table = stb.finish();
1599        let remap = build_remap_table(&table);
1600        let mut cursor = 0;
1601        let func2 = deserialize_function(&buf, &mut cursor, &table, &remap).unwrap();
1602
1603        assert!(func2.name.is_none());
1604        assert_eq!(func2.arity, 0);
1605        assert!(!func2.has_rest);
1606        assert_eq!(func2.upvalue_descs.len(), 0);
1607    }
1608
1609    // ── Full file serialization ─────────────────────────────────
1610
1611    #[test]
1612    fn test_full_file_roundtrip() {
1613        use crate::emit::Emitter;
1614        use crate::opcodes::Op;
1615
1616        let mut e = Emitter::new();
1617        e.emit_const(Value::int(42));
1618        e.emit_op(Op::Return);
1619        let chunk = e.into_chunk();
1620        let result = CompileResult {
1621            chunk,
1622            functions: vec![],
1623        };
1624
1625        let bytes = serialize_to_bytes(&result, 0).unwrap();
1626        assert_eq!(&bytes[0..4], b"\x00SEM");
1627
1628        let result2 = deserialize_from_bytes(&bytes).unwrap();
1629        assert_eq!(result2.chunk.consts.len(), 1);
1630        assert_eq!(result2.functions.len(), 0);
1631    }
1632
1633    #[test]
1634    fn test_full_file_with_functions() {
1635        use crate::emit::Emitter;
1636        use crate::opcodes::Op;
1637
1638        // Main chunk
1639        let mut e = Emitter::new();
1640        e.emit_op(Op::MakeClosure);
1641        e.emit_u16(0); // func_id
1642        e.emit_u16(0); // n_upvalues
1643        e.emit_op(Op::Return);
1644        let chunk = e.into_chunk();
1645
1646        // Function
1647        let mut fe = Emitter::new();
1648        fe.emit_op(Op::LoadLocal0);
1649        fe.emit_op(Op::Return);
1650        let func = Function {
1651            name: Some(intern("add-one")),
1652            chunk: fe.into_chunk(),
1653            upvalue_descs: vec![],
1654            arity: 1,
1655            has_rest: false,
1656            local_names: vec![(0, intern("x"))],
1657        };
1658
1659        let result = CompileResult {
1660            chunk,
1661            functions: vec![func],
1662        };
1663
1664        let bytes = serialize_to_bytes(&result, 0xDEAD_BEEF).unwrap();
1665        let result2 = deserialize_from_bytes(&bytes).unwrap();
1666
1667        assert_eq!(result2.functions.len(), 1);
1668        assert_eq!(result2.functions[0].arity, 1);
1669        assert_eq!(
1670            sema_core::resolve(result2.functions[0].name.unwrap()),
1671            "add-one"
1672        );
1673    }
1674
1675    #[test]
1676    fn test_magic_detection() {
1677        assert!(is_bytecode_file(b"\x00SEM\x01\x00"));
1678        assert!(!is_bytecode_file(b"(define x 1)"));
1679        assert!(!is_bytecode_file(b""));
1680        assert!(!is_bytecode_file(b"\x00SE")); // too short
1681    }
1682
1683    #[test]
1684    fn test_deserialize_bad_magic() {
1685        let mut bytes = vec![0u8; 24];
1686        bytes[0..4].copy_from_slice(b"NOPE");
1687        let result = deserialize_from_bytes(&bytes);
1688        assert!(result.is_err());
1689    }
1690
1691    #[test]
1692    fn test_deserialize_bad_version() {
1693        let mut bytes = vec![0u8; 24];
1694        bytes[0..4].copy_from_slice(&[0x00, b'S', b'E', b'M']);
1695        bytes[4..6].copy_from_slice(&99u16.to_le_bytes()); // unsupported version
1696        let result = deserialize_from_bytes(&bytes);
1697        assert!(result.is_err());
1698    }
1699
1700    #[test]
1701    fn test_deserialize_rejects_nonzero_reserved() {
1702        let mut bytes = vec![0u8; 24];
1703        bytes[0..4].copy_from_slice(&MAGIC);
1704        bytes[4..6].copy_from_slice(&FORMAT_VERSION.to_le_bytes());
1705        // Set reserved field (offset 20-23) to non-zero
1706        bytes[20] = 0xFF;
1707        let result = deserialize_from_bytes(&bytes);
1708        assert!(result.is_err(), "should reject non-zero reserved field");
1709        let err = result.err().unwrap();
1710        assert!(err.to_string().contains("reserved"));
1711    }
1712
1713    #[test]
1714    fn test_deserialize_too_short() {
1715        let result = deserialize_from_bytes(&[0x00, b'S', b'E']);
1716        assert!(result.is_err());
1717    }
1718
1719    #[test]
1720    fn test_full_file_roundtrip_with_globals() {
1721        use crate::emit::Emitter;
1722        use crate::opcodes::Op;
1723
1724        // Build a chunk with global opcodes and symbol/keyword constants
1725        let spur_x = intern("my-var");
1726        let spur_print = intern("println");
1727        let mut e = Emitter::new();
1728        // (define my-var 42)
1729        e.emit_const(Value::int(42));
1730        e.emit_op(Op::DefineGlobal);
1731        e.emit_u32(spur_to_u32(spur_x));
1732        // (println my-var) — load both globals
1733        e.emit_op(Op::LoadGlobal);
1734        e.emit_u32(spur_to_u32(spur_print));
1735        e.emit_op(Op::LoadGlobal);
1736        e.emit_u32(spur_to_u32(spur_x));
1737        // symbol and keyword in constant pool
1738        e.emit_const(Value::symbol("test-sym"));
1739        e.emit_const(Value::keyword("test-kw"));
1740        e.emit_op(Op::Return);
1741        let chunk = e.into_chunk();
1742
1743        let result = CompileResult {
1744            chunk,
1745            functions: vec![],
1746        };
1747
1748        let bytes = serialize_to_bytes(&result, 0).unwrap();
1749        let result2 = deserialize_from_bytes(&bytes).unwrap();
1750
1751        // Verify globals resolve correctly in the deserialized bytecode
1752        // DefineGlobal "my-var" is at code offset 3 (after CONST(3 bytes))
1753        let code = &result2.chunk.code;
1754        // Find DefineGlobal
1755        let mut found_define = false;
1756        let mut found_load_print = false;
1757        let mut pc = 0;
1758        while pc < code.len() {
1759            let (op, next) = advance_pc(code, pc).unwrap();
1760            match op {
1761                Op::DefineGlobal => {
1762                    let bits = u32::from_le_bytes([
1763                        code[pc + 1],
1764                        code[pc + 2],
1765                        code[pc + 3],
1766                        code[pc + 4],
1767                    ]);
1768                    assert_eq!(sema_core::resolve(u32_to_spur(bits)), "my-var");
1769                    found_define = true;
1770                }
1771                Op::LoadGlobal => {
1772                    let bits = u32::from_le_bytes([
1773                        code[pc + 1],
1774                        code[pc + 2],
1775                        code[pc + 3],
1776                        code[pc + 4],
1777                    ]);
1778                    let name = sema_core::resolve(u32_to_spur(bits));
1779                    if name == "println" {
1780                        found_load_print = true;
1781                    }
1782                }
1783                _ => {}
1784            }
1785            pc = next;
1786        }
1787        assert!(found_define, "DefineGlobal 'my-var' not found");
1788        assert!(found_load_print, "LoadGlobal 'println' not found");
1789
1790        // Verify symbol/keyword constants survived
1791        assert_eq!(result2.chunk.consts.len(), 3); // 42, test-sym, test-kw
1792        assert!(result2.chunk.consts[1].as_symbol().is_some());
1793        assert!(result2.chunk.consts[2].as_keyword().is_some());
1794    }
1795
1796    #[test]
1797    fn test_truncated_global_operand_errors_not_panics() {
1798        // A LoadGlobal at the end with missing operand bytes
1799        let code = vec![Op::LoadGlobal as u8, 0x01, 0x00]; // only 2 of 4 operand bytes
1800        let mut stb = StringTableBuilder::new();
1801        let result = remap_spurs_to_indices(&code, &mut stb);
1802        assert!(result.is_err());
1803
1804        // Also test remap_indices_to_spurs
1805        let mut code2 = vec![Op::LoadGlobal as u8, 0x01]; // only 1 operand byte
1806        let remap = vec![intern("x")];
1807        let result2 = remap_indices_to_spurs(&mut code2, &remap);
1808        assert!(result2.is_err());
1809    }
1810
1811    #[test]
1812    fn test_truncated_make_closure_errors_not_panics() {
1813        // MakeClosure with truncated operands
1814        let code = vec![Op::MakeClosure as u8, 0x00]; // only 1 of 4 operand bytes
1815        let mut stb = StringTableBuilder::new();
1816        let result = remap_spurs_to_indices(&code, &mut stb);
1817        assert!(result.is_err());
1818    }
1819
1820    #[test]
1821    fn test_missing_required_section_errors() {
1822        // Valid header but n_sections=0 → missing all required sections
1823        let mut bytes = vec![0u8; 24];
1824        bytes[0..4].copy_from_slice(&[0x00, b'S', b'E', b'M']);
1825        bytes[4..6].copy_from_slice(&1u16.to_le_bytes()); // format version 1
1826        bytes[14..16].copy_from_slice(&0u16.to_le_bytes()); // 0 sections
1827        let result = deserialize_from_bytes(&bytes);
1828        match &result {
1829            Err(e) => assert!(e.to_string().contains("missing"), "unexpected error: {e}"),
1830            Ok(_) => panic!("expected error for missing sections"),
1831        }
1832    }
1833
1834    // ── Unicode string table ─────────────────────────────────────
1835
1836    #[test]
1837    fn test_string_table_unicode() {
1838        let mut builder = StringTableBuilder::new();
1839        let idx1 = builder.intern_str("こんにちは");
1840        let idx2 = builder.intern_str("🦀");
1841        let idx3 = builder.intern_str("café");
1842
1843        let table = builder.finish();
1844        assert_eq!(table[idx1 as usize], "こんにちは");
1845        assert_eq!(table[idx2 as usize], "🦀");
1846        assert_eq!(table[idx3 as usize], "café");
1847    }
1848
1849    #[test]
1850    fn test_serialize_value_roundtrip_primitives() {
1851        let mut buf = Vec::new();
1852        let mut stb = StringTableBuilder::new();
1853
1854        serialize_value(&Value::nil(), &mut buf, &mut stb).unwrap();
1855        serialize_value(&Value::bool(true), &mut buf, &mut stb).unwrap();
1856        serialize_value(&Value::bool(false), &mut buf, &mut stb).unwrap();
1857        serialize_value(&Value::int(42), &mut buf, &mut stb).unwrap();
1858        serialize_value(&Value::float(3.14), &mut buf, &mut stb).unwrap();
1859        serialize_value(&Value::string("hello"), &mut buf, &mut stb).unwrap();
1860        serialize_value(&Value::symbol("foo"), &mut buf, &mut stb).unwrap();
1861        serialize_value(&Value::keyword("bar"), &mut buf, &mut stb).unwrap();
1862
1863        let table = stb.finish();
1864        let remap = build_remap_table(&table);
1865        let mut cursor = 0;
1866        assert_eq!(
1867            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1868            Value::nil()
1869        );
1870        assert_eq!(
1871            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1872            Value::bool(true)
1873        );
1874        assert_eq!(
1875            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1876            Value::bool(false)
1877        );
1878        assert_eq!(
1879            deserialize_value(&buf, &mut cursor, &table, &remap).unwrap(),
1880            Value::int(42)
1881        );
1882        let f = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1883        assert_eq!(f.as_float(), Some(3.14));
1884        let s = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1885        assert_eq!(s.as_str().unwrap(), "hello");
1886        let sym = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1887        assert!(sym.as_symbol().is_some());
1888        let kw = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1889        assert!(kw.as_keyword().is_some());
1890    }
1891
1892    #[test]
1893    fn test_serialize_value_roundtrip_collections() {
1894        let mut buf = Vec::new();
1895        let mut stb = StringTableBuilder::new();
1896
1897        let list = Value::list(vec![Value::int(1), Value::int(2), Value::int(3)]);
1898        serialize_value(&list, &mut buf, &mut stb).unwrap();
1899
1900        let vec = Value::vector(vec![Value::string("a"), Value::string("b")]);
1901        serialize_value(&vec, &mut buf, &mut stb).unwrap();
1902
1903        let table = stb.finish();
1904        let remap = build_remap_table(&table);
1905        let mut cursor = 0;
1906
1907        let list2 = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1908        assert_eq!(list2, list);
1909
1910        let vec2 = deserialize_value(&buf, &mut cursor, &table, &remap).unwrap();
1911        assert_eq!(vec2, vec);
1912    }
1913
1914    #[test]
1915    fn test_spur_u32_conversion_safe() {
1916        let spur = intern("test-var");
1917        let bits = spur_to_u32(spur);
1918        assert_ne!(bits, 0, "Spur should never be zero (it's NonZeroU32)");
1919        let spur2 = u32_to_spur(bits);
1920        assert_eq!(spur, spur2);
1921        assert_eq!(sema_core::resolve(spur2), "test-var");
1922    }
1923
1924    #[test]
1925    fn test_string_table_section_boundary() {
1926        use crate::emit::Emitter;
1927        use crate::opcodes::Op;
1928
1929        let mut e = Emitter::new();
1930        e.emit_const(Value::int(1));
1931        e.emit_op(Op::Return);
1932        let chunk = e.into_chunk();
1933        let result = CompileResult {
1934            chunk,
1935            functions: vec![],
1936        };
1937        let bytes = serialize_to_bytes(&result, 0).unwrap();
1938
1939        // Roundtrip should work on valid data
1940        let result2 = deserialize_from_bytes(&bytes);
1941        assert!(result2.is_ok());
1942    }
1943
1944    #[test]
1945    fn test_deserialize_value_depth_limit() {
1946        // Construct a deeply nested list: (list (list (list ... ))) 200 levels deep
1947        let depth = 200;
1948        let mut buf = Vec::new();
1949        for _ in 0..depth {
1950            buf.push(0x08); // VAL_LIST
1951            buf.extend_from_slice(&1u16.to_le_bytes()); // 1 element
1952        }
1953        buf.push(0x00); // VAL_NIL at the bottom
1954
1955        let table = vec!["".to_string()];
1956        let remap = build_remap_table(&table);
1957        let mut cursor = 0;
1958        let result = deserialize_value(&buf, &mut cursor, &table, &remap);
1959        assert!(result.is_err(), "should reject deeply nested values");
1960        assert!(
1961            result.unwrap_err().to_string().contains("depth"),
1962            "error should mention depth limit"
1963        );
1964    }
1965
1966    #[test]
1967    fn test_u32_to_spur_rejects_zero() {
1968        let result = std::panic::catch_unwind(|| u32_to_spur(0));
1969        assert!(
1970            result.is_err(),
1971            "u32_to_spur(0) should panic (was UB before fix)"
1972        );
1973    }
1974
1975    // ── DoS limits on allocation sizes ──────────────────────────
1976
1977    #[test]
1978    fn test_deserialize_rejects_huge_code_len() {
1979        let mut buf = Vec::new();
1980        buf.extend_from_slice(&0xFFFFFFFFu32.to_le_bytes()); // code_len
1981        let table = vec!["".to_string()];
1982        let remap = build_remap_table(&table);
1983        let mut cursor = 0;
1984        let result = deserialize_chunk(&buf, &mut cursor, &table, &remap);
1985        assert!(result.is_err());
1986    }
1987
1988    #[test]
1989    fn test_deserialize_rejects_huge_string_count() {
1990        let mut section = Vec::new();
1991        section.extend_from_slice(&0xFFFFFFFFu32.to_le_bytes()); // count
1992
1993        let mut bytes = vec![0u8; 24];
1994        bytes[0..4].copy_from_slice(&[0x00, b'S', b'E', b'M']);
1995        bytes[4..6].copy_from_slice(&1u16.to_le_bytes()); // format version
1996        bytes[14..16].copy_from_slice(&1u16.to_le_bytes()); // 1 section
1997                                                            // Section header
1998        bytes.extend_from_slice(&0x01u16.to_le_bytes()); // string table
1999        bytes.extend_from_slice(&(section.len() as u32).to_le_bytes());
2000        bytes.extend_from_slice(&section);
2001
2002        let result = deserialize_from_bytes(&bytes);
2003        assert!(result.is_err());
2004    }
2005
2006    #[test]
2007    fn test_deserialize_rejects_huge_bytevector() {
2008        let mut buf = Vec::new();
2009        buf.push(0x0C); // VAL_BYTEVECTOR
2010        buf.extend_from_slice(&0xFFFFFFFFu32.to_le_bytes()); // length
2011        let table = vec!["".to_string()];
2012        let remap = build_remap_table(&table);
2013        let mut cursor = 0;
2014        let result = deserialize_value(&buf, &mut cursor, &table, &remap);
2015        assert!(result.is_err());
2016    }
2017
2018    #[test]
2019    fn test_deserialize_rejects_nonempty_string_zero() {
2020        let mut bad_bytes = Vec::new();
2021        // Header
2022        bad_bytes.extend_from_slice(&[0x00, b'S', b'E', b'M']); // magic
2023        bad_bytes.extend_from_slice(&1u16.to_le_bytes()); // format version
2024        bad_bytes.extend_from_slice(&0u16.to_le_bytes()); // flags
2025        bad_bytes.extend_from_slice(&0u16.to_le_bytes()); // sema_major
2026        bad_bytes.extend_from_slice(&0u16.to_le_bytes()); // sema_minor
2027        bad_bytes.extend_from_slice(&0u16.to_le_bytes()); // sema_patch
2028        bad_bytes.extend_from_slice(&3u16.to_le_bytes()); // n_sections = 3
2029        bad_bytes.extend_from_slice(&0u32.to_le_bytes()); // source_hash
2030        bad_bytes.extend_from_slice(&0u32.to_le_bytes()); // reserved
2031        assert_eq!(bad_bytes.len(), 24);
2032
2033        // String table section with index 0 = "bad" instead of ""
2034        let mut strtab = Vec::new();
2035        strtab.extend_from_slice(&1u32.to_le_bytes()); // 1 string
2036        strtab.extend_from_slice(&3u32.to_le_bytes()); // length 3
2037        strtab.extend_from_slice(b"bad"); // not empty!
2038        bad_bytes.extend_from_slice(&0x01u16.to_le_bytes()); // section type
2039        bad_bytes.extend_from_slice(&(strtab.len() as u32).to_le_bytes());
2040        bad_bytes.extend_from_slice(&strtab);
2041
2042        // Empty function table section
2043        let mut functab = Vec::new();
2044        functab.extend_from_slice(&0u32.to_le_bytes()); // 0 functions
2045        bad_bytes.extend_from_slice(&0x02u16.to_le_bytes());
2046        bad_bytes.extend_from_slice(&(functab.len() as u32).to_le_bytes());
2047        bad_bytes.extend_from_slice(&functab);
2048
2049        // Minimal main chunk section
2050        let mut chunk_data = Vec::new();
2051        chunk_data.extend_from_slice(&1u32.to_le_bytes()); // code_len = 1
2052        chunk_data.push(Op::Return as u8);
2053        chunk_data.extend_from_slice(&0u16.to_le_bytes()); // n_consts = 0
2054        chunk_data.extend_from_slice(&0u32.to_le_bytes()); // n_spans = 0
2055        chunk_data.extend_from_slice(&0u16.to_le_bytes()); // max_stack
2056        chunk_data.extend_from_slice(&0u16.to_le_bytes()); // n_locals
2057        chunk_data.extend_from_slice(&0u16.to_le_bytes()); // n_exceptions
2058        bad_bytes.extend_from_slice(&0x03u16.to_le_bytes());
2059        bad_bytes.extend_from_slice(&(chunk_data.len() as u32).to_le_bytes());
2060        bad_bytes.extend_from_slice(&chunk_data);
2061
2062        let result = deserialize_from_bytes(&bad_bytes);
2063        assert!(
2064            result.is_err(),
2065            "should reject string table with non-empty index 0"
2066        );
2067        let err = result.err().unwrap();
2068        assert!(err.to_string().contains("index 0 must be the empty string"));
2069    }
2070
2071    #[test]
2072    fn test_deserialize_rejects_trailing_section_bytes() {
2073        use crate::emit::Emitter;
2074        use crate::opcodes::Op;
2075
2076        let mut stb = StringTableBuilder::new();
2077        let mut func_payload = Vec::new();
2078        func_payload.extend_from_slice(&0u32.to_le_bytes()); // 0 functions
2079        func_payload.extend_from_slice(&[0xDE, 0xAD]); // trailing garbage
2080
2081        let mut chunk_payload = Vec::new();
2082        let mut e = Emitter::new();
2083        e.emit_op(Op::Nil);
2084        e.emit_op(Op::Return);
2085        let chunk = e.into_chunk();
2086        serialize_chunk(&chunk, &mut chunk_payload, &mut stb).unwrap();
2087
2088        let string_table = stb.finish();
2089        let mut strtab_payload = Vec::new();
2090        strtab_payload.extend_from_slice(&(string_table.len() as u32).to_le_bytes());
2091        for s in &string_table {
2092            let sb = s.as_bytes();
2093            strtab_payload.extend_from_slice(&(sb.len() as u32).to_le_bytes());
2094            strtab_payload.extend_from_slice(sb);
2095        }
2096
2097        let mut out = Vec::new();
2098        out.extend_from_slice(&[0x00, b'S', b'E', b'M']);
2099        out.extend_from_slice(&1u16.to_le_bytes());
2100        out.extend_from_slice(&0u16.to_le_bytes());
2101        out.extend_from_slice(&0u16.to_le_bytes());
2102        out.extend_from_slice(&0u16.to_le_bytes());
2103        out.extend_from_slice(&0u16.to_le_bytes());
2104        out.extend_from_slice(&3u16.to_le_bytes()); // 3 sections
2105        out.extend_from_slice(&0u32.to_le_bytes()); // source_hash
2106        out.extend_from_slice(&0u32.to_le_bytes()); // reserved
2107
2108        // String table section
2109        out.extend_from_slice(&0x01u16.to_le_bytes());
2110        out.extend_from_slice(&(strtab_payload.len() as u32).to_le_bytes());
2111        out.extend_from_slice(&strtab_payload);
2112        // Function table section (with trailing bytes)
2113        out.extend_from_slice(&0x02u16.to_le_bytes());
2114        out.extend_from_slice(&(func_payload.len() as u32).to_le_bytes());
2115        out.extend_from_slice(&func_payload);
2116        // Main chunk section
2117        out.extend_from_slice(&0x03u16.to_le_bytes());
2118        out.extend_from_slice(&(chunk_payload.len() as u32).to_le_bytes());
2119        out.extend_from_slice(&chunk_payload);
2120
2121        match deserialize_from_bytes(&out) {
2122            Ok(_) => panic!("should reject trailing bytes in function table section"),
2123            Err(e) => {
2124                let msg = e.to_string();
2125                assert!(
2126                    msg.contains("trailing") || msg.contains("unconsumed"),
2127                    "error should mention trailing/unconsumed bytes, got: {msg}"
2128                );
2129            }
2130        }
2131    }
2132
2133    // ── Post-deserialization bytecode validation ─────────────────
2134
2135    #[test]
2136    fn test_validate_rejects_bad_const_index() {
2137        let chunk = Chunk {
2138            code: vec![Op::Const as u8, 0x03, 0x00, Op::Return as u8],
2139            consts: vec![Value::int(1)],
2140            spans: vec![],
2141            max_stack: 1,
2142            n_locals: 0,
2143            exception_table: vec![],
2144        };
2145
2146        let result = CompileResult {
2147            chunk,
2148            functions: vec![],
2149        };
2150        let bytes = serialize_to_bytes(&result, 0).unwrap();
2151        let deser = deserialize_from_bytes(&bytes);
2152        assert!(deser.is_err(), "should reject out-of-bounds const index");
2153    }
2154
2155    #[test]
2156    fn test_validate_rejects_bad_func_id() {
2157        use crate::emit::Emitter;
2158
2159        let mut e = Emitter::new();
2160        e.emit_op(Op::MakeClosure);
2161        e.emit_u16(5); // func_id 5, but we'll have 0 functions
2162        e.emit_u16(0); // 0 upvalues
2163        e.emit_op(Op::Return);
2164        let chunk = e.into_chunk();
2165
2166        let result = CompileResult {
2167            chunk,
2168            functions: vec![],
2169        };
2170        let bytes = serialize_to_bytes(&result, 0).unwrap();
2171        let deser = deserialize_from_bytes(&bytes);
2172        assert!(
2173            deser.is_err(),
2174            "should reject out-of-bounds func_id in MakeClosure"
2175        );
2176    }
2177}