// ion_core/bytecode.rs
1//! Bytecode instruction set and chunk representation for the Ion VM.
2
3use crate::value::Value;
4
/// VM opcodes — each is a single byte.
///
/// Trailing comments list each opcode's inline operands; multi-byte
/// operands are big-endian (see `Chunk::emit_op_u16` / `Chunk::read_u16`).
///
/// NOTE: the discriminant (declaration order) is the serialized opcode
/// byte, so new opcodes must be appended — never inserted or reordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Op {
    /// Push a constant from the constant pool onto the stack.
    Constant, // u16 index
    /// Push common values without a constant pool lookup.
    True,
    False,
    Unit,
    None,

    // --- Arithmetic ---
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Neg,

    // --- Bitwise ---
    BitAnd,
    BitOr,
    BitXor,
    Shl,
    Shr,

    // --- Comparison ---
    Eq,
    NotEq,
    Lt,
    Gt,
    LtEq,
    GtEq,

    // --- Logic ---
    Not,
    And, // u16 jump offset (short-circuit)
    Or,  // u16 jump offset (short-circuit)

    // --- Variables ---
    /// Define a variable in the current scope.
    DefineLocal, // u16 name constant index, u8 mutable flag
    /// Get a local variable by name.
    GetLocal, // u16 name constant index
    /// Set a local variable by name.
    SetLocal, // u16 name constant index
    /// Get a global/captured variable by name.
    GetGlobal, // u16 name constant index
    /// Set a global variable.
    SetGlobal, // u16 name constant index

    // --- Control flow ---
    /// Unconditional jump forward.
    Jump, // u16 offset
    /// Jump forward if top of stack is falsy (pops condition).
    JumpIfFalse, // u16 offset
    /// Jump backward (for loops).
    Loop, // u16 offset back

    // --- Functions ---
    /// Call a function: pops func + args from stack.
    Call, // u8 arg count
    /// Tail call: like Call but reuses the current frame (no stack growth).
    TailCall, // u8 arg count
    /// Return from current function.
    Return,

    // --- Stack manipulation ---
    /// Pop and discard the top value.
    Pop,
    /// Duplicate the top value.
    Dup,

    // --- Composite types ---
    /// Build a list from N values on stack.
    BuildList, // u16 count
    /// Build a tuple from N values on stack.
    BuildTuple, // u16 count
    /// Build a dict from N key-value pairs on stack.
    BuildDict, // u16 count (number of pairs)

    // --- Field/index access ---
    /// Get field: pop object, push object.field.
    GetField, // u16 field name constant index
    /// Index access: pop index, pop object, push object[index].
    GetIndex,
    /// Set field: pop value, pop object, mutate, push value.
    SetField, // u16 field name constant index
    /// Set index: pop value, pop index, pop object, mutate, push value.
    SetIndex,
    /// Method call: pop args + receiver, push result.
    MethodCall, // u16 method name constant index, u8 arg count

    // --- Closures ---
    /// Create a closure from a function prototype.
    Closure, // u16 function constant index

    // --- Option/Result ---
    WrapSome,
    WrapOk,
    WrapErr,
    /// Try operator (?): unwrap Ok/Some or propagate Err/None.
    Try,

    // --- Scope ---
    PushScope,
    PopScope,

    // --- String ---
    /// Build an f-string from N parts on stack.
    BuildFString, // u16 part count

    // --- Pipe ---
    /// Pipe operator: rearranges stack for function call.
    Pipe, // u8 arg count (always 1 extra)

    // --- Pattern matching ---
    /// Begin a match expression.
    MatchBegin, // u8 kind; kind 4 adds u8 length, kind 5 adds u8 length + u8 has_rest
    /// Test a pattern against the match subject.
    MatchArm, // u8 kind (1=Some,2=Ok,3=Err,4=tuple,5=list), +u8 index for 4/5
    /// End match (cleanup).
    MatchEnd,

    // --- Range ---
    BuildRange, // u8: 0 = exclusive, 1 = inclusive

    // --- Host types ---
    ConstructStruct, // u16 type name index, u16 field count
    ConstructEnum,   // u16 enum name index, u16 variant name index, u8 arg count

    // --- Comprehensions ---
    /// List comprehension iteration setup
    IterInit,
    IterNext, // u16 jump offset when exhausted
    ListAppend,
    /// Pop a list from TOS, extend the list below TOS with its items (for spread).
    ListExtend,
    DictInsert,
    /// Merge a dict into the dict below it on the stack (for spread).
    DictMerge,
    /// Drop the current iterator (for break in for-loops).
    IterDrop,
    /// Runtime type check: peek TOS, compare type against constant at u16 index.
    CheckType, // u16: constant index (string type name)

    /// Slice access: pop end (or sentinel), pop start (or sentinel), pop object, push slice.
    Slice, // u8: flags (bit 0 = has_start, bit 1 = has_end, bit 2 = inclusive)

    // --- Stack-slot locals (fast path) ---
    /// Define a local in the slot array.
    DefineLocalSlot, // u8 mutable flag
    /// Get a local by slot index (relative to current frame base).
    GetLocalSlot, // u16 slot index
    /// Set a local by slot index.
    SetLocalSlot, // u16 slot index

    /// Call with named arguments: u8 arg count, then u8 count of named pairs,
    /// each pair being u8 (arg position) + u16 (name constant index).
    CallNamed, // u8 total_args, u8 named_count, then [u8 position, u16 name_idx] * named_count

    /// Begin a try block: push exception handler.
    TryBegin, // u16 catch handler offset
    /// End a try block (no error): pop handler, jump over catch.
    TryEnd, // u16 jump offset past catch block

    /// Print (for testing/debugging)
    Print, // u8: 0 = print, 1 = println
}
174
/// A compiled bytecode chunk: a flat byte array of instructions plus the
/// side tables needed to execute it and report errors against source code.
///
/// Invariant (maintained by the `emit*` helpers): `lines` and `cols` run
/// parallel to `code` — one entry per *byte*, not per instruction — so any
/// code offset can be mapped back to a source location.
#[derive(Debug, Clone)]
pub struct Chunk {
    /// The bytecode instructions (opcodes and their inline operands).
    pub code: Vec<u8>,
    /// Constant pool, referenced by u16 operands such as `Op::Constant`.
    pub constants: Vec<Value>,
    /// Source line for each byte of `code` (for error reporting).
    pub lines: Vec<usize>,
    /// Source column for each byte of `code`; 0 when only a line was recorded.
    pub cols: Vec<usize>,
}
187
188impl Default for Chunk {
189    fn default() -> Self {
190        Self::new()
191    }
192}
193
194impl Chunk {
195    pub fn new() -> Self {
196        Self {
197            code: Vec::new(),
198            constants: Vec::new(),
199            lines: Vec::new(),
200            cols: Vec::new(),
201        }
202    }
203
204    /// Emit a single byte with source location.
205    pub fn emit(&mut self, byte: u8, line: usize) {
206        self.code.push(byte);
207        self.lines.push(line);
208        self.cols.push(0);
209    }
210
211    /// Emit a single byte with full source span (line + col).
212    pub fn emit_span(&mut self, byte: u8, line: usize, col: usize) {
213        self.code.push(byte);
214        self.lines.push(line);
215        self.cols.push(col);
216    }
217
218    /// Emit an opcode.
219    pub fn emit_op(&mut self, op: Op, line: usize) {
220        self.emit(op as u8, line);
221    }
222
223    /// Emit an opcode with full span.
224    pub fn emit_op_span(&mut self, op: Op, line: usize, col: usize) {
225        self.emit_span(op as u8, line, col);
226    }
227
228    /// Emit an opcode followed by a u16 operand.
229    pub fn emit_op_u16(&mut self, op: Op, operand: u16, line: usize) {
230        self.emit(op as u8, line);
231        self.emit((operand >> 8) as u8, line);
232        self.emit((operand & 0xff) as u8, line);
233    }
234
235    /// Emit an opcode followed by a u16 operand with full span.
236    pub fn emit_op_u16_span(&mut self, op: Op, operand: u16, line: usize, col: usize) {
237        self.emit_span(op as u8, line, col);
238        self.emit_span((operand >> 8) as u8, line, col);
239        self.emit_span((operand & 0xff) as u8, line, col);
240    }
241
242    /// Emit an opcode followed by a u8 operand.
243    pub fn emit_op_u8(&mut self, op: Op, operand: u8, line: usize) {
244        self.emit(op as u8, line);
245        self.emit(operand, line);
246    }
247
248    /// Emit an opcode followed by a u8 operand with full span.
249    pub fn emit_op_u8_span(&mut self, op: Op, operand: u8, line: usize, col: usize) {
250        self.emit_span(op as u8, line, col);
251        self.emit_span(operand, line, col);
252    }
253
254    /// Add a constant to the pool, returning its index.
255    /// Deduplicates string, int, float, and bool constants.
256    pub fn add_constant(&mut self, value: Value) -> u16 {
257        for (i, c) in self.constants.iter().enumerate() {
258            let is_dup = match (&value, c) {
259                (Value::Str(a), Value::Str(b)) => a == b,
260                (Value::Int(a), Value::Int(b)) => a == b,
261                (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
262                (Value::Bool(a), Value::Bool(b)) => a == b,
263                _ => false,
264            };
265            if is_dup {
266                return i as u16;
267            }
268        }
269        self.constants.push(value);
270        (self.constants.len() - 1) as u16
271    }
272
273    /// Emit a constant load instruction.
274    pub fn emit_constant(&mut self, value: Value, line: usize) {
275        let idx = self.add_constant(value);
276        self.emit_op_u16(Op::Constant, idx, line);
277    }
278
279    /// Current code offset.
280    pub fn len(&self) -> usize {
281        self.code.len()
282    }
283
284    /// Returns true if the chunk contains no bytecode.
285    pub fn is_empty(&self) -> bool {
286        self.code.is_empty()
287    }
288
289    /// Emit a jump instruction, returning the offset to patch later.
290    pub fn emit_jump(&mut self, op: Op, line: usize) -> usize {
291        self.emit_op_u16(op, 0xffff, line);
292        self.code.len() - 2 // offset of the u16 placeholder
293    }
294
295    /// Patch a previously emitted jump to target the current position.
296    pub fn patch_jump(&mut self, offset: usize) {
297        let jump = self.code.len() - offset - 2;
298        self.code[offset] = (jump >> 8) as u8;
299        self.code[offset + 1] = (jump & 0xff) as u8;
300    }
301
302    /// Read a u16 operand at the given offset.
303    pub fn read_u16(&self, offset: usize) -> u16 {
304        ((self.code[offset] as u16) << 8) | (self.code[offset + 1] as u16)
305    }
306
307    /// Read a u8 operand at the given offset.
308    pub fn read_u8(&self, offset: usize) -> u8 {
309        self.code[offset]
310    }
311
    /// Return the total size in bytes (opcode + inline operands) of the
    /// instruction starting at `offset`.
    ///
    /// Takes a raw slice rather than `&self` so callers can decode any
    /// byte stream, not just `self.code`.
    ///
    /// Out-of-range offsets and unknown opcodes report 1 so that callers
    /// scanning the stream always make forward progress.
    ///
    /// `Op::X as u8` is a cast expression, not a constant pattern, so each
    /// arm matches via an `x if x == …` guard chain instead of patterns.
    pub fn instruction_size(code: &[u8], offset: usize) -> usize {
        if offset >= code.len() {
            return 1;
        }
        match code[offset] {
            // 1-byte (no operands)
            x if x == Op::True as u8
                || x == Op::False as u8
                || x == Op::Unit as u8
                || x == Op::None as u8
                || x == Op::Add as u8
                || x == Op::Sub as u8
                || x == Op::Mul as u8
                || x == Op::Div as u8
                || x == Op::Mod as u8
                || x == Op::Neg as u8
                || x == Op::BitAnd as u8
                || x == Op::BitOr as u8
                || x == Op::BitXor as u8
                || x == Op::Shl as u8
                || x == Op::Shr as u8
                || x == Op::Eq as u8
                || x == Op::NotEq as u8
                || x == Op::Lt as u8
                || x == Op::Gt as u8
                || x == Op::LtEq as u8
                || x == Op::GtEq as u8
                || x == Op::Not as u8
                || x == Op::Pop as u8
                || x == Op::Dup as u8
                || x == Op::GetIndex as u8
                || x == Op::SetIndex as u8
                || x == Op::WrapSome as u8
                || x == Op::WrapOk as u8
                || x == Op::WrapErr as u8
                || x == Op::Try as u8
                || x == Op::PushScope as u8
                || x == Op::PopScope as u8
                || x == Op::MatchEnd as u8
                || x == Op::Return as u8
                || x == Op::IterInit as u8
                || x == Op::ListAppend as u8
                || x == Op::ListExtend as u8
                || x == Op::DictInsert as u8
                || x == Op::DictMerge as u8
                || x == Op::IterDrop as u8 =>
            {
                1
            }
            // 2-byte (u8 operand)
            x if x == Op::Call as u8
                || x == Op::TailCall as u8
                || x == Op::Pipe as u8
                || x == Op::BuildRange as u8
                || x == Op::Slice as u8
                || x == Op::DefineLocalSlot as u8
                || x == Op::Print as u8 =>
            {
                2
            }
            // 3-byte (u16 operand)
            x if x == Op::Constant as u8
                || x == Op::And as u8
                || x == Op::Or as u8
                || x == Op::GetLocal as u8
                || x == Op::SetLocal as u8
                || x == Op::GetGlobal as u8
                || x == Op::SetGlobal as u8
                || x == Op::Jump as u8
                || x == Op::JumpIfFalse as u8
                || x == Op::Loop as u8
                || x == Op::BuildList as u8
                || x == Op::BuildTuple as u8
                || x == Op::BuildDict as u8
                || x == Op::GetField as u8
                || x == Op::SetField as u8
                || x == Op::Closure as u8
                || x == Op::BuildFString as u8
                || x == Op::IterNext as u8
                || x == Op::GetLocalSlot as u8
                || x == Op::SetLocalSlot as u8
                || x == Op::TryBegin as u8
                || x == Op::TryEnd as u8
                || x == Op::CheckType as u8 =>
            {
                3
            }
            // 4-byte (u16 + u8)
            x if x == Op::DefineLocal as u8 || x == Op::MethodCall as u8 => 4,
            // 5-byte (u16 + u16)
            x if x == Op::ConstructStruct as u8 => 5,
            // 6-byte (u16 + u16 + u8)
            x if x == Op::ConstructEnum as u8 => 6,
            // Variable-width: CallNamed: 1(op) + 1(total_args) + 1(named_count) + named_count * 3
            x if x == Op::CallNamed as u8 => {
                if offset + 2 < code.len() {
                    let named_count = code[offset + 2] as usize;
                    3 + named_count * 3 // op + total_args + named_count + (position u8 + name u16) * count
                } else {
                    // Truncated stream: report just the fixed header.
                    3
                }
            }
            // Variable-width: MatchBegin (u8 kind + extra operands depending on kind)
            x if x == Op::MatchBegin as u8 => {
                if offset + 1 < code.len() {
                    match code[offset + 1] {
                        4 => 3, // Tuple: kind + u8 length
                        5 => 4, // List: kind + u8 length + u8 has_rest
                        _ => 2, // Some/Ok/Err: just kind
                    }
                } else {
                    // Truncated stream: opcode + kind byte only.
                    2
                }
            }
            // Variable-width: MatchArm (u8 kind, then u8 index for kinds 4/5)
            x if x == Op::MatchArm as u8 => {
                if offset + 1 < code.len() {
                    let kind = code[offset + 1];
                    if kind == 4 || kind == 5 {
                        3
                    } else {
                        2
                    }
                } else {
                    // Truncated stream: opcode + kind byte only.
                    2
                }
            }
            // Unknown — treat as 1 to avoid infinite loops
            _ => 1,
        }
    }
444
    /// Peephole optimization pass: removes dead instruction sequences and
    /// adjusts jump targets accordingly.
    /// - `Not; Not` → remove both (double negation)
    /// - `Neg; Neg` → remove both (double arithmetic negation)
    /// - `Jump 0` → remove (nop jump to next instruction)
    /// - Pure push + `Pop` → remove both (dead value)
    ///
    /// Works by marking bytes dead in a side table, repeating until a fixed
    /// point is reached, then delegating to `compact_dead` to rewrite jump
    /// offsets and drop the marked bytes.
    ///
    /// NOTE(review): pair removal assumes no live jump lands *between* the
    /// two instructions of a removed pair (e.g. on the second `Not`); such
    /// a target would be remapped past both by `compact_dead`, changing
    /// behavior. Confirm the compiler never emits jumps into these patterns.
    #[cfg(feature = "optimize")]
    pub fn peephole_optimize(&mut self) {
        let old_len = self.code.len();
        if old_len == 0 {
            return;
        }
        // dead[i] == true ⇒ byte i will be removed. Bytes are only ever
        // marked in whole-instruction runs, so live positions remain
        // instruction-aligned for `instruction_size`.
        let mut dead = vec![false; old_len];
        let mut changed = true;
        // Iterate to a fixed point: one removal can expose a new pattern
        // (e.g. the middle pair of `Not; Not; Not; Not`).
        while changed {
            changed = false;
            let mut i = 0;
            while i < old_len {
                if dead[i] {
                    i += 1;
                    continue;
                }
                let size = Self::instruction_size(&self.code, i);
                // Find next live instruction (skip over already-dead bytes).
                let mut next = i + size;
                while next < old_len && dead[next] {
                    next += 1;
                }
                if next >= old_len {
                    break;
                }
                let next_size = Self::instruction_size(&self.code, next);

                // Pattern: Not; Not → remove both
                if self.code[i] == Op::Not as u8 && self.code[next] == Op::Not as u8 {
                    dead[i] = true;
                    dead[next] = true;
                    changed = true;
                    i = next + next_size;
                    continue;
                }

                // Pattern: Neg; Neg → remove both
                if self.code[i] == Op::Neg as u8 && self.code[next] == Op::Neg as u8 {
                    dead[i] = true;
                    dead[next] = true;
                    changed = true;
                    i = next + next_size;
                    continue;
                }

                // Pattern: Jump 0 → remove (jump to next instruction)
                if self.code[i] == Op::Jump as u8 && size == 3 {
                    let target = self.read_u16(i + 1);
                    if target == 0 {
                        // Mark the opcode and both operand bytes dead.
                        for item in dead.iter_mut().skip(i).take(3) {
                            *item = true;
                        }
                        changed = true;
                        i = next;
                        continue;
                    }
                }

                // Pattern: pure push followed by Pop → remove both.
                // "Pure" = pushes a value with no side effects, so push+pop
                // is a no-op pair.
                if self.code[next] == Op::Pop as u8 {
                    let b = self.code[i];
                    let is_pure = b == Op::True as u8
                        || b == Op::False as u8
                        || b == Op::Unit as u8
                        || b == Op::None as u8
                        || b == Op::Constant as u8
                        || b == Op::Dup as u8
                        || b == Op::GetLocalSlot as u8;
                    if is_pure {
                        // Kill the whole push instruction (opcode + operands)…
                        for item in dead.iter_mut().skip(i).take(size) {
                            *item = true;
                        }
                        // …and the single-byte Pop.
                        dead[next] = true;
                        changed = true;
                        i = next + 1;
                        continue;
                    }
                }

                i = next;
            }
        }

        self.compact_dead(&dead);
    }
536
    /// Remove dead bytes and adjust jump targets.
    ///
    /// Three phases:
    /// 1. build a map from old byte positions to post-compaction positions,
    /// 2. rewrite every live jump's relative u16 operand in place,
    /// 3. copy the surviving bytes (and their line/col entries) out.
    #[cfg(feature = "optimize")]
    fn compact_dead(&mut self, dead: &[bool]) {
        let old_len = self.code.len();

        // Build offset map: old position → new position. A dead position
        // maps to the new position of the next live byte, so a jump whose
        // old target was removed now lands on the following live instruction.
        let mut offset_map = vec![0usize; old_len + 1];
        let mut new_pos = 0;
        for old_pos in 0..old_len {
            offset_map[old_pos] = new_pos;
            if !dead[old_pos] {
                new_pos += 1;
            }
        }
        // Sentinel entry: jumps that target exactly end-of-code stay valid.
        offset_map[old_len] = new_pos;

        if new_pos == old_len {
            return;
        } // nothing to compact

        // Adjust jump targets (walk instruction boundaries on live code)
        let mut i = 0;
        while i < old_len {
            if dead[i] {
                i += 1;
                continue;
            }
            let op = self.code[i];
            let size = Self::instruction_size(&self.code, i);

            // Forward jumps: offset is relative to the byte AFTER the instruction
            if (op == Op::Jump as u8
                || op == Op::JumpIfFalse as u8
                || op == Op::And as u8
                || op == Op::Or as u8
                || op == Op::IterNext as u8)
                && size == 3
            {
                let old_offset = self.read_u16(i + 1) as usize;
                let old_target = i + 3 + old_offset;
                let new_instr_end = offset_map[i] + 3; // new position of byte after this instr
                // Clamp out-of-range targets to the end of the new code.
                let new_target = if old_target <= old_len {
                    offset_map[old_target]
                } else {
                    offset_map[old_len]
                };
                let new_offset = new_target.saturating_sub(new_instr_end);
                // Re-encode the adjusted big-endian u16 operand in place.
                self.code[i + 1] = (new_offset >> 8) as u8;
                self.code[i + 2] = (new_offset & 0xff) as u8;
            }

            // Backward jump: Loop (offset counts back from the byte after
            // the instruction).
            if op == Op::Loop as u8 && size == 3 {
                let old_offset = self.read_u16(i + 1) as usize;
                let old_target = (i + 3).wrapping_sub(old_offset);
                let new_instr_end = offset_map[i] + 3;
                // A wrapped (corrupt) target falls back to position 0.
                let new_target = if old_target <= old_len {
                    offset_map[old_target]
                } else {
                    0
                };
                let new_offset = new_instr_end.saturating_sub(new_target);
                self.code[i + 1] = (new_offset >> 8) as u8;
                self.code[i + 2] = (new_offset & 0xff) as u8;
            }

            i += size;
        }

        // Compact: remove dead bytes, keeping lines/cols parallel to code.
        let mut new_code = Vec::with_capacity(new_pos);
        let mut new_lines = Vec::with_capacity(new_pos);
        let mut new_cols = Vec::with_capacity(new_pos);
        for (j, &is_dead) in dead.iter().enumerate().take(old_len) {
            if !is_dead {
                new_code.push(self.code[j]);
                new_lines.push(self.lines[j]);
                new_cols.push(self.cols[j]);
            }
        }
        self.code = new_code;
        self.lines = new_lines;
        self.cols = new_cols;
    }
621}
622
/// A compiled function prototype (stored in the constant pool).
#[derive(Debug, Clone)]
pub struct FnProto {
    /// Function name — presumably used for diagnostics; confirm at call sites.
    pub name: String,
    /// Number of declared parameters.
    pub arity: usize,
    /// The function body's compiled bytecode.
    pub chunk: Chunk,
    /// Parameter names in declaration order.
    pub param_names: Vec<String>,
    /// Per-parameter flag: whether that parameter has a default value.
    /// NOTE(review): appears parallel to `param_names` — confirm the
    /// compiler keeps the lengths in sync.
    pub has_defaults: Vec<bool>,
}