// ion_core/bytecode.rs
1//! Bytecode instruction set and chunk representation for the Ion VM.
2
3use crate::value::Value;
4
/// VM opcodes — each is a single byte.
///
/// Discriminants are assigned by declaration order (`#[repr(u8)]`), and
/// `Chunk::instruction_size` decodes operand widths by comparing raw bytes
/// against `Op::Variant as u8`, so variant order is part of the bytecode
/// format: do not reorder variants or insert new ones in the middle.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Op {
    /// Push a constant from the constant pool onto the stack.
    Constant, // u16 index
    /// Push common values without a constant pool lookup.
    True,
    False,
    Unit,
    None,

    // --- Arithmetic ---
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Neg,

    // --- Bitwise ---
    BitAnd,
    BitOr,
    BitXor,
    Shl,
    Shr,

    // --- Comparison ---
    Eq,
    NotEq,
    Lt,
    Gt,
    LtEq,
    GtEq,

    // --- Logic ---
    Not,
    And, // u16 jump offset (short-circuit)
    Or,  // u16 jump offset (short-circuit)

    // --- Variables ---
    /// Define a variable in the current scope.
    DefineLocal, // u16 name constant index, u8 mutable flag
    /// Get a local variable by name.
    GetLocal, // u16 name constant index
    /// Set a local variable by name.
    SetLocal, // u16 name constant index
    /// Get a global/captured variable by name.
    GetGlobal, // u16 name constant index
    /// Set a global variable.
    SetGlobal, // u16 name constant index

    // --- Control flow ---
    /// Unconditional jump forward.
    Jump, // u16 offset
    /// Jump forward if top of stack is falsy (pops condition).
    JumpIfFalse, // u16 offset
    /// Jump backward (for loops).
    Loop, // u16 offset back

    // --- Functions ---
    /// Call a function: pops func + args from stack.
    Call, // u8 arg count
    /// Tail call: like Call but reuses the current frame (no stack growth).
    TailCall, // u8 arg count
    /// Return from current function.
    Return,

    // --- Stack manipulation ---
    /// Pop and discard the top value.
    Pop,
    /// Duplicate the top value.
    Dup,

    // --- Composite types ---
    /// Build a list from N values on stack.
    BuildList, // u16 count
    /// Build a tuple from N values on stack.
    BuildTuple, // u16 count
    /// Build a dict from N key-value pairs on stack.
    BuildDict, // u16 count (number of pairs)

    // --- Field/index access ---
    /// Get field: pop object, push object.field.
    GetField, // u16 field name constant index
    /// Index access: pop index, pop object, push object[index].
    GetIndex,
    /// Set field: pop value, pop object, mutate, push value.
    SetField, // u16 field name constant index
    /// Set index: pop value, pop index, pop object, mutate, push value.
    SetIndex,
    /// Method call: pop args + receiver, push result.
    MethodCall, // u16 method name constant index, u8 arg count

    // --- Closures ---
    /// Create a closure from a function prototype.
    Closure, // u16 function constant index

    // --- Option/Result ---
    WrapSome,
    WrapOk,
    WrapErr,
    /// Try operator (?): unwrap Ok/Some or propagate Err/None.
    Try,

    // --- Scope ---
    PushScope,
    PopScope,

    // --- String ---
    /// Build an f-string from N parts on stack.
    BuildFString, // u16 part count

    // --- Pipe ---
    /// Pipe operator: rearranges stack for function call.
    Pipe, // u8 arg count (always 1 extra)

    // --- Pattern matching ---
    /// Begin a match expression.
    MatchBegin,
    /// Test a pattern against the match subject.
    MatchArm, // u8 kind (1=Some,2=Ok,3=Err,4=tuple,5=list,6=struct field,7=enum field)
    /// End match (cleanup).
    MatchEnd,

    // --- Range ---
    BuildRange, // u8: 0 = exclusive, 1 = inclusive

    // --- Host types ---
    ConstructStruct, // u16 type name index, u16 field count
    ConstructEnum,   // u16 enum name index, u16 variant name index, u8 arg count

    // --- Comprehensions ---
    /// List comprehension iteration setup
    IterInit,
    IterNext, // u16 jump offset when exhausted
    ListAppend,
    /// Pop a list from TOS, extend the list below TOS with its items (for spread).
    ListExtend,
    DictInsert,
    /// Merge a dict into the dict below it on the stack (for spread).
    DictMerge,
    /// Drop the current iterator (for break in for-loops).
    IterDrop,
    /// Runtime type check: peek TOS, compare type against constant at u16 index.
    CheckType, // u16: constant index (string type name)

    /// Slice access: pop end (or sentinel), pop start (or sentinel), pop object, push slice.
    Slice, // u8: flags (bit 0 = has_start, bit 1 = has_end, bit 2 = inclusive)

    // --- Stack-slot locals (fast path) ---
    /// Define a local in the slot array.
    DefineLocalSlot, // u8 mutable flag
    /// Get a local by slot index (relative to current frame base).
    GetLocalSlot, // u16 slot index
    /// Set a local by slot index.
    SetLocalSlot, // u16 slot index

    /// Import every entry from a module dict on TOS into the current env scope.
    ImportGlob,

    /// Call with named arguments: u8 arg count, then u8 count of named pairs,
    /// each pair is u8 (arg position) + u16 (name constant index).
    CallNamed, // u8 total_args, u8 named_count, then [u8 position, u16 name_idx] * named_count

    /// Begin a try block: push exception handler.
    TryBegin, // u16 catch handler offset
    /// End a try block (no error): pop handler, jump over catch.
    TryEnd, // u16 jump offset past catch block

    // --- Native async runtime ---
    /// Spawn a function call as an async runtime task: pops func + args, pushes AsyncTask.
    SpawnCall, // u8 arg count
    /// Spawn a function call with named arguments as an async runtime task.
    SpawnCallNamed, // u8 total_args, u8 named_count, then [u8 position, u16 name_idx] * named_count
    /// Await an AsyncTask by parking this VM continuation on the runtime future table.
    AwaitTask,
    /// Race N AsyncTask handles and push `(winner_index, value)` when one completes.
    SelectTasks, // u8 branch count

    /// Print (for testing/debugging)
    Print, // u8: 0 = print, 1 = println
}
187
/// A compiled bytecode chunk.
///
/// `lines` and `cols` are parallel to `code`: `emit`/`emit_span` push one
/// entry onto each per emitted *byte*, so an instruction's source location
/// can be recovered from any of its byte offsets.
#[derive(Debug, Clone)]
pub struct Chunk {
    /// The bytecode instructions, as a flat byte stream.
    pub code: Vec<u8>,
    /// Constant pool, indexed by the u16 operands of `Op::Constant` etc.
    pub constants: Vec<Value>,
    /// Source line for each bytecode byte (for error reporting).
    pub lines: Vec<usize>,
    /// Source column for each bytecode byte (0 when only a line was recorded).
    pub cols: Vec<usize>,
}
200
201impl Default for Chunk {
202    fn default() -> Self {
203        Self::new()
204    }
205}
206
207impl Chunk {
208    pub fn new() -> Self {
209        Self {
210            code: Vec::new(),
211            constants: Vec::new(),
212            lines: Vec::new(),
213            cols: Vec::new(),
214        }
215    }
216
217    /// Emit a single byte with source location.
218    pub fn emit(&mut self, byte: u8, line: usize) {
219        self.code.push(byte);
220        self.lines.push(line);
221        self.cols.push(0);
222    }
223
    /// Emit a single byte with full source span (line + col).
    ///
    /// Pushes one entry onto each of the parallel `code`/`lines`/`cols`
    /// arrays, keeping them the same length.
    pub fn emit_span(&mut self, byte: u8, line: usize, col: usize) {
        self.code.push(byte);
        self.lines.push(line);
        self.cols.push(col);
    }
230
    /// Emit an opcode (no operands) with a source line.
    ///
    /// `Op` is `#[repr(u8)]`, so `as u8` yields the declaration-order
    /// discriminant.
    pub fn emit_op(&mut self, op: Op, line: usize) {
        self.emit(op as u8, line);
    }
235
    /// Emit an opcode (no operands) with a full source span.
    pub fn emit_op_span(&mut self, op: Op, line: usize, col: usize) {
        self.emit_span(op as u8, line, col);
    }
240
241    /// Emit an opcode followed by a u16 operand.
242    pub fn emit_op_u16(&mut self, op: Op, operand: u16, line: usize) {
243        self.emit(op as u8, line);
244        self.emit((operand >> 8) as u8, line);
245        self.emit((operand & 0xff) as u8, line);
246    }
247
248    /// Emit an opcode followed by a u16 operand with full span.
249    pub fn emit_op_u16_span(&mut self, op: Op, operand: u16, line: usize, col: usize) {
250        self.emit_span(op as u8, line, col);
251        self.emit_span((operand >> 8) as u8, line, col);
252        self.emit_span((operand & 0xff) as u8, line, col);
253    }
254
    /// Emit an opcode followed by a single u8 operand.
    pub fn emit_op_u8(&mut self, op: Op, operand: u8, line: usize) {
        self.emit(op as u8, line);
        self.emit(operand, line);
    }
260
    /// Emit an opcode followed by a single u8 operand, with full span.
    pub fn emit_op_u8_span(&mut self, op: Op, operand: u8, line: usize, col: usize) {
        self.emit_span(op as u8, line, col);
        self.emit_span(operand, line, col);
    }
266
267    /// Add a constant to the pool, returning its index.
268    /// Deduplicates string, int, float, and bool constants.
269    pub fn add_constant(&mut self, value: Value) -> u16 {
270        for (i, c) in self.constants.iter().enumerate() {
271            let is_dup = match (&value, c) {
272                (Value::Str(a), Value::Str(b)) => a == b,
273                (Value::Int(a), Value::Int(b)) => a == b,
274                (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
275                (Value::Bool(a), Value::Bool(b)) => a == b,
276                _ => false,
277            };
278            if is_dup {
279                return i as u16;
280            }
281        }
282        self.constants.push(value);
283        (self.constants.len() - 1) as u16
284    }
285
    /// Emit a constant load instruction.
    ///
    /// Interns `value` into the pool (deduplicating where `add_constant`
    /// supports it) and emits `Op::Constant` with the resulting u16 index.
    pub fn emit_constant(&mut self, value: Value, line: usize) {
        let idx = self.add_constant(value);
        self.emit_op_u16(Op::Constant, idx, line);
    }
291
    /// Current code offset — the total number of bytecode bytes emitted so
    /// far (used as the target for jumps and patches).
    pub fn len(&self) -> usize {
        self.code.len()
    }
296
    /// Returns true if the chunk contains no bytecode.
    pub fn is_empty(&self) -> bool {
        self.code.is_empty()
    }
301
302    /// Emit a jump instruction, returning the offset to patch later.
303    pub fn emit_jump(&mut self, op: Op, line: usize) -> usize {
304        self.emit_op_u16(op, 0xffff, line);
305        self.code.len() - 2 // offset of the u16 placeholder
306    }
307
308    /// Patch a previously emitted jump to target the current position.
309    pub fn patch_jump(&mut self, offset: usize) {
310        let jump = self.code.len() - offset - 2;
311        self.code[offset] = (jump >> 8) as u8;
312        self.code[offset + 1] = (jump & 0xff) as u8;
313    }
314
315    /// Read a u16 operand at the given offset.
316    pub fn read_u16(&self, offset: usize) -> u16 {
317        ((self.code[offset] as u16) << 8) | (self.code[offset + 1] as u16)
318    }
319
    /// Read a u8 operand at the given offset.
    ///
    /// Panics if `offset` is out of bounds.
    pub fn read_u8(&self, offset: usize) -> u8 {
        self.code[offset]
    }
324
    /// Return the total size (opcode + operands) of the instruction at `offset`.
    ///
    /// Takes a raw `code` slice (associated fn, not `&self`) so callers can
    /// decode while borrowing other parts of the chunk. Truncated or unknown
    /// bytes report a small conservative size so byte-wise scans always make
    /// forward progress.
    pub fn instruction_size(code: &[u8], offset: usize) -> usize {
        if offset >= code.len() {
            // Past the end: claim 1 byte so the caller still advances.
            return 1;
        }
        match code[offset] {
            // 1-byte (no operands)
            x if x == Op::True as u8
                || x == Op::False as u8
                || x == Op::Unit as u8
                || x == Op::None as u8
                || x == Op::Add as u8
                || x == Op::Sub as u8
                || x == Op::Mul as u8
                || x == Op::Div as u8
                || x == Op::Mod as u8
                || x == Op::Neg as u8
                || x == Op::BitAnd as u8
                || x == Op::BitOr as u8
                || x == Op::BitXor as u8
                || x == Op::Shl as u8
                || x == Op::Shr as u8
                || x == Op::Eq as u8
                || x == Op::NotEq as u8
                || x == Op::Lt as u8
                || x == Op::Gt as u8
                || x == Op::LtEq as u8
                || x == Op::GtEq as u8
                || x == Op::Not as u8
                || x == Op::Pop as u8
                || x == Op::Dup as u8
                || x == Op::GetIndex as u8
                || x == Op::SetIndex as u8
                || x == Op::WrapSome as u8
                || x == Op::WrapOk as u8
                || x == Op::WrapErr as u8
                || x == Op::Try as u8
                || x == Op::PushScope as u8
                || x == Op::PopScope as u8
                || x == Op::MatchEnd as u8
                || x == Op::Return as u8
                || x == Op::IterInit as u8
                || x == Op::ListAppend as u8
                || x == Op::ListExtend as u8
                || x == Op::DictInsert as u8
                || x == Op::DictMerge as u8
                || x == Op::IterDrop as u8
                || x == Op::ImportGlob as u8
                || x == Op::AwaitTask as u8 =>
            {
                1
            }
            // 2-byte (u8 operand)
            x if x == Op::Call as u8
                || x == Op::TailCall as u8
                || x == Op::Pipe as u8
                || x == Op::BuildRange as u8
                || x == Op::Slice as u8
                || x == Op::DefineLocalSlot as u8
                || x == Op::SpawnCall as u8
                || x == Op::SelectTasks as u8
                || x == Op::Print as u8 =>
            {
                2
            }
            // 3-byte (u16 operand)
            x if x == Op::Constant as u8
                || x == Op::And as u8
                || x == Op::Or as u8
                || x == Op::GetLocal as u8
                || x == Op::SetLocal as u8
                || x == Op::GetGlobal as u8
                || x == Op::SetGlobal as u8
                || x == Op::Jump as u8
                || x == Op::JumpIfFalse as u8
                || x == Op::Loop as u8
                || x == Op::BuildList as u8
                || x == Op::BuildTuple as u8
                || x == Op::BuildDict as u8
                || x == Op::GetField as u8
                || x == Op::SetField as u8
                || x == Op::Closure as u8
                || x == Op::BuildFString as u8
                || x == Op::IterNext as u8
                || x == Op::GetLocalSlot as u8
                || x == Op::SetLocalSlot as u8
                || x == Op::TryBegin as u8
                || x == Op::TryEnd as u8
                || x == Op::CheckType as u8 =>
            {
                3
            }
            // 4-byte (u16 + u8)
            x if x == Op::DefineLocal as u8 || x == Op::MethodCall as u8 => 4,
            // 5-byte (u16 + u16)
            x if x == Op::ConstructStruct as u8 => 5,
            // 6-byte (u16 + u16 + u8)
            x if x == Op::ConstructEnum as u8 => 6,
            // Variable-width named calls: 1(op) + 1(total_args) + 1(named_count) + named_count * 3
            x if x == Op::CallNamed as u8 || x == Op::SpawnCallNamed as u8 => {
                if offset + 2 < code.len() {
                    // named_count lives in the third byte of the instruction.
                    let named_count = code[offset + 2] as usize;
                    3 + named_count * 3 // op + total_args + named_count + (position u8 + name u16) * count
                } else {
                    // Truncated instruction: report just the fixed header.
                    3
                }
            }
            // Variable-width: MatchBegin (u8 kind + extra operands depending on kind)
            x if x == Op::MatchBegin as u8 => {
                if offset + 1 < code.len() {
                    match code[offset + 1] {
                        4 => 3, // Tuple: kind + u8 length
                        5 => 4, // List: kind + u8 length + u8 has_rest
                        6 => 4, // Host struct: kind + u16 type-name constant
                        7 => 7, // Host enum: kind + u16 enum + u16 variant + u8 arity
                        _ => 2, // Some/Ok/Err: just kind
                    }
                } else {
                    2
                }
            }
            // Variable-width: MatchArm (u8 kind, then u8 index for kinds 4/5/7 or u16 field for kind 6)
            x if x == Op::MatchArm as u8 => {
                if offset + 1 < code.len() {
                    let kind = code[offset + 1];
                    if kind == 4 || kind == 5 || kind == 7 {
                        3
                    } else if kind == 6 {
                        4
                    } else {
                        2
                    }
                } else {
                    2
                }
            }
            // Unknown — treat as 1 to avoid infinite loops
            _ => 1,
        }
    }
465
    /// Peephole optimization pass: removes dead instruction sequences and
    /// adjusts jump targets accordingly.
    /// - `Not; Not` → remove both (double negation)
    /// - `Neg; Neg` → remove both (double arithmetic negation)
    /// - `Jump 0` → remove (nop jump to next instruction)
    /// - Pure push + `Pop` → remove both (dead value)
    ///
    /// Works by marking bytes dead in a mask and rescanning to a fixpoint
    /// (removing one pair can make a new pair adjacent); the actual removal
    /// and jump retargeting happen once, in `compact_dead`.
    ///
    /// NOTE(review): the pair removals assume no jump lands *between* the two
    /// paired instructions — presumably the compiler never emits such a jump
    /// into the middle of these patterns; confirm against the compiler.
    pub fn peephole_optimize(&mut self) {
        let old_len = self.code.len();
        if old_len == 0 {
            return;
        }
        // dead[i] == true → byte i is removed during compaction.
        let mut dead = vec![false; old_len];
        let mut changed = true;
        while changed {
            changed = false;
            let mut i = 0;
            while i < old_len {
                if dead[i] {
                    i += 1;
                    continue;
                }
                let size = Self::instruction_size(&self.code, i);
                // Find next live instruction
                let mut next = i + size;
                while next < old_len && dead[next] {
                    next += 1;
                }
                if next >= old_len {
                    break;
                }
                let next_size = Self::instruction_size(&self.code, next);

                // Pattern: Not; Not → remove both
                if self.code[i] == Op::Not as u8 && self.code[next] == Op::Not as u8 {
                    dead[i] = true;
                    dead[next] = true;
                    changed = true;
                    i = next + next_size;
                    continue;
                }

                // Pattern: Neg; Neg → remove both
                if self.code[i] == Op::Neg as u8 && self.code[next] == Op::Neg as u8 {
                    dead[i] = true;
                    dead[next] = true;
                    changed = true;
                    i = next + next_size;
                    continue;
                }

                // Pattern: Jump 0 → remove (jump to next instruction)
                if self.code[i] == Op::Jump as u8 && size == 3 {
                    let target = self.read_u16(i + 1);
                    if target == 0 {
                        // Mark opcode + both operand bytes dead.
                        for item in dead.iter_mut().skip(i).take(3) {
                            *item = true;
                        }
                        changed = true;
                        i = next;
                        continue;
                    }
                }

                // Pattern: pure push followed by Pop → remove both
                // "Pure" = pushes exactly one value and has no side effects.
                if self.code[next] == Op::Pop as u8 {
                    let b = self.code[i];
                    let is_pure = b == Op::True as u8
                        || b == Op::False as u8
                        || b == Op::Unit as u8
                        || b == Op::None as u8
                        || b == Op::Constant as u8
                        || b == Op::Dup as u8
                        || b == Op::GetLocalSlot as u8;
                    if is_pure {
                        // Kill the push (opcode + operands) and the Pop.
                        for item in dead.iter_mut().skip(i).take(size) {
                            *item = true;
                        }
                        dead[next] = true;
                        changed = true;
                        i = next + 1;
                        continue;
                    }
                }

                i = next;
            }
        }

        self.compact_dead(&dead);
    }
556
    /// Remove dead bytes and adjust jump targets.
    ///
    /// Two phases: (1) walk live instructions and rewrite every relative
    /// jump operand using an old-position → new-position map; (2) copy the
    /// surviving bytes (and their parallel line/col entries) into fresh
    /// vectors. If nothing is dead, returns without touching the code.
    fn compact_dead(&mut self, dead: &[bool]) {
        let old_len = self.code.len();

        // Build offset map: old position → new position.
        // offset_map has old_len + 1 entries so a jump target of exactly
        // old_len (one past the end) also maps cleanly.
        let mut offset_map = vec![0usize; old_len + 1];
        let mut new_pos = 0;
        for old_pos in 0..old_len {
            offset_map[old_pos] = new_pos;
            if !dead[old_pos] {
                new_pos += 1;
            }
        }
        offset_map[old_len] = new_pos;

        if new_pos == old_len {
            return;
        } // nothing to compact

        // Adjust jump targets (walk instruction boundaries on live code)
        let mut i = 0;
        while i < old_len {
            if dead[i] {
                i += 1;
                continue;
            }
            let op = self.code[i];
            let size = Self::instruction_size(&self.code, i);

            // Forward jumps: offset is relative to the byte AFTER the instruction
            if (op == Op::Jump as u8
                || op == Op::JumpIfFalse as u8
                || op == Op::And as u8
                || op == Op::Or as u8
                || op == Op::IterNext as u8)
                && size == 3
            {
                let old_offset = self.read_u16(i + 1) as usize;
                let old_target = i + 3 + old_offset;
                let new_instr_end = offset_map[i] + 3; // new position of byte after this instr
                let new_target = if old_target <= old_len {
                    offset_map[old_target]
                } else {
                    // Target beyond the chunk: clamp to the new end.
                    offset_map[old_len]
                };
                let new_offset = new_target.saturating_sub(new_instr_end);
                self.code[i + 1] = (new_offset >> 8) as u8;
                self.code[i + 2] = (new_offset & 0xff) as u8;
            }

            // Backward jump: Loop
            if op == Op::Loop as u8 && size == 3 {
                let old_offset = self.read_u16(i + 1) as usize;
                // wrapping_sub: a corrupt operand pointing before byte 0 wraps
                // and is caught by the `<= old_len` bound check below.
                let old_target = (i + 3).wrapping_sub(old_offset);
                let new_instr_end = offset_map[i] + 3;
                let new_target = if old_target <= old_len {
                    offset_map[old_target]
                } else {
                    0
                };
                let new_offset = new_instr_end.saturating_sub(new_target);
                self.code[i + 1] = (new_offset >> 8) as u8;
                self.code[i + 2] = (new_offset & 0xff) as u8;
            }

            i += size;
        }

        // Compact: remove dead bytes, keeping lines/cols parallel to code.
        let mut new_code = Vec::with_capacity(new_pos);
        let mut new_lines = Vec::with_capacity(new_pos);
        let mut new_cols = Vec::with_capacity(new_pos);
        for (j, &is_dead) in dead.iter().enumerate().take(old_len) {
            if !is_dead {
                new_code.push(self.code[j]);
                new_lines.push(self.lines[j]);
                new_cols.push(self.cols[j]);
            }
        }
        self.code = new_code;
        self.lines = new_lines;
        self.cols = new_cols;
    }
640}
641
/// A compiled function prototype (stored in the constant pool).
#[derive(Debug, Clone)]
pub struct FnProto {
    /// Function name.
    pub name: String,
    /// Number of declared parameters.
    pub arity: usize,
    /// Compiled bytecode for the function body.
    pub chunk: Chunk,
    /// Parameter names, in declaration order.
    pub param_names: Vec<String>,
    /// One flag per parameter — presumably marks parameters that have a
    /// default value (parallel to `param_names`); confirm against the compiler.
    pub has_defaults: Vec<bool>,
}