// ion_core/bytecode.rs
//! Bytecode instruction set and chunk representation for the Ion VM.

use crate::value::Value;

/// VM opcodes — each is a single byte.
///
/// Discriminants are assigned implicitly in declaration order, so variant
/// order is part of the bytecode encoding: appending new opcodes is safe,
/// reordering existing ones invalidates previously compiled chunks.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Op {
    /// Push a constant from the constant pool onto the stack.
    Constant, // u16 index
    /// Push common values without a constant pool lookup.
    True,
    False,
    Unit,
    None,

    // --- Arithmetic ---
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Neg,

    // --- Bitwise ---
    BitAnd,
    BitOr,
    BitXor,
    Shl,
    Shr,

    // --- Comparison ---
    Eq,
    NotEq,
    Lt,
    Gt,
    LtEq,
    GtEq,

    // --- Logic ---
    Not,
    And, // u16 jump offset (short-circuit)
    Or,  // u16 jump offset (short-circuit)

    // --- Variables ---
    /// Define a variable in the current scope.
    DefineLocal, // u16 name constant index, u8 mutable flag
    /// Get a local variable by name.
    GetLocal, // u16 name constant index
    /// Set a local variable by name.
    SetLocal, // u16 name constant index
    /// Get a global/captured variable by name.
    GetGlobal, // u16 name constant index
    /// Set a global variable.
    SetGlobal, // u16 name constant index

    // --- Control flow ---
    /// Unconditional jump forward.
    Jump, // u16 offset
    /// Jump forward if top of stack is falsy (pops condition).
    JumpIfFalse, // u16 offset
    /// Jump backward (for loops).
    Loop, // u16 offset back

    // --- Functions ---
    /// Call a function: pops func + args from stack.
    Call, // u8 arg count
    /// Tail call: like Call but reuses the current frame (no stack growth).
    TailCall, // u8 arg count
    /// Return from current function.
    Return,

    // --- Stack manipulation ---
    /// Pop and discard the top value.
    Pop,
    /// Duplicate the top value.
    Dup,

    // --- Composite types ---
    /// Build a list from N values on stack.
    BuildList, // u16 count
    /// Build a tuple from N values on stack.
    BuildTuple, // u16 count
    /// Build a dict from N key-value pairs on stack.
    BuildDict, // u16 count (number of pairs)

    // --- Field/index access ---
    /// Get field: pop object, push object.field.
    GetField, // u16 field name constant index
    /// Index access: pop index, pop object, push object[index].
    GetIndex,
    /// Set field: pop value, pop object, mutate, push value.
    SetField, // u16 field name constant index
    /// Set index: pop value, pop index, pop object, mutate, push value.
    SetIndex,
    /// Method call: pop args + receiver, push result.
    MethodCall, // u16 method name constant index, u8 arg count

    // --- Closures ---
    /// Create a closure from a function prototype.
    Closure, // u16 function constant index

    // --- Option/Result ---
    WrapSome,
    WrapOk,
    WrapErr,
    /// Try operator (?): unwrap Ok/Some or propagate Err/None.
    Try,

    // --- Scope ---
    PushScope,
    PopScope,

    // --- String ---
    /// Build an f-string from N parts on stack.
    BuildFString, // u16 part count

    // --- Pipe ---
    /// Pipe operator: rearranges stack for function call.
    Pipe, // u8 arg count (always 1 extra)

    // --- Pattern matching ---
    /// Begin a match expression.
    MatchBegin,
    /// Test a pattern against the match subject.
    MatchArm, // u8 kind (1=Some,2=Ok,3=Err,4=tuple,5=list,6=struct field,7=enum field)
    /// End match (cleanup).
    MatchEnd,

    // --- Range ---
    BuildRange, // u8: 0 = exclusive, 1 = inclusive

    // --- Host types ---
    ConstructStruct, // u16 type name index, u16 field count
    ConstructEnum,   // u16 enum name index, u16 variant name index, u8 arg count

    // --- Comprehensions ---
    /// List comprehension iteration setup.
    IterInit,
    IterNext, // u16 jump offset when exhausted
    ListAppend,
    /// Pop a list from TOS, extend the list below TOS with its items (for spread).
    ListExtend,
    DictInsert,
    /// Merge a dict into the dict below it on the stack (for spread).
    DictMerge,
    /// Drop the current iterator (for break in for-loops).
    IterDrop,
    /// Runtime type check: peek TOS, compare type against constant at u16 index.
    CheckType, // u16: constant index (string type name)

    /// Slice access: pop end (or sentinel), pop start (or sentinel), pop object, push slice.
    Slice, // u8: flags (bit 0 = has_start, bit 1 = has_end, bit 2 = inclusive)

    // --- Stack-slot locals (fast path) ---
    /// Define a local in the slot array.
    DefineLocalSlot, // u8 mutable flag
    /// Get a local by slot index (relative to current frame base).
    GetLocalSlot, // u16 slot index
    /// Set a local by slot index.
    SetLocalSlot, // u16 slot index

    /// Import every entry from a module dict on TOS into the current env scope.
    ImportGlob,

    /// Call with named arguments: u8 arg count, then u8 count of named pairs,
    /// each encoded as u8 (arg position) + u16 (name constant index).
    CallNamed, // u8 total_args, u8 named_count, then [u8 position, u16 name_idx] * named_count

    /// Begin a try block: push exception handler.
    TryBegin, // u16 catch handler offset
    /// End a try block (no error): pop handler, jump over catch.
    TryEnd, // u16 jump offset past catch block

    // --- Native async runtime ---
    /// Spawn a function call as an async runtime task: pops func + args, pushes AsyncTask.
    SpawnCall, // u8 arg count
    /// Spawn a function call with named arguments as an async runtime task.
    SpawnCallNamed, // u8 total_args, u8 named_count, then [u8 position, u16 name_idx] * named_count
    /// Await an AsyncTask by parking this VM continuation on the runtime future table.
    AwaitTask,
    /// Race N AsyncTask handles and push `(winner_index, value)` when one completes.
    SelectTasks, // u8 branch count

    /// Print (for testing/debugging)
    Print, // u8: 0 = print, 1 = println
}

188/// A compiled bytecode chunk.
189#[derive(Debug, Clone)]
190pub struct Chunk {
191 /// The bytecode instructions.
192 pub code: Vec<u8>,
193 /// Constant pool.
194 pub constants: Vec<Value>,
195 /// Line number for each instruction (for error reporting).
196 pub lines: Vec<usize>,
197 /// Column number for each instruction (for error reporting).
198 pub cols: Vec<usize>,
199}
200
201impl Default for Chunk {
202 fn default() -> Self {
203 Self::new()
204 }
205}
206
207impl Chunk {
208 pub fn new() -> Self {
209 Self {
210 code: Vec::new(),
211 constants: Vec::new(),
212 lines: Vec::new(),
213 cols: Vec::new(),
214 }
215 }
216
217 /// Emit a single byte with source location.
218 pub fn emit(&mut self, byte: u8, line: usize) {
219 self.code.push(byte);
220 self.lines.push(line);
221 self.cols.push(0);
222 }
223
224 /// Emit a single byte with full source span (line + col).
225 pub fn emit_span(&mut self, byte: u8, line: usize, col: usize) {
226 self.code.push(byte);
227 self.lines.push(line);
228 self.cols.push(col);
229 }
230
231 /// Emit an opcode.
232 pub fn emit_op(&mut self, op: Op, line: usize) {
233 self.emit(op as u8, line);
234 }
235
236 /// Emit an opcode with full span.
237 pub fn emit_op_span(&mut self, op: Op, line: usize, col: usize) {
238 self.emit_span(op as u8, line, col);
239 }
240
241 /// Emit an opcode followed by a u16 operand.
242 pub fn emit_op_u16(&mut self, op: Op, operand: u16, line: usize) {
243 self.emit(op as u8, line);
244 self.emit((operand >> 8) as u8, line);
245 self.emit((operand & 0xff) as u8, line);
246 }
247
248 /// Emit an opcode followed by a u16 operand with full span.
249 pub fn emit_op_u16_span(&mut self, op: Op, operand: u16, line: usize, col: usize) {
250 self.emit_span(op as u8, line, col);
251 self.emit_span((operand >> 8) as u8, line, col);
252 self.emit_span((operand & 0xff) as u8, line, col);
253 }
254
255 /// Emit an opcode followed by a u8 operand.
256 pub fn emit_op_u8(&mut self, op: Op, operand: u8, line: usize) {
257 self.emit(op as u8, line);
258 self.emit(operand, line);
259 }
260
261 /// Emit an opcode followed by a u8 operand with full span.
262 pub fn emit_op_u8_span(&mut self, op: Op, operand: u8, line: usize, col: usize) {
263 self.emit_span(op as u8, line, col);
264 self.emit_span(operand, line, col);
265 }
266
267 /// Add a constant to the pool, returning its index.
268 /// Deduplicates string, int, float, and bool constants.
269 pub fn add_constant(&mut self, value: Value) -> u16 {
270 for (i, c) in self.constants.iter().enumerate() {
271 let is_dup = match (&value, c) {
272 (Value::Str(a), Value::Str(b)) => a == b,
273 (Value::Int(a), Value::Int(b)) => a == b,
274 (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
275 (Value::Bool(a), Value::Bool(b)) => a == b,
276 _ => false,
277 };
278 if is_dup {
279 return i as u16;
280 }
281 }
282 self.constants.push(value);
283 (self.constants.len() - 1) as u16
284 }
285
286 /// Emit a constant load instruction.
287 pub fn emit_constant(&mut self, value: Value, line: usize) {
288 let idx = self.add_constant(value);
289 self.emit_op_u16(Op::Constant, idx, line);
290 }
291
292 /// Current code offset.
293 pub fn len(&self) -> usize {
294 self.code.len()
295 }
296
297 /// Returns true if the chunk contains no bytecode.
298 pub fn is_empty(&self) -> bool {
299 self.code.is_empty()
300 }
301
302 /// Emit a jump instruction, returning the offset to patch later.
303 pub fn emit_jump(&mut self, op: Op, line: usize) -> usize {
304 self.emit_op_u16(op, 0xffff, line);
305 self.code.len() - 2 // offset of the u16 placeholder
306 }
307
308 /// Patch a previously emitted jump to target the current position.
309 pub fn patch_jump(&mut self, offset: usize) {
310 let jump = self.code.len() - offset - 2;
311 self.code[offset] = (jump >> 8) as u8;
312 self.code[offset + 1] = (jump & 0xff) as u8;
313 }
314
315 /// Read a u16 operand at the given offset.
316 pub fn read_u16(&self, offset: usize) -> u16 {
317 ((self.code[offset] as u16) << 8) | (self.code[offset + 1] as u16)
318 }
319
320 /// Read a u8 operand at the given offset.
321 pub fn read_u8(&self, offset: usize) -> u8 {
322 self.code[offset]
323 }
324
325 /// Return the total size (opcode + operands) of the instruction at `offset`.
326 pub fn instruction_size(code: &[u8], offset: usize) -> usize {
327 if offset >= code.len() {
328 return 1;
329 }
330 match code[offset] {
331 // 1-byte (no operands)
332 x if x == Op::True as u8
333 || x == Op::False as u8
334 || x == Op::Unit as u8
335 || x == Op::None as u8
336 || x == Op::Add as u8
337 || x == Op::Sub as u8
338 || x == Op::Mul as u8
339 || x == Op::Div as u8
340 || x == Op::Mod as u8
341 || x == Op::Neg as u8
342 || x == Op::BitAnd as u8
343 || x == Op::BitOr as u8
344 || x == Op::BitXor as u8
345 || x == Op::Shl as u8
346 || x == Op::Shr as u8
347 || x == Op::Eq as u8
348 || x == Op::NotEq as u8
349 || x == Op::Lt as u8
350 || x == Op::Gt as u8
351 || x == Op::LtEq as u8
352 || x == Op::GtEq as u8
353 || x == Op::Not as u8
354 || x == Op::Pop as u8
355 || x == Op::Dup as u8
356 || x == Op::GetIndex as u8
357 || x == Op::SetIndex as u8
358 || x == Op::WrapSome as u8
359 || x == Op::WrapOk as u8
360 || x == Op::WrapErr as u8
361 || x == Op::Try as u8
362 || x == Op::PushScope as u8
363 || x == Op::PopScope as u8
364 || x == Op::MatchEnd as u8
365 || x == Op::Return as u8
366 || x == Op::IterInit as u8
367 || x == Op::ListAppend as u8
368 || x == Op::ListExtend as u8
369 || x == Op::DictInsert as u8
370 || x == Op::DictMerge as u8
371 || x == Op::IterDrop as u8
372 || x == Op::ImportGlob as u8
373 || x == Op::AwaitTask as u8 =>
374 {
375 1
376 }
377 // 2-byte (u8 operand)
378 x if x == Op::Call as u8
379 || x == Op::TailCall as u8
380 || x == Op::Pipe as u8
381 || x == Op::BuildRange as u8
382 || x == Op::Slice as u8
383 || x == Op::DefineLocalSlot as u8
384 || x == Op::SpawnCall as u8
385 || x == Op::SelectTasks as u8
386 || x == Op::Print as u8 =>
387 {
388 2
389 }
390 // 3-byte (u16 operand)
391 x if x == Op::Constant as u8
392 || x == Op::And as u8
393 || x == Op::Or as u8
394 || x == Op::GetLocal as u8
395 || x == Op::SetLocal as u8
396 || x == Op::GetGlobal as u8
397 || x == Op::SetGlobal as u8
398 || x == Op::Jump as u8
399 || x == Op::JumpIfFalse as u8
400 || x == Op::Loop as u8
401 || x == Op::BuildList as u8
402 || x == Op::BuildTuple as u8
403 || x == Op::BuildDict as u8
404 || x == Op::GetField as u8
405 || x == Op::SetField as u8
406 || x == Op::Closure as u8
407 || x == Op::BuildFString as u8
408 || x == Op::IterNext as u8
409 || x == Op::GetLocalSlot as u8
410 || x == Op::SetLocalSlot as u8
411 || x == Op::TryBegin as u8
412 || x == Op::TryEnd as u8
413 || x == Op::CheckType as u8 =>
414 {
415 3
416 }
417 // 4-byte (u16 + u8)
418 x if x == Op::DefineLocal as u8 || x == Op::MethodCall as u8 => 4,
419 // 5-byte (u16 + u16)
420 x if x == Op::ConstructStruct as u8 => 5,
421 // 6-byte (u16 + u16 + u8)
422 x if x == Op::ConstructEnum as u8 => 6,
423 // Variable-width named calls: 1(op) + 1(total_args) + 1(named_count) + named_count * 3
424 x if x == Op::CallNamed as u8 || x == Op::SpawnCallNamed as u8 => {
425 if offset + 2 < code.len() {
426 let named_count = code[offset + 2] as usize;
427 3 + named_count * 3 // op + total_args + named_count + (position u8 + name u16) * count
428 } else {
429 3
430 }
431 }
432 // Variable-width: MatchBegin (u8 kind + extra operands depending on kind)
433 x if x == Op::MatchBegin as u8 => {
434 if offset + 1 < code.len() {
435 match code[offset + 1] {
436 4 => 3, // Tuple: kind + u8 length
437 5 => 4, // List: kind + u8 length + u8 has_rest
438 6 => 4, // Host struct: kind + u16 type-name constant
439 7 => 7, // Host enum: kind + u16 enum + u16 variant + u8 arity
440 _ => 2, // Some/Ok/Err: just kind
441 }
442 } else {
443 2
444 }
445 }
446 // Variable-width: MatchArm (u8 kind, then u8 index for kinds 4/5/7 or u16 field for kind 6)
447 x if x == Op::MatchArm as u8 => {
448 if offset + 1 < code.len() {
449 let kind = code[offset + 1];
450 if kind == 4 || kind == 5 || kind == 7 {
451 3
452 } else if kind == 6 {
453 4
454 } else {
455 2
456 }
457 } else {
458 2
459 }
460 }
461 // Unknown — treat as 1 to avoid infinite loops
462 _ => 1,
463 }
464 }
465
466 /// Peephole optimization pass: removes dead instruction sequences and
467 /// adjusts jump targets accordingly.
468 /// - `Not; Not` → remove both (double negation)
469 /// - `Neg; Neg` → remove both (double arithmetic negation)
470 /// - `Jump 0` → remove (nop jump to next instruction)
471 /// - Pure push + `Pop` → remove both (dead value)
472 pub fn peephole_optimize(&mut self) {
473 let old_len = self.code.len();
474 if old_len == 0 {
475 return;
476 }
477 let mut dead = vec![false; old_len];
478 let mut changed = true;
479 while changed {
480 changed = false;
481 let mut i = 0;
482 while i < old_len {
483 if dead[i] {
484 i += 1;
485 continue;
486 }
487 let size = Self::instruction_size(&self.code, i);
488 // Find next live instruction
489 let mut next = i + size;
490 while next < old_len && dead[next] {
491 next += 1;
492 }
493 if next >= old_len {
494 break;
495 }
496 let next_size = Self::instruction_size(&self.code, next);
497
498 // Pattern: Not; Not → remove both
499 if self.code[i] == Op::Not as u8 && self.code[next] == Op::Not as u8 {
500 dead[i] = true;
501 dead[next] = true;
502 changed = true;
503 i = next + next_size;
504 continue;
505 }
506
507 // Pattern: Neg; Neg → remove both
508 if self.code[i] == Op::Neg as u8 && self.code[next] == Op::Neg as u8 {
509 dead[i] = true;
510 dead[next] = true;
511 changed = true;
512 i = next + next_size;
513 continue;
514 }
515
516 // Pattern: Jump 0 → remove (jump to next instruction)
517 if self.code[i] == Op::Jump as u8 && size == 3 {
518 let target = self.read_u16(i + 1);
519 if target == 0 {
520 for item in dead.iter_mut().skip(i).take(3) {
521 *item = true;
522 }
523 changed = true;
524 i = next;
525 continue;
526 }
527 }
528
529 // Pattern: pure push followed by Pop → remove both
530 if self.code[next] == Op::Pop as u8 {
531 let b = self.code[i];
532 let is_pure = b == Op::True as u8
533 || b == Op::False as u8
534 || b == Op::Unit as u8
535 || b == Op::None as u8
536 || b == Op::Constant as u8
537 || b == Op::Dup as u8
538 || b == Op::GetLocalSlot as u8;
539 if is_pure {
540 for item in dead.iter_mut().skip(i).take(size) {
541 *item = true;
542 }
543 dead[next] = true;
544 changed = true;
545 i = next + 1;
546 continue;
547 }
548 }
549
550 i = next;
551 }
552 }
553
554 self.compact_dead(&dead);
555 }
556
557 /// Remove dead bytes and adjust jump targets.
558 fn compact_dead(&mut self, dead: &[bool]) {
559 let old_len = self.code.len();
560
561 // Build offset map: old position → new position
562 let mut offset_map = vec![0usize; old_len + 1];
563 let mut new_pos = 0;
564 for old_pos in 0..old_len {
565 offset_map[old_pos] = new_pos;
566 if !dead[old_pos] {
567 new_pos += 1;
568 }
569 }
570 offset_map[old_len] = new_pos;
571
572 if new_pos == old_len {
573 return;
574 } // nothing to compact
575
576 // Adjust jump targets (walk instruction boundaries on live code)
577 let mut i = 0;
578 while i < old_len {
579 if dead[i] {
580 i += 1;
581 continue;
582 }
583 let op = self.code[i];
584 let size = Self::instruction_size(&self.code, i);
585
586 // Forward jumps: offset is relative to the byte AFTER the instruction
587 if (op == Op::Jump as u8
588 || op == Op::JumpIfFalse as u8
589 || op == Op::And as u8
590 || op == Op::Or as u8
591 || op == Op::IterNext as u8)
592 && size == 3
593 {
594 let old_offset = self.read_u16(i + 1) as usize;
595 let old_target = i + 3 + old_offset;
596 let new_instr_end = offset_map[i] + 3; // new position of byte after this instr
597 let new_target = if old_target <= old_len {
598 offset_map[old_target]
599 } else {
600 offset_map[old_len]
601 };
602 let new_offset = new_target.saturating_sub(new_instr_end);
603 self.code[i + 1] = (new_offset >> 8) as u8;
604 self.code[i + 2] = (new_offset & 0xff) as u8;
605 }
606
607 // Backward jump: Loop
608 if op == Op::Loop as u8 && size == 3 {
609 let old_offset = self.read_u16(i + 1) as usize;
610 let old_target = (i + 3).wrapping_sub(old_offset);
611 let new_instr_end = offset_map[i] + 3;
612 let new_target = if old_target <= old_len {
613 offset_map[old_target]
614 } else {
615 0
616 };
617 let new_offset = new_instr_end.saturating_sub(new_target);
618 self.code[i + 1] = (new_offset >> 8) as u8;
619 self.code[i + 2] = (new_offset & 0xff) as u8;
620 }
621
622 i += size;
623 }
624
625 // Compact: remove dead bytes
626 let mut new_code = Vec::with_capacity(new_pos);
627 let mut new_lines = Vec::with_capacity(new_pos);
628 let mut new_cols = Vec::with_capacity(new_pos);
629 for (j, &is_dead) in dead.iter().enumerate().take(old_len) {
630 if !is_dead {
631 new_code.push(self.code[j]);
632 new_lines.push(self.lines[j]);
633 new_cols.push(self.cols[j]);
634 }
635 }
636 self.code = new_code;
637 self.lines = new_lines;
638 self.cols = new_cols;
639 }
640}
641
642/// A compiled function prototype (stored in the constant pool).
643#[derive(Debug, Clone)]
644pub struct FnProto {
645 pub name: String,
646 pub arity: usize,
647 pub chunk: Chunk,
648 pub param_names: Vec<String>,
649 pub has_defaults: Vec<bool>,
650}