ion_core/bytecode.rs
//! Bytecode instruction set and chunk representation for the Ion VM.

use crate::value::Value;

/// VM opcodes — each is a single byte.
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(u8)]
pub enum Op {
    /// Push a constant from the constant pool onto the stack.
    Constant, // u16 index
    /// Push common values without a constant pool lookup.
    True,
    False,
    Unit,
    None,

    // --- Arithmetic ---
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Neg,

    // --- Bitwise ---
    BitAnd,
    BitOr,
    BitXor,
    Shl,
    Shr,

    // --- Comparison ---
    Eq,
    NotEq,
    Lt,
    Gt,
    LtEq,
    GtEq,

    // --- Logic ---
    Not,
    And, // u16 jump offset (short-circuit)
    Or, // u16 jump offset (short-circuit)

    // --- Variables ---
    /// Define a variable in the current scope.
    DefineLocal, // u16 name constant index, u8 mutable flag
    /// Get a local variable by name.
    GetLocal, // u16 name constant index
    /// Set a local variable by name.
    SetLocal, // u16 name constant index
    /// Get a global/captured variable by name.
    GetGlobal, // u16 name constant index
    /// Set a global variable.
    SetGlobal, // u16 name constant index

    // --- Control flow ---
    /// Unconditional jump forward.
    Jump, // u16 offset
    /// Jump forward if top of stack is falsy (pops condition).
    JumpIfFalse, // u16 offset
    /// Jump backward (for loops).
    Loop, // u16 offset back

    // --- Functions ---
    /// Call a function: pops func + args from stack.
    Call, // u8 arg count
    /// Tail call: like Call but reuses the current frame (no stack growth).
    TailCall, // u8 arg count
    /// Return from current function.
    Return,

    // --- Stack manipulation ---
    /// Pop and discard the top value.
    Pop,
    /// Duplicate the top value.
    Dup,

    // --- Composite types ---
    /// Build a list from N values on stack.
    BuildList, // u16 count
    /// Build a tuple from N values on stack.
    BuildTuple, // u16 count
    /// Build a dict from N key-value pairs on stack.
    BuildDict, // u16 count (number of pairs)

    // --- Field/index access ---
    /// Get field: pop object, push object.field.
    GetField, // u16 field name constant index
    /// Index access: pop index, pop object, push object[index].
    GetIndex,
    /// Set field: pop value, pop object, mutate, push value.
    SetField, // u16 field name constant index
    /// Set index: pop value, pop index, pop object, mutate, push value.
    SetIndex,
    /// Method call: pop args + receiver, push result.
    MethodCall, // u16 method name constant index, u8 arg count

    // --- Closures ---
    /// Create a closure from a function prototype.
    Closure, // u16 function constant index

    // --- Option/Result ---
    WrapSome,
    WrapOk,
    WrapErr,
    /// Try operator (?): unwrap Ok/Some or propagate Err/None.
    Try,

    // --- Scope ---
    PushScope,
    PopScope,

    // --- String ---
    /// Build an f-string from N parts on stack.
    BuildFString, // u16 part count

    // --- Pipe ---
    /// Pipe operator: rearranges the stack for a function call.
    Pipe, // u8 arg count (the piped value counts as one extra argument)

    // --- Pattern matching ---
    /// Begin a match expression.
    MatchBegin, // u8 kind; kind 4 adds u8 length, kind 5 adds u8 length + u8 has_rest (see `instruction_size`)
    /// Test a pattern against the match subject.
    MatchArm, // u8 kind (1=Some,2=Ok,3=Err,4=tuple,5=list), +u8 index for 4/5
    /// End match (cleanup).
    MatchEnd,

    // --- Range ---
    BuildRange, // u8: 0 = exclusive, 1 = inclusive

    // --- Host types ---
    ConstructStruct, // u16 type name index, u16 field count
    ConstructEnum, // u16 enum name index, u16 variant name index, u8 arg count

    // --- Comprehensions ---
    /// List comprehension iteration setup
    IterInit,
    IterNext, // u16 jump offset when exhausted
    ListAppend,
    /// Pop a list from TOS, extend the list below TOS with its items (for spread).
    ListExtend,
    DictInsert,
    /// Merge a dict into the dict below it on the stack (for spread).
    DictMerge,
    /// Drop the current iterator (for break in for-loops).
    IterDrop,
    /// Runtime type check: peek TOS, compare type against constant at u16 index.
    CheckType, // u16: constant index (string type name)

    /// Slice access: pop end (or sentinel), pop start (or sentinel), pop object, push slice.
    Slice, // u8: flags (bit 0 = has_start, bit 1 = has_end, bit 2 = inclusive)
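    // For example, an inclusive slice with both bounds present sets bits 0, 1,
    // and 2, i.e. flags = 0b0000_0111.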

    // --- Stack-slot locals (fast path) ---
    /// Define a local in the slot array.
    DefineLocalSlot, // u8 mutable flag
    /// Get a local by slot index (relative to current frame base).
    GetLocalSlot, // u16 slot index
    /// Set a local by slot index.
    SetLocalSlot, // u16 slot index

    /// Call with named arguments: u8 total arg count, then u8 count of named
    /// pairs, each encoded as u8 (arg position) + u16 (name constant index).
    CallNamed, // u8 total_args, u8 named_count, then [u8 position, u16 name_idx] * named_count
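    // For example, a call with three args where the last one is named might
    // encode as the six bytes [CallNamed, 3, 1, 2, name_hi, name_lo]:
    // 3 total args, 1 named pair, position 2, big-endian u16 name index.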

    /// Begin a try block: push exception handler.
    TryBegin, // u16 catch handler offset
    /// End a try block (no error): pop handler, jump over catch.
    TryEnd, // u16 jump offset past catch block

    /// Print (for testing/debugging)
    Print, // u8: 0 = print, 1 = println
}
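
// As a concrete illustration of the encoding: operands follow their opcode
// inline in the byte stream, with u16 operands stored big-endian (see
// `Chunk::emit_op_u16`). An expression like `1 + 2` might therefore compile
// to seven bytes:
//
//     Constant 0x0000    // 3 bytes: push constants[0] (the int 1)
//     Constant 0x0001    // 3 bytes: push constants[1] (the int 2)
//     Add                // 1 byte:  pop both, push the sum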

/// A compiled bytecode chunk.
#[derive(Debug, Clone)]
pub struct Chunk {
    /// The bytecode instructions.
    pub code: Vec<u8>,
    /// Constant pool.
    pub constants: Vec<Value>,
    /// Line number for each bytecode byte, parallel to `code` (for error reporting).
    pub lines: Vec<usize>,
    /// Column number for each bytecode byte, parallel to `code` (for error reporting).
    pub cols: Vec<usize>,
}

impl Default for Chunk {
    fn default() -> Self {
        Self::new()
    }
}

impl Chunk {
    pub fn new() -> Self {
        Self {
            code: Vec::new(),
            constants: Vec::new(),
            lines: Vec::new(),
            cols: Vec::new(),
        }
    }

    /// Emit a single byte with source location.
    pub fn emit(&mut self, byte: u8, line: usize) {
        self.code.push(byte);
        self.lines.push(line);
        self.cols.push(0);
    }

    /// Emit a single byte with full source span (line + col).
    pub fn emit_span(&mut self, byte: u8, line: usize, col: usize) {
        self.code.push(byte);
        self.lines.push(line);
        self.cols.push(col);
    }

    /// Emit an opcode.
    pub fn emit_op(&mut self, op: Op, line: usize) {
        self.emit(op as u8, line);
    }

    /// Emit an opcode with full span.
    pub fn emit_op_span(&mut self, op: Op, line: usize, col: usize) {
        self.emit_span(op as u8, line, col);
    }

    /// Emit an opcode followed by a u16 operand.
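    ///
    /// The operand is written big-endian (high byte first), matching
    /// `read_u16`. A minimal sketch (not run as a doctest):
    ///
    /// ```ignore
    /// let mut chunk = Chunk::new();
    /// chunk.emit_op_u16(Op::Constant, 0x0102, 1);
    /// assert_eq!(&chunk.code[1..3], &[0x01, 0x02]);
    /// ```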
    pub fn emit_op_u16(&mut self, op: Op, operand: u16, line: usize) {
        self.emit(op as u8, line);
        self.emit((operand >> 8) as u8, line);
        self.emit((operand & 0xff) as u8, line);
    }

    /// Emit an opcode followed by a u16 operand with full span.
    pub fn emit_op_u16_span(&mut self, op: Op, operand: u16, line: usize, col: usize) {
        self.emit_span(op as u8, line, col);
        self.emit_span((operand >> 8) as u8, line, col);
        self.emit_span((operand & 0xff) as u8, line, col);
    }

    /// Emit an opcode followed by a u8 operand.
    pub fn emit_op_u8(&mut self, op: Op, operand: u8, line: usize) {
        self.emit(op as u8, line);
        self.emit(operand, line);
    }

    /// Emit an opcode followed by a u8 operand with full span.
    pub fn emit_op_u8_span(&mut self, op: Op, operand: u8, line: usize, col: usize) {
        self.emit_span(op as u8, line, col);
        self.emit_span(operand, line, col);
    }

    /// Add a constant to the pool, returning its index.
    /// Deduplicates string, int, float, and bool constants.
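    ///
    /// For example (a sketch, not run as a doctest):
    ///
    /// ```ignore
    /// let mut chunk = Chunk::new();
    /// let a = chunk.add_constant(Value::Int(1));
    /// let b = chunk.add_constant(Value::Int(1)); // deduplicated
    /// assert_eq!(a, b);
    /// ```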
    pub fn add_constant(&mut self, value: Value) -> u16 {
        for (i, c) in self.constants.iter().enumerate() {
            let is_dup = match (&value, c) {
                (Value::Str(a), Value::Str(b)) => a == b,
                (Value::Int(a), Value::Int(b)) => a == b,
                (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
                (Value::Bool(a), Value::Bool(b)) => a == b,
                _ => false,
            };
            if is_dup {
                return i as u16;
            }
        }
        self.constants.push(value);
        (self.constants.len() - 1) as u16
    }

    /// Emit a constant load instruction.
    pub fn emit_constant(&mut self, value: Value, line: usize) {
        let idx = self.add_constant(value);
        self.emit_op_u16(Op::Constant, idx, line);
    }

    /// Current code offset.
    pub fn len(&self) -> usize {
        self.code.len()
    }

    /// Returns true if the chunk contains no bytecode.
    pub fn is_empty(&self) -> bool {
        self.code.is_empty()
    }

    /// Emit a jump instruction, returning the offset to patch later.
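    ///
    /// Pairs with `patch_jump` once the target is known. A minimal sketch of
    /// the two-step flow (not run as a doctest):
    ///
    /// ```ignore
    /// let jump = chunk.emit_jump(Op::JumpIfFalse, line); // 0xffff placeholder
    /// // ... emit the code the jump should skip ...
    /// chunk.patch_jump(jump); // rewrite the placeholder to land here
    /// ```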
    pub fn emit_jump(&mut self, op: Op, line: usize) -> usize {
        self.emit_op_u16(op, 0xffff, line);
        self.code.len() - 2 // offset of the u16 placeholder
    }

    /// Patch a previously emitted jump to target the current position.
    pub fn patch_jump(&mut self, offset: usize) {
        let jump = self.code.len() - offset - 2;
        self.code[offset] = (jump >> 8) as u8;
        self.code[offset + 1] = (jump & 0xff) as u8;
    }

    /// Read a u16 operand at the given offset.
    pub fn read_u16(&self, offset: usize) -> u16 {
        ((self.code[offset] as u16) << 8) | (self.code[offset + 1] as u16)
    }

    /// Read a u8 operand at the given offset.
    pub fn read_u8(&self, offset: usize) -> u8 {
        self.code[offset]
    }

    /// Return the total size (opcode + operands) of the instruction at `offset`.
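    ///
    /// For instance (a sketch, not run as a doctest):
    ///
    /// ```ignore
    /// let mut chunk = Chunk::new();
    /// chunk.emit_op_u16(Op::Constant, 0, 1); // opcode + u16 operand
    /// assert_eq!(Chunk::instruction_size(&chunk.code, 0), 3);
    /// ```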
    pub fn instruction_size(code: &[u8], offset: usize) -> usize {
        if offset >= code.len() {
            return 1;
        }
        match code[offset] {
            // 1-byte (no operands)
            x if x == Op::True as u8
                || x == Op::False as u8
                || x == Op::Unit as u8
                || x == Op::None as u8
                || x == Op::Add as u8
                || x == Op::Sub as u8
                || x == Op::Mul as u8
                || x == Op::Div as u8
                || x == Op::Mod as u8
                || x == Op::Neg as u8
                || x == Op::BitAnd as u8
                || x == Op::BitOr as u8
                || x == Op::BitXor as u8
                || x == Op::Shl as u8
                || x == Op::Shr as u8
                || x == Op::Eq as u8
                || x == Op::NotEq as u8
                || x == Op::Lt as u8
                || x == Op::Gt as u8
                || x == Op::LtEq as u8
                || x == Op::GtEq as u8
                || x == Op::Not as u8
                || x == Op::Pop as u8
                || x == Op::Dup as u8
                || x == Op::GetIndex as u8
                || x == Op::SetIndex as u8
                || x == Op::WrapSome as u8
                || x == Op::WrapOk as u8
                || x == Op::WrapErr as u8
                || x == Op::Try as u8
                || x == Op::PushScope as u8
                || x == Op::PopScope as u8
                || x == Op::MatchEnd as u8
                || x == Op::Return as u8
                || x == Op::IterInit as u8
                || x == Op::ListAppend as u8
                || x == Op::ListExtend as u8
                || x == Op::DictInsert as u8
                || x == Op::DictMerge as u8
                || x == Op::IterDrop as u8 =>
            {
                1
            }
            // 2-byte (u8 operand)
            x if x == Op::Call as u8
                || x == Op::TailCall as u8
                || x == Op::Pipe as u8
                || x == Op::BuildRange as u8
                || x == Op::Slice as u8
                || x == Op::DefineLocalSlot as u8
                || x == Op::Print as u8 =>
            {
                2
            }
            // 3-byte (u16 operand)
            x if x == Op::Constant as u8
                || x == Op::And as u8
                || x == Op::Or as u8
                || x == Op::GetLocal as u8
                || x == Op::SetLocal as u8
                || x == Op::GetGlobal as u8
                || x == Op::SetGlobal as u8
                || x == Op::Jump as u8
                || x == Op::JumpIfFalse as u8
                || x == Op::Loop as u8
                || x == Op::BuildList as u8
                || x == Op::BuildTuple as u8
                || x == Op::BuildDict as u8
                || x == Op::GetField as u8
                || x == Op::SetField as u8
                || x == Op::Closure as u8
                || x == Op::BuildFString as u8
                || x == Op::IterNext as u8
                || x == Op::GetLocalSlot as u8
                || x == Op::SetLocalSlot as u8
                || x == Op::TryBegin as u8
                || x == Op::TryEnd as u8
                || x == Op::CheckType as u8 =>
            {
                3
            }
            // 4-byte (u16 + u8)
            x if x == Op::DefineLocal as u8 || x == Op::MethodCall as u8 => 4,
            // 5-byte (u16 + u16)
            x if x == Op::ConstructStruct as u8 => 5,
            // 6-byte (u16 + u16 + u8)
            x if x == Op::ConstructEnum as u8 => 6,
            // Variable-width: CallNamed: 1(op) + 1(total_args) + 1(named_count) + named_count * 3
            x if x == Op::CallNamed as u8 => {
                if offset + 2 < code.len() {
                    let named_count = code[offset + 2] as usize;
                    3 + named_count * 3 // op + total_args + named_count + (position u8 + name u16) * count
                } else {
                    3
                }
            }
            // Variable-width: MatchBegin (u8 kind + extra operands depending on kind)
            x if x == Op::MatchBegin as u8 => {
                if offset + 1 < code.len() {
                    match code[offset + 1] {
                        4 => 3, // Tuple: kind + u8 length
                        5 => 4, // List: kind + u8 length + u8 has_rest
                        _ => 2, // Some/Ok/Err: just kind
                    }
                } else {
                    2
                }
            }
            // Variable-width: MatchArm (u8 kind, then u8 index for kinds 4/5)
            x if x == Op::MatchArm as u8 => {
                if offset + 1 < code.len() {
                    let kind = code[offset + 1];
                    if kind == 4 || kind == 5 {
                        3
                    } else {
                        2
                    }
                } else {
                    2
                }
            }
            // Unknown — treat as 1 to avoid infinite loops
            _ => 1,
        }
    }

    /// Peephole optimization pass: removes dead instruction sequences and
    /// adjusts jump targets accordingly.
    /// - `Not; Not` → remove both (double negation)
    /// - `Neg; Neg` → remove both (double arithmetic negation)
    /// - `Jump 0` → remove (nop jump to next instruction)
    /// - Pure push + `Pop` → remove both (dead value)
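    ///
    /// A sketch of the `Not; Not` case (not run as a doctest):
    ///
    /// ```ignore
    /// let mut chunk = Chunk::new();
    /// chunk.emit_op_u16(Op::GetLocalSlot, 0, 1);
    /// chunk.emit_op(Op::Not, 1);
    /// chunk.emit_op(Op::Not, 1);
    /// chunk.peephole_optimize();
    /// assert_eq!(chunk.code.len(), 3); // only the 3-byte GetLocalSlot remains
    /// ```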
    #[cfg(feature = "optimize")]
    pub fn peephole_optimize(&mut self) {
        let old_len = self.code.len();
        if old_len == 0 {
            return;
        }
        let mut dead = vec![false; old_len];
        let mut changed = true;
        while changed {
            changed = false;
            let mut i = 0;
            while i < old_len {
                if dead[i] {
                    i += 1;
                    continue;
                }
                let size = Self::instruction_size(&self.code, i);
                // Find next live instruction
                let mut next = i + size;
                while next < old_len && dead[next] {
                    next += 1;
                }
                if next >= old_len {
                    break;
                }
                let next_size = Self::instruction_size(&self.code, next);

                // Pattern: Not; Not → remove both
                if self.code[i] == Op::Not as u8 && self.code[next] == Op::Not as u8 {
                    dead[i] = true;
                    dead[next] = true;
                    changed = true;
                    i = next + next_size;
                    continue;
                }

                // Pattern: Neg; Neg → remove both
                if self.code[i] == Op::Neg as u8 && self.code[next] == Op::Neg as u8 {
                    dead[i] = true;
                    dead[next] = true;
                    changed = true;
                    i = next + next_size;
                    continue;
                }

                // Pattern: Jump 0 → remove (jump to next instruction)
                if self.code[i] == Op::Jump as u8 && size == 3 {
                    let target = self.read_u16(i + 1);
                    if target == 0 {
                        for item in dead.iter_mut().skip(i).take(3) {
                            *item = true;
                        }
                        changed = true;
                        i = next;
                        continue;
                    }
                }

                // Pattern: pure push followed by Pop → remove both
                if self.code[next] == Op::Pop as u8 {
                    let b = self.code[i];
                    let is_pure = b == Op::True as u8
                        || b == Op::False as u8
                        || b == Op::Unit as u8
                        || b == Op::None as u8
                        || b == Op::Constant as u8
                        || b == Op::Dup as u8
                        || b == Op::GetLocalSlot as u8;
                    if is_pure {
                        for item in dead.iter_mut().skip(i).take(size) {
                            *item = true;
                        }
                        dead[next] = true;
                        changed = true;
                        i = next + 1;
                        continue;
                    }
                }

                i = next;
            }
        }

        self.compact_dead(&dead);
    }

    /// Remove dead bytes and adjust jump targets.
    #[cfg(feature = "optimize")]
    fn compact_dead(&mut self, dead: &[bool]) {
        let old_len = self.code.len();

        // Build offset map: old position → new position
        let mut offset_map = vec![0usize; old_len + 1];
        let mut new_pos = 0;
        for old_pos in 0..old_len {
            offset_map[old_pos] = new_pos;
            if !dead[old_pos] {
                new_pos += 1;
            }
        }
        offset_map[old_len] = new_pos;

        if new_pos == old_len {
            return; // nothing to compact
        }

        // Adjust jump targets (walk instruction boundaries on live code)
        let mut i = 0;
        while i < old_len {
            if dead[i] {
                i += 1;
                continue;
            }
            let op = self.code[i];
            let size = Self::instruction_size(&self.code, i);

            // Forward jumps: offset is relative to the byte AFTER the instruction
            if (op == Op::Jump as u8
                || op == Op::JumpIfFalse as u8
                || op == Op::And as u8
                || op == Op::Or as u8
                || op == Op::IterNext as u8)
                && size == 3
            {
                let old_offset = self.read_u16(i + 1) as usize;
                let old_target = i + 3 + old_offset;
                let new_instr_end = offset_map[i] + 3; // new position of byte after this instr
                let new_target = if old_target <= old_len {
                    offset_map[old_target]
                } else {
                    offset_map[old_len]
                };
                let new_offset = new_target.saturating_sub(new_instr_end);
                self.code[i + 1] = (new_offset >> 8) as u8;
                self.code[i + 2] = (new_offset & 0xff) as u8;
            }

            // Backward jump: Loop
            if op == Op::Loop as u8 && size == 3 {
                let old_offset = self.read_u16(i + 1) as usize;
                let old_target = (i + 3).wrapping_sub(old_offset);
                let new_instr_end = offset_map[i] + 3;
                let new_target = if old_target <= old_len {
                    offset_map[old_target]
                } else {
                    0
                };
                let new_offset = new_instr_end.saturating_sub(new_target);
                self.code[i + 1] = (new_offset >> 8) as u8;
                self.code[i + 2] = (new_offset & 0xff) as u8;
            }

            i += size;
        }

        // Compact: remove dead bytes
        let mut new_code = Vec::with_capacity(new_pos);
        let mut new_lines = Vec::with_capacity(new_pos);
        let mut new_cols = Vec::with_capacity(new_pos);
        for (j, &is_dead) in dead.iter().enumerate().take(old_len) {
            if !is_dead {
                new_code.push(self.code[j]);
                new_lines.push(self.lines[j]);
                new_cols.push(self.cols[j]);
            }
        }
        self.code = new_code;
        self.lines = new_lines;
        self.cols = new_cols;
    }
}

/// A compiled function prototype (stored in the constant pool).
#[derive(Debug, Clone)]
pub struct FnProto {
    pub name: String,
    pub arity: usize,
    pub chunk: Chunk,
    pub param_names: Vec<String>,
    pub has_defaults: Vec<bool>,
}
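
// A minimal smoke test of the emit/patch/read round trip (a sketch; the
// crate's real tests may live elsewhere):
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn jump_patching_round_trip() {
        let mut chunk = Chunk::new();
        let jump = chunk.emit_jump(Op::JumpIfFalse, 1);
        chunk.emit_op(Op::Pop, 1); // one byte the jump will skip
        chunk.patch_jump(jump);
        // The offset counts from the byte after the u16 operand to the target.
        assert_eq!(chunk.read_u16(jump), 1);
    }
}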
631}