use crate::errors::QalaError;
use crate::opcode::Opcode;
use crate::span::Span;
use crate::value::ConstValue;
use std::fmt::Write as _;
#[derive(Debug, Clone, Default)]
pub struct Chunk {
pub code: Vec<u8>,
pub constants: Vec<ConstValue>,
pub source_lines: Vec<u32>,
pub local_names: Vec<String>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StructInfo {
pub name: String,
pub field_count: u16,
}
#[derive(Debug, Clone, Default)]
pub struct Program {
pub chunks: Vec<Chunk>,
pub main_index: usize,
pub globals: Vec<String>,
pub fn_names: Vec<String>,
pub enum_variant_names: Vec<(String, String)>,
pub structs: Vec<StructInfo>,
}
impl Chunk {
pub fn new() -> Self {
Self::default()
}
pub fn write_op(&mut self, op: Opcode, line: u32) {
self.code.push(op as u8);
self.source_lines.push(line);
}
pub fn write_u16(&mut self, v: u16, line: u32) {
let bytes = v.to_le_bytes();
self.code.push(bytes[0]);
self.code.push(bytes[1]);
self.source_lines.push(line);
self.source_lines.push(line);
}
pub fn write_i16(&mut self, v: i16, line: u32) {
self.write_u16(v as u16, line);
}
pub fn read_u16(&self, off: usize) -> u16 {
u16::from_le_bytes([self.code[off], self.code[off + 1]])
}
pub fn read_i16(&self, off: usize) -> i16 {
i16::from_le_bytes([self.code[off], self.code[off + 1]])
}
pub fn add_constant(&mut self, v: ConstValue) -> u16 {
let idx = self.constants.len();
debug_assert!(
idx <= u16::MAX as usize,
"constant pool overflow -- chunk has >65536 constants"
);
self.constants.push(v);
idx as u16
}
pub fn last_const(&self) -> Option<(usize, u16)> {
let n = self.code.len();
if n < 3 {
return None;
}
if self.code[n - 3] != (Opcode::Const as u8) {
return None;
}
let idx = u16::from_le_bytes([self.code[n - 2], self.code[n - 1]]);
Some((n - 3, idx))
}
pub fn second_to_last_const(&self) -> Option<(usize, u16)> {
let (off1, _) = self.last_const()?;
if off1 < 3 {
return None;
}
if self.code[off1 - 3] != (Opcode::Const as u8) {
return None;
}
let idx = u16::from_le_bytes([self.code[off1 - 2], self.code[off1 - 1]]);
Some((off1 - 3, idx))
}
pub fn rewind_last_const(&mut self) -> Option<u16> {
let (off, idx) = self.last_const()?;
self.code.truncate(off);
self.source_lines.truncate(off);
Some(idx)
}
pub fn emit_jump(&mut self, op: Opcode, line: u32) -> usize {
self.write_op(op, line);
let pos = self.code.len();
self.write_i16(i16::MAX, line);
pos
}
pub fn patch_jump(&mut self, pos: usize) -> Result<(), QalaError> {
let target = self.code.len();
let jump = target as isize - pos as isize - 2;
if jump < i16::MIN as isize || jump > i16::MAX as isize {
return Err(QalaError::Parse {
span: Span::new(0, 0),
message: format!("branch too far -- chunk exceeds i16 jump range ({jump} bytes)"),
});
}
let bytes = (jump as i16).to_le_bytes();
self.code[pos] = bytes[0];
self.code[pos + 1] = bytes[1];
Ok(())
}
pub fn emit_loop(&mut self, loop_start: usize, line: u32) -> Result<(), QalaError> {
let pos = self.code.len();
let offset = loop_start as isize - pos as isize - 3;
if offset < i16::MIN as isize || offset > i16::MAX as isize {
return Err(QalaError::Parse {
span: Span::new(0, 0),
message: format!("branch too far -- chunk exceeds i16 jump range ({offset} bytes)"),
});
}
self.write_op(Opcode::Jump, line);
self.write_i16(offset as i16, line);
Ok(())
}
pub fn disassemble(&self, program: &Program) -> String {
let mut out = String::new();
let mut off = 0usize;
while off < self.code.len() {
let byte = self.code[off];
let line = self.source_lines[off];
match Opcode::from_u8(byte) {
None => {
let _ = writeln!(out, "{off:>4} <unknown:{byte:#x}> ; line {line}");
off += 1;
continue;
}
Some(op) => {
let operand = self.operand_string(op, off, program);
let _ = writeln!(
out,
"{off:>4} {name:<9} {operand:<24} ; line {line}",
name = op.name(),
);
off += 1 + op.operand_bytes() as usize;
}
}
}
out
}
fn operand_string(&self, op: Opcode, off: usize, program: &Program) -> String {
match op {
Opcode::Const => {
let idx = self.read_u16(off + 1);
let val = self
.constants
.get(idx as usize)
.map(|v| v.to_string())
.unwrap_or_else(|| "?".to_string());
format!("#{idx} {val}")
}
Opcode::GetLocal | Opcode::SetLocal => {
let slot = self.read_u16(off + 1);
format!("{slot}")
}
Opcode::GetGlobal | Opcode::SetGlobal => {
let idx = self.read_u16(off + 1);
let name = program
.globals
.get(idx as usize)
.map(String::as_str)
.unwrap_or("?");
format!("#{idx}({name})")
}
Opcode::Jump | Opcode::JumpIfFalse | Opcode::JumpIfTrue => {
let offset = self.read_i16(off + 1);
let base = (off + 1 + 2) as isize;
let target = base + offset as isize;
format!("{offset:+} (-> {target})")
}
Opcode::Call => {
let fn_id = self.read_u16(off + 1);
let argc = self.code[off + 3];
let name = program
.fn_names
.get(fn_id as usize)
.map(String::as_str)
.unwrap_or("?");
format!("#{fn_id}({name})/{argc}")
}
Opcode::MakeArray | Opcode::MakeTuple => {
let count = self.read_u16(off + 1);
format!("{count}")
}
Opcode::MakeStruct => {
let id = self.read_u16(off + 1);
let name = program
.structs
.get(id as usize)
.map(|s| s.name.as_str())
.unwrap_or("?");
format!("#{id}({name})")
}
Opcode::MakeEnumVariant => {
let variant_id = self.read_u16(off + 1);
let payload = self.code[off + 3];
let (enum_name, variant_name) = program
.enum_variant_names
.get(variant_id as usize)
.map(|(e, v)| (e.as_str(), v.as_str()))
.unwrap_or(("?", "?"));
format!("#{variant_id}({enum_name}::{variant_name})/{payload}")
}
Opcode::Field => {
let idx = self.read_u16(off + 1);
format!("#{idx}")
}
Opcode::ConcatN => {
let count = self.read_u16(off + 1);
format!("{count}")
}
Opcode::MatchVariant => {
let variant_id = self.read_u16(off + 1);
let offset = self.read_i16(off + 3);
let (enum_name, variant_name) = program
.enum_variant_names
.get(variant_id as usize)
.map(|(e, v)| (e.as_str(), v.as_str()))
.unwrap_or(("?", "?"));
let base = (off + 1 + 4) as isize;
let target = base + offset as isize;
format!("#{variant_id}({enum_name}::{variant_name}) miss-> {target}")
}
Opcode::Pop
| Opcode::Dup
| Opcode::Add
| Opcode::Sub
| Opcode::Mul
| Opcode::Div
| Opcode::Mod
| Opcode::Neg
| Opcode::FAdd
| Opcode::FSub
| Opcode::FMul
| Opcode::FDiv
| Opcode::FNeg
| Opcode::Eq
| Opcode::Ne
| Opcode::Lt
| Opcode::Le
| Opcode::Gt
| Opcode::Ge
| Opcode::FEq
| Opcode::FNe
| Opcode::FLt
| Opcode::FLe
| Opcode::FGt
| Opcode::FGe
| Opcode::Not
| Opcode::Return
| Opcode::Index
| Opcode::Len
| Opcode::ToStr
| Opcode::Halt => String::new(),
}
}
}
impl Program {
pub fn new() -> Self {
Self::default()
}
pub fn disassemble(&self) -> String {
let mut out = String::new();
for (i, chunk) in self.chunks.iter().enumerate() {
let name = self.fn_names.get(i).map(String::as_str).unwrap_or("?");
let _ = writeln!(out, "== chunk: {name} (fn_id {i}) ==");
out.push_str(&chunk.disassemble(self));
if i + 1 < self.chunks.len() {
out.push('\n');
}
}
out
}
pub fn optimize(&mut self) {
for chunk in &mut self.chunks {
crate::optimizer::peephole(chunk);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn chunk_with_const(v: ConstValue) -> Chunk {
let mut c = Chunk::new();
let idx = c.add_constant(v);
c.write_op(Opcode::Const, 1);
c.write_u16(idx, 1);
c
}
fn program_with_chunk(chunk: Chunk) -> Program {
let mut p = Program::new();
p.chunks.push(chunk);
p.fn_names.push("main".to_string());
p
}
#[test]
fn code_and_source_lines_stay_in_lockstep_after_mixed_writes() {
let mut c = Chunk::new();
c.write_op(Opcode::Const, 1);
c.write_u16(0, 1);
c.write_op(Opcode::Pop, 2);
c.write_op(Opcode::Const, 3);
c.write_u16(1, 3);
c.write_op(Opcode::Halt, 4);
assert_eq!(c.code.len(), 8);
assert_eq!(c.code.len(), c.source_lines.len());
assert_eq!(c.source_lines, vec![1, 1, 1, 2, 3, 3, 3, 4]);
}
#[test]
fn write_u16_then_read_u16_is_a_round_trip_with_little_endian_bytes() {
for v in [0u16, 1, 0xFF, 0x100, 0x1234, 0xFFFF] {
let mut c = Chunk::new();
c.write_u16(v, 1);
assert_eq!(c.read_u16(0), v, "round-trip failed for {v:#x}");
}
let mut c = Chunk::new();
c.write_u16(0x1234, 1);
assert_eq!(c.code, vec![0x34, 0x12]);
}
#[test]
fn write_i16_then_read_i16_is_a_round_trip_with_twos_complement() {
for v in [0i16, 1, -1, i16::MAX, i16::MIN, -32, 32] {
let mut c = Chunk::new();
c.write_i16(v, 1);
assert_eq!(c.read_i16(0), v, "round-trip failed for {v}");
}
}
#[test]
fn add_constant_returns_sequential_indices_without_dedupe() {
let mut c = Chunk::new();
assert_eq!(c.add_constant(ConstValue::I64(1)), 0);
assert_eq!(c.add_constant(ConstValue::I64(2)), 1);
assert_eq!(c.add_constant(ConstValue::Str("x".to_string())), 2);
assert_eq!(c.add_constant(ConstValue::I64(1)), 3);
assert_eq!(c.constants.len(), 4);
}
#[test]
fn last_const_returns_none_when_trailing_op_is_not_const() {
let mut c = Chunk::new();
c.write_op(Opcode::Const, 1);
c.write_u16(7, 1);
c.write_op(Opcode::Pop, 2);
assert_eq!(c.last_const(), None);
}
#[test]
fn last_const_returns_offset_and_index_for_trailing_const_and_rewind_clears_it() {
let mut c = Chunk::new();
c.write_op(Opcode::Const, 1);
c.write_u16(7, 1);
assert_eq!(c.last_const(), Some((0, 7)));
let idx = c
.rewind_last_const()
.expect("rewind on a trailing CONST returns Some");
assert_eq!(idx, 7);
assert_eq!(c.code.len(), 0);
assert_eq!(c.source_lines.len(), 0);
}
#[test]
fn second_to_last_const_peeks_the_const_before_the_trailing_const() {
let mut c = Chunk::new();
c.write_op(Opcode::Const, 1);
c.write_u16(0, 1);
c.write_op(Opcode::Const, 1);
c.write_u16(1, 1);
c.write_op(Opcode::Const, 1);
c.write_u16(2, 1);
assert_eq!(c.last_const(), Some((6, 2)));
assert_eq!(c.second_to_last_const(), Some((3, 1)));
}
#[test]
fn second_to_last_const_is_none_when_only_one_const_is_in_the_chunk() {
let mut c = Chunk::new();
c.write_op(Opcode::Const, 1);
c.write_u16(0, 1);
assert_eq!(c.last_const(), Some((0, 0)));
assert_eq!(c.second_to_last_const(), None);
}
#[test]
fn lockstep_invariant_survives_rewind_last_const() {
let mut c = Chunk::new();
c.write_op(Opcode::Const, 1);
c.write_u16(0, 1);
c.write_op(Opcode::Const, 2);
c.write_u16(1, 2);
assert_eq!(c.code.len(), c.source_lines.len());
let idx = c
.rewind_last_const()
.expect("rewind on a trailing CONST returns Some");
assert_eq!(idx, 1);
assert_eq!(c.code.len(), 3);
assert_eq!(c.source_lines.len(), 3);
assert_eq!(c.code.len(), c.source_lines.len());
}
#[test]
fn rewind_last_const_returns_none_when_no_trailing_const() {
let mut c = Chunk::new();
assert_eq!(c.rewind_last_const(), None);
c.write_op(Opcode::Pop, 1);
assert_eq!(c.rewind_last_const(), None);
assert_eq!(c.code.len(), 1, "code must be unchanged after None return");
}
#[test]
fn emit_jump_returns_the_operand_offset_and_writes_i16_max_placeholder() {
let mut c = Chunk::new();
let pos = c.emit_jump(Opcode::JumpIfFalse, 5);
assert_eq!(pos, 1);
assert_eq!(c.read_i16(pos), i16::MAX);
assert_eq!(c.code.len(), 3);
assert_eq!(c.source_lines, vec![5, 5, 5]);
}
#[test]
fn patch_jump_writes_the_offset_to_the_current_end_of_code() {
let mut c = Chunk::new();
let pos = c.emit_jump(Opcode::JumpIfFalse, 5);
c.write_op(Opcode::Const, 6);
c.write_u16(0, 6);
c.write_op(Opcode::Pop, 6);
c.patch_jump(pos).unwrap();
assert_eq!(c.read_i16(pos), 4);
}
#[test]
fn patch_jump_returns_branch_too_far_when_offset_exceeds_i16_max() {
let mut c = Chunk::new();
let len = (i16::MAX as usize) + 100;
c.code.resize(len, 0);
c.source_lines.resize(len, 0);
let pos = 0usize;
match c.patch_jump(pos) {
Err(QalaError::Parse { message, .. }) => {
assert!(
message.contains("branch too far"),
"expected 'branch too far' in message, got: {message:?}"
);
}
other => panic!("expected QalaError::Parse, got: {other:?}"),
}
}
#[test]
fn emit_loop_writes_a_negative_offset_that_lands_at_loop_start() {
let mut c = Chunk::new();
let loop_start = 0usize;
c.write_op(Opcode::Const, 1);
c.write_u16(0, 1); c.write_op(Opcode::Const, 1);
c.write_u16(1, 1); c.write_op(Opcode::Const, 1);
c.write_u16(2, 1); c.write_op(Opcode::Pop, 1); assert_eq!(c.code.len(), 10);
c.emit_loop(loop_start, 2).unwrap();
let operand_offset = 11; assert_eq!(c.read_i16(operand_offset), -13);
}
#[test]
fn emit_loop_returns_branch_too_far_when_backward_distance_exceeds_i16_min() {
let mut c = Chunk::new();
let len = (i16::MAX as usize) + 100;
c.code.resize(len, 0);
c.source_lines.resize(len, 0);
match c.emit_loop(0, 1) {
Err(QalaError::Parse { message, .. }) => {
assert!(
message.contains("branch too far"),
"expected 'branch too far' in message, got: {message:?}"
);
}
other => panic!("expected QalaError::Parse, got: {other:?}"),
}
}
#[test]
fn disassemble_of_an_empty_chunk_produces_an_empty_string() {
let c = Chunk::new();
let p = Program::new();
assert_eq!(c.disassemble(&p), "");
}
#[test]
fn disassemble_of_one_const_instruction_matches_the_locked_format() {
let c = chunk_with_const(ConstValue::I64(42));
let p = program_with_chunk(c.clone());
let out = c.disassemble(&p);
assert_eq!(out, " 0 CONST #0 42 ; line 1\n");
}
#[test]
fn disassemble_renders_every_operand_kind_per_the_locked_table() {
let mut p = Program::new();
p.globals.push("g".to_string());
p.fn_names.push("main".to_string());
p.fn_names.push("callee".to_string());
p.enum_variant_names
.push(("Shape".to_string(), "Circle".to_string()));
let mut c = Chunk::new();
let const_idx = c.add_constant(ConstValue::I64(7));
c.write_op(Opcode::Const, 1);
c.write_u16(const_idx, 1);
c.write_op(Opcode::GetLocal, 2);
c.write_u16(0, 2);
c.write_op(Opcode::GetGlobal, 3);
c.write_u16(0, 3);
c.write_op(Opcode::Jump, 4);
c.write_i16(5, 4);
c.write_op(Opcode::Call, 5);
c.write_u16(1, 5);
c.code.push(2);
c.source_lines.push(5);
c.write_op(Opcode::MakeArray, 6);
c.write_u16(3, 6);
c.write_op(Opcode::MakeEnumVariant, 7);
c.write_u16(0, 7);
c.code.push(1);
c.source_lines.push(7);
c.write_op(Opcode::Field, 8);
c.write_u16(2, 8);
c.write_op(Opcode::ConcatN, 9);
c.write_u16(4, 9);
c.write_op(Opcode::MatchVariant, 10);
c.write_u16(0, 10);
c.write_i16(8, 10);
p.chunks.push(c.clone());
let out = c.disassemble(&p);
assert!(
out.contains("CONST #0 7"),
"missing CONST line in:\n{out}"
);
assert!(
out.contains("GET_LOCAL 0"),
"missing GET_LOCAL line in:\n{out}"
);
assert!(
out.contains("GET_GLOBAL #0(g)"),
"missing GET_GLOBAL line in:\n{out}"
);
assert!(
out.contains("JUMP +5"),
"missing JUMP signed-offset rendering in:\n{out}"
);
assert!(
out.contains("CALL #1(callee)/2"),
"missing CALL line in:\n{out}"
);
assert!(
out.contains("MAKE_ARRAY 3"),
"missing MAKE_ARRAY line in:\n{out}"
);
assert!(
out.contains("MAKE_ENUM_VARIANT #0(Shape::Circle)/1"),
"missing MAKE_ENUM_VARIANT line in:\n{out}"
);
assert!(
out.contains("FIELD #2"),
"missing FIELD line in:\n{out}"
);
assert!(
out.contains("CONCAT_N 4"),
"missing CONCAT_N line in:\n{out}"
);
assert!(
out.contains("MATCH_VARIANT #0(Shape::Circle) miss-> "),
"missing MATCH_VARIANT line in:\n{out}"
);
}
#[test]
fn disassemble_renders_make_struct_with_its_struct_name() {
let mut p = Program::new();
p.fn_names.push("main".to_string());
p.structs.push(StructInfo {
name: "Point".to_string(),
field_count: 2,
});
let mut c = Chunk::new();
c.write_op(Opcode::MakeStruct, 1);
c.write_u16(0, 1);
c.write_op(Opcode::MakeStruct, 2);
c.write_u16(9, 2);
p.chunks.push(c.clone());
let out = c.disassemble(&p);
assert!(
out.contains("MAKE_STRUCT #0(Point)"),
"missing MAKE_STRUCT struct-name rendering in:\n{out}"
);
assert!(
out.contains("MAKE_STRUCT #9(?)"),
"missing MAKE_STRUCT bad-id fallback in:\n{out}"
);
}
#[test]
fn disassemble_is_byte_identical_when_called_twice_on_the_same_chunk() {
let mut p = Program::new();
p.fn_names.push("main".to_string());
p.fn_names.push("helper".to_string());
let mut c = Chunk::new();
let i0 = c.add_constant(ConstValue::I64(1));
let i1 = c.add_constant(ConstValue::Str("hi".to_string()));
let i2 = c.add_constant(ConstValue::F64(2.5));
c.write_op(Opcode::Const, 1);
c.write_u16(i0, 1);
c.write_op(Opcode::Const, 1);
c.write_u16(i1, 1);
c.write_op(Opcode::Const, 1);
c.write_u16(i2, 1);
c.write_op(Opcode::Add, 2);
c.write_op(Opcode::Pop, 2);
c.write_op(Opcode::GetLocal, 3);
c.write_u16(0, 3);
c.write_op(Opcode::Call, 4);
c.write_u16(1, 4);
c.code.push(1);
c.source_lines.push(4);
c.write_op(Opcode::Return, 5);
p.chunks.push(c.clone());
let first = c.disassemble(&p);
let second = c.disassemble(&p);
assert_eq!(first, second, "disassembler is non-deterministic");
assert!(first.lines().count() >= 8);
}
#[test]
fn disassemble_produces_one_line_per_opcode_for_every_operand_width() {
let mut p = Program::new();
p.fn_names.push("main".to_string());
p.fn_names.push("f".to_string());
p.enum_variant_names
.push(("E".to_string(), "A".to_string()));
let mut c = Chunk::new();
c.write_op(Opcode::Pop, 1);
c.write_op(Opcode::Const, 2);
c.write_u16(0, 2);
c.write_op(Opcode::Call, 3);
c.write_u16(1, 3);
c.code.push(0);
c.source_lines.push(3);
c.write_op(Opcode::MatchVariant, 4);
c.write_u16(0, 4);
c.write_i16(0, 4);
let _ = c.add_constant(ConstValue::I64(0));
p.chunks.push(c.clone());
let out = c.disassemble(&p);
assert_eq!(out.matches('\n').count(), 4, "wrong line count in:\n{out}");
}
#[test]
fn program_optimize_runs_peephole_over_every_chunk() {
let mut p = Program::new();
p.fn_names.push("main".to_string());
p.fn_names.push("helper".to_string());
let mut a = Chunk::new();
let idx_a = a.add_constant(ConstValue::I64(0));
a.write_op(Opcode::Const, 1);
a.write_u16(idx_a, 1);
a.write_op(Opcode::Pop, 1);
a.write_op(Opcode::Return, 1);
let mut b = Chunk::new();
let idx_b = b.add_constant(ConstValue::I64(1));
b.write_op(Opcode::Const, 1);
b.write_u16(idx_b, 1);
b.write_op(Opcode::Pop, 1);
b.write_op(Opcode::Return, 1);
p.chunks.push(a);
p.chunks.push(b);
p.optimize();
assert_eq!(p.chunks[0].code, vec![Opcode::Return as u8]);
assert_eq!(p.chunks[1].code, vec![Opcode::Return as u8]);
assert_eq!(p.chunks[0].code.len(), p.chunks[0].source_lines.len());
assert_eq!(p.chunks[1].code.len(), p.chunks[1].source_lines.len());
}
#[test]
fn program_optimize_on_an_empty_program_does_nothing() {
let mut p = Program::new();
p.optimize();
assert!(p.chunks.is_empty());
}
#[test]
fn program_optimize_is_idempotent() {
let mut p = Program::new();
p.fn_names.push("main".to_string());
let mut a = Chunk::new();
let idx = a.add_constant(ConstValue::I64(0));
a.write_op(Opcode::Const, 1);
a.write_u16(idx, 1);
a.write_op(Opcode::Pop, 1);
a.write_op(Opcode::Dup, 2);
a.write_op(Opcode::Pop, 2);
a.write_op(Opcode::Return, 3);
p.chunks.push(a);
p.optimize();
let after_first = p.chunks[0].code.clone();
p.optimize();
assert_eq!(
p.chunks[0].code, after_first,
"second optimize changed bytes"
);
}
#[test]
fn program_disassemble_emits_a_header_per_chunk_and_concatenates_bodies() {
let mut p = Program::new();
p.fn_names.push("main".to_string());
p.fn_names.push("helper".to_string());
let mut a = chunk_with_const(ConstValue::I64(0));
a.write_op(Opcode::Return, 1);
let mut b = chunk_with_const(ConstValue::I64(1));
b.write_op(Opcode::Return, 1);
p.chunks.push(a);
p.chunks.push(b);
let out = p.disassemble();
assert!(out.contains("== chunk: main (fn_id 0) =="));
assert!(out.contains("== chunk: helper (fn_id 1) =="));
let main_idx = out.find("== chunk: main").unwrap();
let helper_idx = out.find("== chunk: helper").unwrap();
assert!(main_idx < helper_idx);
assert_eq!(p.disassemble(), p.disassemble());
}
}