#![allow(missing_docs)]
use crate::eval::Value;
use crate::utils::SymbolId;
use std::collections::HashMap;
use std::fmt;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum OpCode {
LoadConst = 0x00, LoadLocal = 0x01, LoadGlobal = 0x02, StoreLocal = 0x03, StoreGlobal = 0x04, Pop = 0x05, Dup = 0x06,
Add = 0x10, Sub = 0x11, Mul = 0x12, Div = 0x13, Mod = 0x14, Neg = 0x15,
Eq = 0x20, Ne = 0x21, Lt = 0x22, Le = 0x23, Gt = 0x24, Ge = 0x25,
Not = 0x30, And = 0x31, Or = 0x32,
Jump = 0x40, JumpIfFalse = 0x41, JumpIfTrue = 0x42, Call = 0x43, TailCall = 0x44, Return = 0x45,
Cons = 0x50, Car = 0x51, Cdr = 0x52, IsNull = 0x53, IsPair = 0x54,
MakeVector = 0x60, VectorRef = 0x61, VectorSet = 0x62, VectorLength = 0x63,
IsNumber = 0x70, IsString = 0x71, IsSymbol = 0x72, IsBoolean = 0x73, IsProcedure = 0x74,
MakeClosure = 0x80, Apply = 0x81, CallCC = 0x82, Yield = 0x83,
Debug = 0xF0, Profile = 0xF1,
Halt = 0xFF, }
#[derive(Debug, Clone, PartialEq)]
pub enum Operand {
None,
U8(u8),
U16(u16),
U32(u32),
ConstIndex(u32),
LocalIndex(u16),
JumpOffset(i32),
Symbol(SymbolId),
}
#[derive(Debug, Clone, PartialEq)]
pub struct Instruction {
pub opcode: OpCode,
pub operand: Operand,
pub source_location: Option<SourceLocation>,
}
#[derive(Debug, Clone, PartialEq)]
pub struct SourceLocation {
pub line: u32,
pub column: u32,
pub filename: Option<String>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum ConstantValue {
Value(Value),
String(String),
Number(f64),
Boolean(bool),
Symbol(SymbolId),
Bytecode(Vec<Instruction>),
}
impl std::hash::Hash for ConstantValue {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
use std::mem;
match self {
ConstantValue::Value(v) => {
0u8.hash(state);
let ptr = v as *const Value as usize;
ptr.hash(state);
}
ConstantValue::String(s) => {
1u8.hash(state);
s.hash(state);
}
ConstantValue::Number(n) => {
2u8.hash(state);
n.to_bits().hash(state);
}
ConstantValue::Boolean(b) => {
3u8.hash(state);
b.hash(state);
}
ConstantValue::Symbol(s) => {
4u8.hash(state);
s.hash(state);
}
ConstantValue::Bytecode(bc) => {
5u8.hash(state);
bc.len().hash(state);
if !bc.is_empty() {
mem::discriminant(&bc[0].opcode).hash(state);
}
}
}
}
}
impl Eq for ConstantValue {}
#[derive(Debug, Clone)]
pub struct ConstantPool {
constants: Vec<ConstantValue>,
constant_map: HashMap<ConstantValue, u32>,
}
impl ConstantPool {
pub fn new() -> Self {
Self {
constants: Vec::new(),
constant_map: HashMap::new(),
}
}
pub fn add_constant(&mut self, value: ConstantValue) -> u32 {
if let Some(&index) = self.constant_map.get(&value) {
return index;
}
let index = self.constants.len() as u32;
self.constants.push(value.clone());
self.constant_map.insert(value, index);
index
}
pub fn get_constant(&self, index: u32) -> Option<&ConstantValue> {
self.constants.get(index as usize)
}
pub fn len(&self) -> usize {
self.constants.len()
}
pub fn is_empty(&self) -> bool {
self.constants.is_empty()
}
pub fn clear(&mut self) {
self.constants.clear();
self.constant_map.clear();
}
pub fn iter(&self) -> impl Iterator<Item = (u32, &ConstantValue)> {
self.constants.iter().enumerate().map(|(i, v)| (i as u32, v))
}
pub fn memory_usage(&self) -> usize {
std::mem::size_of::<Self>() +
self.constants.capacity() * std::mem::size_of::<ConstantValue>() +
self.constant_map.capacity() * (std::mem::size_of::<ConstantValue>() + std::mem::size_of::<u32>())
}
}
impl Default for ConstantPool {
fn default() -> Self {
Self::new()
}
}
impl Instruction {
pub fn new(opcode: OpCode) -> Self {
Self {
opcode,
operand: Operand::None,
source_location: None,
}
}
pub fn with_operand(opcode: OpCode, operand: Operand) -> Self {
Self {
opcode,
operand,
source_location: None,
}
}
pub fn with_location(opcode: OpCode, operand: Operand, location: SourceLocation) -> Self {
Self {
opcode,
operand,
source_location: Some(location),
}
}
pub fn encoded_size(&self) -> usize {
1 + match &self.operand {
Operand::None => 0,
Operand::U8(_) => 1,
Operand::U16(_) => 2,
Operand::U32(_) => 4,
Operand::ConstIndex(_) => 4,
Operand::LocalIndex(_) => 2,
Operand::JumpOffset(_) => 4,
Operand::Symbol(_) => 8, }
}
pub fn is_removable(&self) -> bool {
match self.opcode {
OpCode::Debug | OpCode::Profile => true,
OpCode::Pop => true, _ => false,
}
}
pub fn is_control_flow(&self) -> bool {
matches!(self.opcode,
OpCode::Jump | OpCode::JumpIfFalse | OpCode::JumpIfTrue |
OpCode::Call | OpCode::TailCall | OpCode::Return |
OpCode::CallCC | OpCode::Yield
)
}
pub fn is_terminator(&self) -> bool {
matches!(self.opcode,
OpCode::Jump | OpCode::Return | OpCode::Halt | OpCode::Yield
)
}
}
impl fmt::Display for Instruction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.operand {
Operand::None => write!(f, "{:?}", self.opcode),
operand => write!(f, "{:?} {:?}", self.opcode, operand),
}
}
}
impl fmt::Display for OpCode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let name = match self {
OpCode::LoadConst => "LOAD_CONST",
OpCode::LoadLocal => "LOAD_LOCAL",
OpCode::LoadGlobal => "LOAD_GLOBAL",
OpCode::StoreLocal => "STORE_LOCAL",
OpCode::StoreGlobal => "STORE_GLOBAL",
OpCode::Pop => "POP",
OpCode::Dup => "DUP",
OpCode::Add => "ADD",
OpCode::Sub => "SUB",
OpCode::Mul => "MUL",
OpCode::Div => "DIV",
OpCode::Mod => "MOD",
OpCode::Neg => "NEG",
OpCode::Eq => "EQ",
OpCode::Ne => "NE",
OpCode::Lt => "LT",
OpCode::Le => "LE",
OpCode::Gt => "GT",
OpCode::Ge => "GE",
OpCode::Not => "NOT",
OpCode::And => "AND",
OpCode::Or => "OR",
OpCode::Jump => "JUMP",
OpCode::JumpIfFalse => "JUMP_IF_FALSE",
OpCode::JumpIfTrue => "JUMP_IF_TRUE",
OpCode::Call => "CALL",
OpCode::TailCall => "TAIL_CALL",
OpCode::Return => "RETURN",
OpCode::Cons => "CONS",
OpCode::Car => "CAR",
OpCode::Cdr => "CDR",
OpCode::IsNull => "IS_NULL",
OpCode::IsPair => "IS_PAIR",
OpCode::MakeVector => "MAKE_VECTOR",
OpCode::VectorRef => "VECTOR_REF",
OpCode::VectorSet => "VECTOR_SET",
OpCode::VectorLength => "VECTOR_LENGTH",
OpCode::IsNumber => "IS_NUMBER",
OpCode::IsString => "IS_STRING",
OpCode::IsSymbol => "IS_SYMBOL",
OpCode::IsBoolean => "IS_BOOLEAN",
OpCode::IsProcedure => "IS_PROCEDURE",
OpCode::MakeClosure => "MAKE_CLOSURE",
OpCode::Apply => "APPLY",
OpCode::CallCC => "CALL_CC",
OpCode::Yield => "YIELD",
OpCode::Debug => "DEBUG",
OpCode::Profile => "PROFILE",
OpCode::Halt => "HALT",
};
write!(f, "{name}")
}
}
#[derive(Debug, Clone)]
pub struct Bytecode {
pub instructions: Vec<Instruction>,
pub constants: ConstantPool,
pub entry_point: usize,
pub local_count: usize,
pub max_stack_depth: usize,
}
impl Bytecode {
pub fn new() -> Self {
Self {
instructions: Vec::new(),
constants: ConstantPool::new(),
entry_point: 0,
local_count: 0,
max_stack_depth: 0,
}
}
pub fn add_instruction(&mut self, instruction: Instruction) {
self.instructions.push(instruction);
}
pub fn add_instructions(&mut self, instructions: Vec<Instruction>) {
self.instructions.extend(instructions);
}
pub fn len(&self) -> usize {
self.instructions.len()
}
pub fn is_empty(&self) -> bool {
self.instructions.is_empty()
}
pub fn estimated_size(&self) -> usize {
let instruction_size: usize = self.instructions.iter().map(|i| i.encoded_size()).sum();
instruction_size + self.constants.memory_usage()
}
pub fn disassemble(&self) -> String {
let mut output = String::new();
output.push_str("=== Bytecode Disassembly ===\n");
output.push_str(&format!("Entry point: {}\n", self.entry_point));
output.push_str(&format!("Local variables: {}\n", self.local_count));
output.push_str(&format!("Max stack depth: {}\n", self.max_stack_depth));
output.push_str(&format!("Instructions: {}\n", self.instructions.len()));
output.push_str(&format!("Constants: {}\n\n", self.constants.len()));
if !self.constants.is_empty() {
output.push_str("=== Constants ===\n");
for (index, constant) in self.constants.iter() {
output.push_str(&format!("{index:4}: {constant:?}\n"));
}
output.push('\n');
}
output.push_str("=== Instructions ===\n");
for (index, instruction) in self.instructions.iter().enumerate() {
let marker = if index == self.entry_point { ">" } else { " " };
output.push_str(&format!("{marker}{index:4}: {instruction}\n"));
}
output
}
pub fn validate(&self) -> Result<(), String> {
if self.entry_point >= self.instructions.len() {
return Err("Entry point is out of bounds".to_string());
}
for (index, instruction) in self.instructions.iter().enumerate() {
if let Operand::ConstIndex(const_index) = &instruction.operand {
if *const_index >= self.constants.len() as u32 {
return Err(format!("Instruction {index} references invalid constant {const_index}"));
}
}
}
for (index, instruction) in self.instructions.iter().enumerate() {
match instruction.opcode {
OpCode::Jump | OpCode::JumpIfFalse | OpCode::JumpIfTrue => {
if let Operand::JumpOffset(offset) = &instruction.operand {
let target = (index as i32) + offset;
if target < 0 || target >= self.instructions.len() as i32 {
return Err(format!("Instruction {index} has invalid jump target {target}"));
}
}
}
_ => {}
}
}
Ok(())
}
}
impl Default for Bytecode {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_constant_pool() {
let mut pool = ConstantPool::new();
let index1 = pool.add_constant(ConstantValue::Number(42.0));
let index2 = pool.add_constant(ConstantValue::String("hello".to_string()));
let index3 = pool.add_constant(ConstantValue::Number(42.0));
assert_eq!(index1, 0);
assert_eq!(index2, 1);
assert_eq!(index3, 0); assert_eq!(pool.len(), 2);
let constant = pool.get_constant(0).unwrap();
assert_eq!(*constant, ConstantValue::Number(42.0));
}
#[test]
fn test_instruction_creation() {
let inst1 = Instruction::new(OpCode::Add);
assert_eq!(inst1.opcode, OpCode::Add);
assert_eq!(inst1.operand, Operand::None);
let inst2 = Instruction::with_operand(OpCode::LoadConst, Operand::ConstIndex(5));
assert_eq!(inst2.opcode, OpCode::LoadConst);
assert_eq!(inst2.operand, Operand::ConstIndex(5));
}
#[test]
fn test_bytecode_validation() {
let mut bytecode = Bytecode::new();
bytecode.add_instruction(Instruction::with_operand(OpCode::LoadConst, Operand::ConstIndex(0)));
bytecode.add_instruction(Instruction::new(OpCode::Return));
assert!(bytecode.validate().is_err());
bytecode.constants.add_constant(ConstantValue::Number(42.0));
assert!(bytecode.validate().is_ok());
}
#[test]
fn test_bytecode_disassembly() {
let mut bytecode = Bytecode::new();
bytecode.constants.add_constant(ConstantValue::Number(42.0));
bytecode.add_instruction(Instruction::with_operand(OpCode::LoadConst, Operand::ConstIndex(0)));
bytecode.add_instruction(Instruction::new(OpCode::Return));
let disasm = bytecode.disassemble();
assert!(disasm.contains("LOAD_CONST"));
assert!(disasm.contains("RETURN"));
assert!(disasm.contains("Constants"));
assert!(disasm.contains("42"));
}
#[test]
fn test_instruction_properties() {
let jump_inst = Instruction::with_operand(OpCode::Jump, Operand::JumpOffset(10));
assert!(jump_inst.is_control_flow());
assert!(jump_inst.is_terminator());
let add_inst = Instruction::new(OpCode::Add);
assert!(!add_inst.is_control_flow());
assert!(!add_inst.is_terminator());
let debug_inst = Instruction::new(OpCode::Debug);
assert!(debug_inst.is_removable());
}
}