#[cfg(not(feature = "std"))]
use alloc::{vec, vec::Vec};
use thiserror::Error;
use super::codec;
use super::program::Program;
use super::types::{Instruction, Register};
/// Errors reported while placing labels or assembling the final program.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum Error {
/// More labels were placed than a 16-bit jump operand can number
/// (the limit is `u16::MAX + 1` placed labels).
#[error("too many TARGETs: {count} (max {})", u16::MAX as usize + 1)]
TooManyTargets {
/// Number of labels that were placed.
count: usize,
},
/// A jump refers to a label that was allocated but never placed.
#[error("label {id} was used but never placed")]
UnplacedLabel {
/// Allocation index of the offending label.
id: usize,
},
/// A label was placed but no jump refers to it.
/// `build` does not raise this for a label placed at byte offset 0.
#[error("label {id} was placed but never referenced by a jump")]
UnusedLabel {
/// Allocation index of the offending label.
id: usize,
},
/// The byte range of a recorded jump does not lie inside the code buffer.
#[error("fixup site {site:#06x} out of buffer bounds (buf len {buf_len})")]
FixupOutOfBounds {
/// Byte offset at which the jump was expected to start.
site: usize,
/// Length of the code buffer at the time of the check.
buf_len: usize,
},
/// `place` was called a second time for the same label.
#[error("label {id} placed more than once")]
DuplicateLabel {
/// Allocation index of the offending label.
id: usize,
},
/// The label id does not index this builder's label table.
#[error("label {id} was not created by this builder")]
ForeignLabel {
/// Index carried by the offending label handle.
id: usize,
},
}
/// Module-local shorthand for results whose error type is this module's [`Error`].
type Result<T> = core::result::Result<T, Error>;
/// Handle for a label allocated by [`InstructionBuilder::label`].
///
/// Wraps the label's index in the builder's label table; the index is private,
/// so handles can only be obtained from a builder.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LabelId(usize);
/// Which jump family a pending fixup belongs to; selects the instruction
/// variant that `build` writes when the label is resolved.
#[derive(Debug, Clone, Copy)]
enum JumpKind {
/// Emitted by `emit_jump`; resolved to `Jump1` / `Jump2`.
Jump,
/// Emitted by `emit_jump_if`; resolved to `JumpI1` / `JumpI2`.
JumpIf,
}
/// A jump whose label operand still has to be filled in by `build`.
#[derive(Debug)]
struct Fixup {
// Byte offset in the code buffer where the placeholder jump starts.
site: usize,
// Jump family, needed to re-encode the instruction with the final label id.
kind: JumpKind,
// Label the jump targets.
label: LabelId,
}
/// Incrementally assembles encoded instructions and resolves label-based jumps
/// when the program is built.
#[derive(Debug, Default)]
pub struct InstructionBuilder {
// Encoded instruction bytes emitted so far.
buf: Vec<u8>,
// Indexed by `LabelId`: byte offset of the label's `Target` instruction,
// or `None` while the label has not been placed.
label_positions: Vec<Option<usize>>,
// Jumps emitted with a placeholder operand, in emission order.
fixups: Vec<Fixup>,
}
// Callback macro handed to `opcodes!`: walks a comma-terminated list of
// `(code, Variant, mnemonic, doc, {fields})` entries one at a time and emits an
// `emit_<variant_snake>` builder method for the entries it knows how to wrap.
//
// Every arm peels off the first entry and recurses on the remainder, so each
// entry (including the last) must be followed by a comma. An entry whose field
// list starts with a name not handled below matches no arm.
macro_rules! impl_builder_methods {
// Base case: no entries left.
() => {};
// Entries whose first field is `label:` generate nothing here
// (presumably because `emit_jump` / `emit_jump_if` are written by hand).
( ($code:literal, $variant:ident, $mnem:literal, $doc:literal,
{label: $($rest_f:tt)*}), $($rest:tt)* ) => {
impl_builder_methods!($($rest)*);
};
// Entries whose first field is `model:` generate nothing here
// (presumably covered by the hand-written `emit_energy`).
( ($code:literal, $variant:ident, $mnem:literal, $doc:literal,
{model: $($rest_f:tt)*}), $($rest:tt)* ) => {
impl_builder_methods!($($rest)*);
};
// Entries whose first field is `val:` generate nothing here
// (presumably covered by the hand-written `emit_push`).
( ($code:literal, $variant:ident, $mnem:literal, $doc:literal,
{val: $($rest_f:tt)*}), $($rest:tt)* ) => {
impl_builder_methods!($($rest)*);
};
// Field-less instruction: generate `emit_<variant>(&mut self)`.
( ($code:literal, $variant:ident, $mnem:literal, $doc:literal, {}),
$($rest:tt)* ) => {
::pastey::paste! {
#[doc = $doc]
pub fn [<emit_ $variant:snake>](&mut self) -> &mut Self {
self.emit(Instruction::$variant {})
}
}
impl_builder_methods!($($rest)*);
};
// Instruction whose first field is `reg:`: generate
// `emit_<variant>(&mut self, reg: Register)`.
( ($code:literal, $variant:ident, $mnem:literal, $doc:literal,
{reg: $($ftype:tt)*}), $($rest:tt)* ) => {
::pastey::paste! {
#[doc = $doc]
pub fn [<emit_ $variant:snake>](&mut self, reg: Register) -> &mut Self {
self.emit(Instruction::$variant { reg })
}
}
impl_builder_methods!($($rest)*);
};
}
impl InstructionBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn label(&mut self) -> LabelId {
let id = self.label_positions.len();
self.label_positions.push(None);
LabelId(id)
}
pub fn place(&mut self, label: LabelId) -> Result<&mut Self> {
let slot = self
.label_positions
.get_mut(label.0)
.ok_or(Error::ForeignLabel { id: label.0 })?;
if slot.is_some() {
return Err(Error::DuplicateLabel { id: label.0 });
}
*slot = Some(self.buf.len());
Ok(self.emit(Instruction::Target {}))
}
pub fn emit(&mut self, instr: Instruction) -> &mut Self {
self.buf.extend_from_slice(&codec::encode(&instr));
self
}
pub fn emit_jump(&mut self, label: LabelId) -> &mut Self {
self.emit_with_fixup(
Instruction::Jump2 { label: u16::MAX },
JumpKind::Jump,
label,
)
}
pub fn emit_jump_if(&mut self, label: LabelId) -> &mut Self {
self.emit_with_fixup(
Instruction::JumpI2 { label: u16::MAX },
JumpKind::JumpIf,
label,
)
}
fn emit_with_fixup(&mut self, instr: Instruction, kind: JumpKind, label: LabelId) -> &mut Self {
let site = self.buf.len();
let enc = codec::encode(&instr);
self.buf.extend_from_slice(&enc);
self.fixups.push(Fixup { site, kind, label });
self
}
opcodes!(impl_builder_methods);
pub fn emit_push(&mut self, val: i64) -> &mut Self {
self.emit(minimal_push(val))
}
pub fn emit_energy(&mut self, model: Register, sample: Register) -> &mut Self {
self.emit(Instruction::Energy { model, sample })
}
pub fn build(mut self) -> Result<Program> {
let mut referenced = vec![false; self.label_positions.len()];
for fixup in &self.fixups {
let label_placed = self.label_positions.get(fixup.label.0).copied().flatten();
if label_placed.is_none() {
return Err(Error::UnplacedLabel { id: fixup.label.0 });
}
if let Some(slot) = referenced.get_mut(fixup.label.0) {
*slot = true;
}
}
for (idx, pos) in self.label_positions.iter().enumerate() {
if pos.is_some() && !referenced.get(idx).copied().unwrap_or(false) && *pos != Some(0) {
return Err(Error::UnusedLabel { id: idx });
}
}
let mut placed: Vec<(usize, usize)> = self
.label_positions
.iter()
.enumerate()
.filter_map(|(idx, pos)| pos.map(|p| (idx, p)))
.collect();
placed.sort_by_key(|&(_, pos)| pos);
if placed.len() > usize::from(u16::MAX) + 1 {
return Err(Error::TooManyTargets {
count: placed.len(),
});
}
let mut alloc_to_seq: Vec<Option<u16>> = vec![None; self.label_positions.len()];
for (seq_id, &(alloc_id, _)) in placed.iter().enumerate() {
let seq_u16 = u16::try_from(seq_id).map_err(|_| Error::TooManyTargets {
count: placed.len(),
})?;
if let Some(slot) = alloc_to_seq.get_mut(alloc_id) {
*slot = Some(seq_u16);
}
}
for fixup in &self.fixups {
let seq_id = alloc_to_seq
.get(fixup.label.0)
.copied()
.flatten()
.ok_or(Error::UnplacedLabel { id: fixup.label.0 })?;
let instr = match fixup.kind {
JumpKind::Jump => Instruction::Jump2 { label: seq_id },
JumpKind::JumpIf => Instruction::JumpI2 { label: seq_id },
};
let encoded = codec::encode(&instr);
let end = fixup.site + encoded.len();
let buf_len = self.buf.len();
self.buf
.get_mut(fixup.site..end)
.ok_or(Error::FixupOutOfBounds {
site: fixup.site,
buf_len,
})?
.copy_from_slice(&encoded);
}
let mut narrowable: Vec<(usize, JumpKind, u8)> = Vec::new();
for f in &self.fixups {
let seq_id = alloc_to_seq
.get(f.label.0)
.copied()
.flatten()
.ok_or(Error::UnplacedLabel { id: f.label.0 })?;
if let Ok(narrow_id) = u8::try_from(seq_id) {
narrowable.push((f.site, f.kind, narrow_id));
}
}
narrowable.sort_by_key(|&(site, _, _)| site);
for (shrinkage, (site, kind, narrow_id)) in narrowable.into_iter().enumerate() {
let actual = site - shrinkage;
let narrow = match kind {
JumpKind::Jump => Instruction::Jump1 { label: narrow_id },
JumpKind::JumpIf => Instruction::JumpI1 { label: narrow_id },
};
let nb = codec::encode(&narrow);
let nb_len = nb.len();
let buf_len = self.buf.len();
self.buf
.get_mut(actual..actual + nb_len)
.ok_or(Error::FixupOutOfBounds {
site: actual,
buf_len,
})?
.copy_from_slice(&nb);
let _ = self.buf.remove(actual + nb_len);
}
Ok(Program::new(self.buf))
}
}
/// Picks the shortest `PushN` instruction whose big-endian payload
/// sign-extends back to `val`; falls back to `Push8` when no shorter width
/// is sufficient.
fn minimal_push(val: i64) -> Instruction {
    let [b0, b1, b2, b3, b4, b5, b6, b7] = val.to_be_bytes();
    // `width` bytes are enough when moving the low `width` bytes to the top of
    // the word and arithmetically shifting them back reproduces `val`.
    let fits = |width: u32| {
        let spare = 64 - 8 * width;
        (val << spare) >> spare == val
    };
    match (1..8).find(|&width| fits(width)) {
        Some(1) => Instruction::Push1 { val: [b7] },
        Some(2) => Instruction::Push2 { val: [b6, b7] },
        Some(3) => Instruction::Push3 { val: [b5, b6, b7] },
        Some(4) => Instruction::Push4 {
            val: [b4, b5, b6, b7],
        },
        Some(5) => Instruction::Push5 {
            val: [b3, b4, b5, b6, b7],
        },
        Some(6) => Instruction::Push6 {
            val: [b2, b3, b4, b5, b6, b7],
        },
        Some(7) => Instruction::Push7 {
            val: [b1, b2, b3, b4, b5, b6, b7],
        },
        // No width below eight bytes round-trips: use the full encoding.
        _ => Instruction::Push8 {
            val: [b0, b1, b2, b3, b4, b5, b6, b7],
        },
    }
}
/// Unit tests for `InstructionBuilder`: push width selection, label
/// resolution and renumbering, jump narrowing, and error reporting.
#[cfg(test)]
#[expect(
    unused_results,
    reason = "builder methods return &mut Self; results are intentionally discarded in test chains"
)]
mod tests {
    use super::*;
    use crate::bytecode::stream::InstructionStream;
    use crate::bytecode::types::Instruction;

    /// Decodes a whole code buffer, panicking on the first decode error.
    fn decode_all(buf: &[u8]) -> Vec<Instruction> {
        InstructionStream::new(buf).map(|r| r.unwrap().2).collect()
    }

    #[test]
    fn empty_builder_produces_empty_code() {
        let program = InstructionBuilder::new().build().unwrap();
        assert!(program.code().is_empty());
    }

    #[test]
    fn push_zero_emits_push1() {
        let mut b = InstructionBuilder::new();
        b.emit_push(0).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(instrs[0], Instruction::Push1 { val: [0x00] });
    }

    #[test]
    fn push_i8_emits_push1() {
        let mut b = InstructionBuilder::new();
        b.emit_push(42).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(instrs[0], Instruction::Push1 { val: [42] });
    }

    #[test]
    fn push_minus_one_emits_push1() {
        let mut b = InstructionBuilder::new();
        b.emit_push(-1).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(instrs[0], Instruction::Push1 { val: [0xFF] });
    }

    #[test]
    fn push_i8_max_uses_push1() {
        let mut b = InstructionBuilder::new();
        b.emit_push(127).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(instrs[0], Instruction::Push1 { val: [0x7F] });
    }

    #[test]
    fn push_i8_max_plus_one_uses_push2() {
        let mut b = InstructionBuilder::new();
        b.emit_push(128).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(instrs[0], Instruction::Push2 { val: [0x00, 0x80] });
    }

    #[test]
    fn push_i64_max_uses_push8() {
        let mut b = InstructionBuilder::new();
        b.emit_push(i64::MAX).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(
            instrs[0],
            Instruction::Push8 {
                val: i64::MAX.to_be_bytes()
            }
        );
    }

    #[test]
    fn push_i64_min_uses_push8() {
        let mut b = InstructionBuilder::new();
        b.emit_push(i64::MIN).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(
            instrs[0],
            Instruction::Push8 {
                val: i64::MIN.to_be_bytes()
            }
        );
    }

    #[test]
    fn backward_jump_resolves_correctly() {
        let mut b = InstructionBuilder::new();
        let top = b.label();
        b.place(top).unwrap().emit_push(0).emit_jump_if(top);
        let program = b.build().unwrap();
        let instrs = decode_all(program.code());
        assert_eq!(instrs[0], Instruction::Target {});
        assert_eq!(instrs[1], Instruction::Push1 { val: [0x00] });
        assert_eq!(instrs[2], Instruction::JumpI1 { label: 0 });
        assert_eq!(program.jump_table().len(), 1);
        assert_eq!(program.jump_table().get(0), Some(0));
    }

    #[test]
    fn forward_jump_resolves_correctly() {
        let mut b = InstructionBuilder::new();
        let done = b.label();
        b.emit_jump(done)
            .emit_nop()
            .place(done)
            .unwrap()
            .emit_halt();
        let program = b.build().unwrap();
        let instrs = decode_all(program.code());
        assert_eq!(instrs[0], Instruction::Jump1 { label: 0 });
        assert_eq!(instrs[1], Instruction::Nop {});
        assert_eq!(instrs[2], Instruction::Target {});
        assert_eq!(instrs[3], Instruction::Halt {});
        assert_eq!(program.jump_table().get(0), Some(3));
    }

    #[test]
    fn multiple_jumps_to_same_label() {
        let mut b = InstructionBuilder::new();
        let done = b.label();
        b.emit_push(0)
            .emit_jump_if(done)
            .emit_push(0)
            .emit_jump_if(done)
            .place(done)
            .unwrap()
            .emit_halt();
        let program = b.build().unwrap();
        let instrs = decode_all(program.code());
        assert_eq!(*instrs.last().unwrap(), Instruction::Halt {});
        assert_eq!(instrs[0], Instruction::Push1 { val: [0x00] });
        assert_eq!(instrs[2], Instruction::Push1 { val: [0x00] });
        assert!(matches!(instrs[1], Instruction::JumpI1 { label: 0 }));
        assert!(matches!(instrs[3], Instruction::JumpI1 { label: 0 }));
        assert_eq!(instrs[4], Instruction::Target {});
        assert_eq!(instrs[5], Instruction::Halt {});
    }

    #[test]
    fn build_renumbers_labels_in_stream_order() {
        let mut b = InstructionBuilder::new();
        let first = b.label();
        let second = b.label();
        b.emit_jump_if(first).emit_jump(second);
        // Placed in the opposite order to allocation, so the ids swap.
        b.place(second).unwrap().emit_nop();
        b.place(first).unwrap().emit_halt();
        let program = b.build().unwrap();
        let instrs = decode_all(program.code());
        assert!(matches!(instrs[0], Instruction::JumpI1 { label: 1 }));
        assert!(matches!(instrs[1], Instruction::Jump1 { label: 0 }));
    }

    #[test]
    fn jump_narrows_to_jump1_for_small_seq_id() {
        let mut b = InstructionBuilder::new();
        let l = b.label();
        b.emit_jump(l).place(l).unwrap().emit_halt();
        let program = b.build().unwrap();
        let instrs = decode_all(program.code());
        assert_eq!(instrs[0], Instruction::Jump1 { label: 0 });
    }

    #[test]
    fn jump_if_narrows_to_jumpi1_for_small_seq_id() {
        let mut b = InstructionBuilder::new();
        let l = b.label();
        b.emit_push(1).emit_jump_if(l).place(l).unwrap().emit_halt();
        let program = b.build().unwrap();
        let instrs = decode_all(program.code());
        assert_eq!(instrs[1], Instruction::JumpI1 { label: 0 });
    }

    #[test]
    fn unplaced_label_returns_error() {
        let mut b = InstructionBuilder::new();
        let ghost = b.label();
        b.emit_jump(ghost).emit_halt();
        assert_eq!(b.build(), Err(Error::UnplacedLabel { id: 0 }));
    }

    #[test]
    fn unused_label_returns_error() {
        // A label that is allocated but neither placed nor referenced is fine.
        let mut b = InstructionBuilder::new();
        let l0 = b.label();
        let _l1 = b.label();
        b.emit_jump(l0);
        b.place(l0).unwrap().emit_nop();
        assert!(b.build().is_ok());

        // A label that is placed (past offset 0) but never jumped to is not.
        let mut b2 = InstructionBuilder::new();
        let target = b2.label();
        let unused = b2.label();
        b2.emit_jump(target)
            .place(target)
            .unwrap()
            .emit_nop()
            .place(unused)
            .unwrap()
            .emit_halt();
        assert_eq!(b2.build(), Err(Error::UnusedLabel { id: 1 }));
    }

    #[test]
    fn emit_arbitrary_instruction() {
        let mut b = InstructionBuilder::new();
        b.emit(Instruction::Copy {}).emit(Instruction::Halt {});
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(instrs, [Instruction::Copy {}, Instruction::Halt {}]);
    }

    #[test]
    fn arithmetic_chain() {
        let mut b = InstructionBuilder::new();
        b.emit_push(10)
            .emit_push(3)
            .emit_add()
            .emit_push(2)
            .emit_mul()
            .emit_neg()
            .emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(instrs[2], Instruction::Add {});
        assert_eq!(instrs[4], Instruction::Mul {});
        assert_eq!(instrs[5], Instruction::Neg {});
    }

    #[test]
    fn energy_method_encodes_both_registers() {
        let mut b = InstructionBuilder::new();
        b.emit_energy(Register(1), Register(2)).emit_halt();
        let instrs = decode_all(b.build().unwrap().code());
        assert_eq!(
            instrs[0],
            Instruction::Energy {
                model: Register(1),
                sample: Register(2)
            },
        );
    }

    #[test]
    fn place_twice_errors() {
        let mut b = InstructionBuilder::new();
        let l = b.label();
        assert!(b.place(l).is_ok());
        assert_eq!(b.place(l).unwrap_err(), Error::DuplicateLabel { id: 0 });
    }

    #[test]
    fn place_foreign_label_errors() {
        // `b` has allocated no labels, so a handle from `other` is out of range.
        let mut other = InstructionBuilder::new();
        let foreign = other.label();
        let mut b = InstructionBuilder::new();
        assert_eq!(b.place(foreign).unwrap_err(), Error::ForeignLabel { id: 0 });
    }

    #[test]
    fn jump_table_has_correct_ranges() {
        let mut b = InstructionBuilder::new();
        let l0 = b.label();
        let l1 = b.label();
        b.place(l0)
            .unwrap()
            .emit_nop()
            .emit_jump(l1)
            .place(l1)
            .unwrap()
            .emit_halt();
        let program = b.build().unwrap();
        assert_eq!(program.jump_table().len(), 2);
        assert_eq!(program.jump_table().get(0), Some(0));
        assert_eq!(program.jump_table().get(1), Some(4));
    }
}