#[cfg(not(feature = "std"))]
use alloc::{collections::BTreeMap, format, string::String};
#[cfg(feature = "std")]
use std::collections::BTreeMap;
use thiserror::Error;
use super::codec;
use super::jump_table::JumpTable;
use super::program::Program;
use super::types::Opcode;
/// Errors reported by [`InstructionStream`] while seeking or decoding.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum Error {
/// The byte at `offset` could not be interpreted as an opcode.
#[error("unknown opcode 0x{byte:02X} at offset {offset:#06X}")]
UnknownOpcode {
/// Byte offset in the stream's buffer at which decoding was attempted.
offset: usize,
/// The offending byte.
byte: u8,
},
/// The byte at `offset` is a valid opcode, but the codec could not decode a
/// complete instruction from the bytes that remain.
#[error("truncated instruction at offset {offset:#06X}")]
TruncatedInstruction {
/// Byte offset in the stream's buffer at which decoding was attempted.
offset: usize,
},
/// A seek was requested to a position past the end of the buffer.
#[error("seek target {target} is out of bounds (buffer length {len})")]
SeekOutOfBounds {
/// The requested position.
target: usize,
/// Length of the buffer the seek was checked against.
len: usize,
},
}
/// Module-local shorthand: a `core` result whose error type is this module's [`Error`].
type Result<T> = core::result::Result<T, Error>;
/// A cursor over a borrowed buffer of encoded instructions.
///
/// The stream decodes one instruction per step starting at its current
/// position, and can attach a label to an instruction when one is registered
/// for the offset at which that instruction starts.
#[derive(Debug, Clone)]
pub struct InstructionStream<'a> {
/// The encoded bytes being decoded, borrowed from the caller.
bytes: &'a [u8],
/// Byte offset at which the next decode attempt starts.
pos: usize,
/// Label text keyed by the byte offset it is attached to.
labels: BTreeMap<usize, String>,
}
impl<'a> InstructionStream<'a> {
    /// Creates a stream over `bytes` with no labels, positioned at offset 0.
    pub fn new(bytes: &'a [u8]) -> Self {
        Self {
            bytes,
            pos: 0,
            labels: BTreeMap::new(),
        }
    }

    /// Creates a stream over `bytes` whose labels are derived from `table`.
    ///
    /// Every offset yielded by `table.targets()` is labelled `.{n}`, where `n`
    /// is the position of that target within the table. Labels are keyed by
    /// offset, so if several table entries share an offset only one label is
    /// retained for it.
    pub fn with_jump_table(bytes: &'a [u8], table: &JumpTable) -> Self {
        let labels = table
            .targets()
            .iter()
            .enumerate()
            .map(|(seq_id, &offset)| (offset, format!(".{seq_id}")))
            .collect();
        Self {
            bytes,
            pos: 0,
            labels,
        }
    }

    /// Creates a stream over the code and jump table of `program`.
    pub fn from_program(program: &'a Program) -> Self {
        Self::with_jump_table(program.code(), program.jump_table())
    }

    /// Returns the byte offset at which the next decode attempt will start.
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Returns the total length of the underlying buffer in bytes.
    pub fn len(&self) -> usize {
        self.bytes.len()
    }

    /// Returns `true` when the underlying buffer contains no bytes at all.
    ///
    /// This describes the buffer, not the cursor: a non-empty stream that has
    /// been fully consumed still returns `false`.
    pub fn is_empty(&self) -> bool {
        self.bytes.is_empty()
    }

    /// Returns the whole underlying buffer, independent of the current position.
    ///
    /// The slice carries the buffer's own lifetime `'a` rather than the
    /// lifetime of the `&self` borrow, so it stays usable while the stream is
    /// subsequently advanced, seeked or dropped.
    pub fn bytes(&self) -> &'a [u8] {
        self.bytes
    }

    /// Returns the offset-to-label map used to annotate decoded instructions.
    pub fn labels(&self) -> &BTreeMap<usize, String> {
        &self.labels
    }

    /// Moves the cursor to `pos`.
    ///
    /// Seeking to exactly `len()` is allowed and leaves the stream exhausted.
    ///
    /// # Errors
    ///
    /// Returns [`Error::SeekOutOfBounds`] when `pos` is greater than the
    /// buffer length; the current position is left unchanged in that case.
    pub fn seek(&mut self, pos: usize) -> Result<()> {
        if pos > self.bytes.len() {
            return Err(Error::SeekOutOfBounds {
                target: pos,
                len: self.bytes.len(),
            });
        }
        self.pos = pos;
        Ok(())
    }

    /// Decodes the instruction at the current position and advances past it.
    ///
    /// Returns `None` once the position has reached the end of the buffer.
    /// Otherwise returns:
    ///
    /// * `Some(Ok((offset, label, instruction)))` on success, where `offset`
    ///   is where the instruction starts and `label` is the label registered
    ///   for that offset, if any. The cursor advances by the number of bytes
    ///   the codec reports as consumed.
    /// * `Some(Err(_))` when the codec rejects the bytes at the current
    ///   position. The cursor advances by exactly one byte so that a caller
    ///   may keep going and attempt to resynchronise.
    pub fn next_instruction(
        &mut self,
    ) -> Option<Result<(usize, Option<String>, crate::bytecode::types::Instruction)>> {
        let offset = self.pos;
        // `get` yields `None` past the end and `first` yields `None` on an
        // empty tail, so together they cover the "nothing left" case without
        // any panicking index or `unreachable!` branch.
        let rest = self.bytes.get(offset..)?;
        let first_byte = *rest.first()?;
        match codec::decode(rest) {
            Ok((instr, consumed)) => {
                self.pos += consumed;
                // Only look up (and clone) the label when it is actually returned.
                let label = self.labels.get(&offset).cloned();
                Some(Ok((offset, label, instr)))
            }
            Err(e) => {
                self.pos += 1;
                Some(Err(map_decode_error(e, offset, first_byte)))
            }
        }
    }
}
impl Iterator for InstructionStream<'_> {
type Item = Result<(usize, Option<String>, crate::bytecode::types::Instruction)>;
fn next(&mut self) -> Option<Self::Item> {
self.next_instruction()
}
}
fn map_decode_error(err: codec::DecodeError, offset: usize, byte: u8) -> Error {
match err {
codec::DecodeError::UnknownOpcode { byte } => Error::UnknownOpcode { offset, byte },
codec::DecodeError::EmptyInput | codec::DecodeError::TruncatedOperand { .. } => {
if Opcode::try_from(byte).is_ok() {
Error::TruncatedInstruction { offset }
} else {
Error::UnknownOpcode { offset, byte }
}
}
}
}
// Unit tests for `InstructionStream`: decoding order, reported offsets,
// label assignment, seeking, and error recovery.
//
// NOTE(review): several expectations below depend on how the project codec
// encodes individual instructions (byte widths, opcode values). Those
// assumptions are called out where they matter — confirm against the codec.
#[cfg(test)]
mod tests {
use super::*;
use crate::bytecode::types::{Instruction, Register};
// Encodes every instruction with `codec::encode` and concatenates the output
// into one byte buffer.
fn assemble(program: &[Instruction]) -> Vec<u8> {
program.iter().flat_map(codec::encode).collect()
}
// An empty buffer is exhausted from the start.
#[test]
fn empty_buffer_yields_none_immediately() {
let mut stream = InstructionStream::new(&[]);
assert_eq!(stream.next_instruction(), None);
}
// One encoded instruction decodes back to itself at offset 0 with no label,
// after which the stream is done.
#[test]
fn single_instruction_roundtrip() {
let buf = assemble(&[Instruction::Halt {}]);
let mut stream = InstructionStream::new(&buf);
assert_eq!(
stream.next_instruction(),
Some(Ok((0, None, Instruction::Halt {}))),
);
assert_eq!(stream.next_instruction(), None);
}
// The offset reported for each instruction is where it starts.
// Assumes `Pop` encodes to a single byte, so `Halt` starts at 1 — TODO confirm.
#[test]
fn offsets_advance_correctly() {
let buf = assemble(&[Instruction::Pop {}, Instruction::Halt {}]);
let mut stream = InstructionStream::new(&buf);
let (off0, _, _) = stream.next_instruction().unwrap().unwrap();
let (off1, _, _) = stream.next_instruction().unwrap().unwrap();
assert_eq!(off0, 0);
assert_eq!(off1, 1);
assert_eq!(stream.next_instruction(), None);
}
// Driving the stream through the `Iterator` impl yields every instruction
// in program order.
#[test]
fn iterator_collects_all_instructions() {
let program = [
Instruction::Push1 { val: [5] },
Instruction::Push1 { val: [3] },
Instruction::Add {},
Instruction::Halt {},
];
let buf = assemble(&program);
// Collecting into `Result<Vec<_>>` stops at the first decode error, if any.
let items: Vec<_> = InstructionStream::new(&buf)
.collect::<Result<Vec<_>>>()
.unwrap();
assert_eq!(items.len(), 4);
for (i, (_, _, instr)) in items.iter().enumerate() {
assert_eq!(*instr, program[i]);
}
}
// A jump-table target labels the instruction starting at that offset as
// `.{index}`; instructions at other offsets get no label.
#[test]
fn jump_table_labels_are_assigned() {
let buf = assemble(&[
Instruction::Push1 { val: [3] },
Instruction::Jump2 { label: 0 },
Instruction::Halt {},
]);
// Offset 5 is expected to be where `Halt` starts — presumably `Push1` is
// 2 bytes and `Jump2` is 3; verify against the codec.
let table = JumpTable::new(vec![5]);
let mut stream = InstructionStream::with_jump_table(&buf, &table);
let (_, label0, _) = stream.next_instruction().unwrap().unwrap();
let (_, label1, _) = stream.next_instruction().unwrap().unwrap();
let (_, label2, _) = stream.next_instruction().unwrap().unwrap();
assert_eq!(label0, None);
assert_eq!(label1, None);
assert_eq!(label2.as_deref(), Some(".0"));
}
// A stream built with `new` has an empty label map.
#[test]
fn no_labels_when_no_jump_table() {
let buf = assemble(&[Instruction::Push1 { val: [1] }, Instruction::Halt {}]);
let stream = InstructionStream::new(&buf);
assert!(stream.labels().is_empty());
}
// Seeking moves the cursor so decoding resumes at the given offset.
// Assumes `Pop` is one byte wide, so offset 1 is the start of `Nop`.
#[test]
fn seek_to_second_instruction_skips_first() {
let buf = assemble(&[
Instruction::Pop {},
Instruction::Nop {},
Instruction::Halt {},
]);
let mut stream = InstructionStream::new(&buf);
stream.seek(1).unwrap();
let (off, _, instr) = stream.next_instruction().unwrap().unwrap();
assert_eq!(off, 1);
assert_eq!(instr, Instruction::Nop {});
}
// Seeking to exactly the buffer length is valid and exhausts the stream.
#[test]
fn seek_to_end_makes_stream_done() {
let buf = assemble(&[Instruction::Halt {}]);
let mut stream = InstructionStream::new(&buf);
stream.seek(buf.len()).unwrap();
assert_eq!(stream.next_instruction(), None);
}
// Seeking past the end fails with `SeekOutOfBounds` and leaves the cursor
// where it was.
#[test]
fn seek_out_of_bounds_returns_error() {
let buf = assemble(&[Instruction::Halt {}]);
let mut stream = InstructionStream::new(&buf);
assert_eq!(
stream.seek(buf.len() + 1),
Err(Error::SeekOutOfBounds {
target: buf.len() + 1,
len: buf.len()
}),
);
assert_eq!(stream.pos(), 0);
}
// Emulates taking a backward jump: resolve the jump's label through the
// table, seek there, and check that decoding restarts at that instruction
// with its label attached.
#[test]
fn seek_back_simulates_jump() {
let buf = assemble(&[
Instruction::Push1 { val: [3] },
Instruction::Jump2 { label: 0 },
]);
// Table entry 0 points at offset 0, i.e. the start of the buffer.
let table = JumpTable::new(vec![0]);
let mut stream = InstructionStream::with_jump_table(&buf, &table);
let (_, _, _push) = stream.next_instruction().unwrap().unwrap();
let (_, _, jump) = stream.next_instruction().unwrap().unwrap();
let target = if let Instruction::Jump2 { label } = jump {
table.get(label).unwrap()
} else {
panic!("expected Jump2");
};
stream.seek(target).unwrap();
let (off, label, instr) = stream.next_instruction().unwrap().unwrap();
assert_eq!(off, 0);
assert_eq!(label.as_deref(), Some(".0"));
assert_eq!(instr, Instruction::Push1 { val: [3] });
}
// An undecodable byte is reported as `UnknownOpcode`, and the stream steps
// over it so the following instruction still decodes.
// Assumes 0x0D is not a valid opcode — TODO confirm against the opcode table.
#[test]
fn unknown_opcode_returns_error_and_advances() {
let buf = [0x0Du8, Instruction::Halt {}.opcode() as u8];
let mut stream = InstructionStream::new(&buf);
assert_eq!(
stream.next_instruction(),
Some(Err(Error::UnknownOpcode {
offset: 0,
byte: 0x0D,
})),
);
assert_eq!(
stream.next_instruction(),
Some(Ok((1, None, Instruction::Halt {}))),
);
}
// A valid opcode whose operand bytes are missing is reported as
// `TruncatedInstruction`; the one-byte advance then exhausts the buffer.
// Assumes 0x18 is an opcode that requires operand bytes — TODO confirm.
#[test]
fn truncated_instruction_returns_error_and_advances() {
let buf: Vec<u8> = vec![0x18u8];
let mut stream = InstructionStream::new(&buf);
assert_eq!(
stream.next_instruction(),
Some(Err(Error::TruncatedInstruction { offset: 0 })),
);
assert_eq!(stream.next_instruction(), None);
}
// `len` reports the buffer size and `pos` follows the cursor as
// instructions are consumed.
// Assumes `Nop` and `Halt` each encode to one byte.
#[test]
fn pos_and_len_are_correct() {
let buf = assemble(&[Instruction::Nop {}, Instruction::Halt {}]);
let mut stream = InstructionStream::new(&buf);
assert_eq!(stream.len(), 2);
assert_eq!(stream.pos(), 0);
let _ = stream.next_instruction();
assert_eq!(stream.pos(), 1);
let _ = stream.next_instruction();
assert_eq!(stream.pos(), 2);
}
// An instruction carrying a register operand survives an encode/decode
// round trip unchanged.
#[test]
fn register_instruction_decodes_correctly() {
let buf = assemble(&[Instruction::Load { reg: Register(7) }]);
let mut stream = InstructionStream::new(&buf);
let (_, _, instr) = stream.next_instruction().unwrap().unwrap();
assert_eq!(instr, Instruction::Load { reg: Register(7) });
}
}