use crate::{
error::Error,
pcode::opcode::{self, OpcodeInfo},
pcode::operand::{self, Operand},
pcode::semantics::PCodeDataType,
util::read_u16_le,
};
/// Value stored in [`Instruction::raw_len`] when the number of bytes consumed by an
/// instruction does not fit in a `u8`.
const RAW_LEN_SATURATION: u8 = u8::MAX;
/// One decoded instruction, as produced by [`InstructionIterator`].
#[derive(Debug, Clone)]
pub struct Instruction {
/// Position of the instruction's first byte within the decoded byte slice.
/// The decoder clamps this to `u16::MAX` if the position does not fit in a `u16`.
pub offset: u16,
/// Number of bytes the decoder consumed for this instruction (cursor after decoding
/// minus cursor before), capped at `RAW_LEN_SATURATION` when it exceeds `u8::MAX`.
pub raw_len: u8,
/// Static opcode table entry returned by `opcode::lookup` for this instruction.
pub info: &'static OpcodeInfo,
/// Decoded operands; unused slots are `None`. For variable-length instructions only
/// slot 0 is filled, with `Operand::VariableLength`.
pub operands: [Option<Operand>; 4],
}
impl Instruction {
    /// Returns the data type recorded on this instruction's opcode table entry, if any.
    #[inline]
    pub fn data_type(&self) -> Option<PCodeDataType> {
        self.info.data_type
    }

    /// Returns the data type associated with the operand in slot `index`.
    ///
    /// The result is `None` when `index` is outside the operand array or the slot is
    /// empty. For a populated slot the instruction-level data type is reported, which
    /// may itself be `None`.
    #[inline]
    pub fn operand_type(&self, index: usize) -> Option<PCodeDataType> {
        match self.operands.get(index) {
            // Slot exists and holds an operand: report the opcode's data type.
            Some(Some(_)) => self.info.data_type,
            // Out-of-range index or empty slot.
            _ => None,
        }
    }
}
/// Lazy decoder that walks a byte slice and yields one [`Instruction`] (or a decode
/// error) per call to `next`.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct InstructionIterator<'a> {
    /// Byte slice being decoded.
    bytes: &'a [u8],
    /// Index of the next byte to read.
    pos: usize,
    /// Exclusive end index for decoding; never larger than `bytes.len()`.
    limit: usize,
}

impl<'a> InstructionIterator<'a> {
    /// Creates an iterator positioned at the start of `pcode_bytes`.
    ///
    /// Decoding stops after `proc_size` bytes or at the end of the slice, whichever
    /// comes first.
    pub fn new(pcode_bytes: &'a [u8], proc_size: u16) -> Self {
        let declared_len = usize::from(proc_size);
        let available_len = pcode_bytes.len();
        Self {
            bytes: pcode_bytes,
            pos: 0,
            limit: if declared_len < available_len {
                declared_len
            } else {
                available_len
            },
        }
    }

    /// Returns the index of the next byte the iterator will read.
    #[inline]
    pub fn position(&self) -> usize {
        self.pos
    }
}
impl Iterator for InstructionIterator<'_> {
type Item = Result<Instruction, Error>;
fn next(&mut self) -> Option<Self::Item> {
if self.pos >= self.limit {
return None;
}
let start = self.pos;
let Some(&first_byte) = self.bytes.get(self.pos) else {
return Some(Err(Error::UnexpectedEndOfPCode {
offset: self.pos,
needed: 1,
}));
};
self.pos = match self.pos.checked_add(1) {
Some(v) => v,
None => {
return Some(Err(Error::ArithmeticOverflow {
context: "decoder pos advance after first byte",
}));
}
};
let next_byte = if self.pos < self.limit {
self.bytes.get(self.pos).copied()
} else {
None
};
let (info, opcode_bytes_consumed) = opcode::lookup(first_byte, next_byte);
if opcode_bytes_consumed == 2 {
if self.pos >= self.limit {
return Some(Err(Error::UnexpectedEndOfPCode {
offset: start,
needed: 2,
}));
}
self.pos = match self.pos.checked_add(1) {
Some(v) => v,
None => {
return Some(Err(Error::ArithmeticOverflow {
context: "decoder pos advance after lead byte",
}));
}
};
}
let operands;
if info.is_variable_length() {
let after_size = match self.pos.checked_add(2) {
Some(v) => v,
None => {
return Some(Err(Error::ArithmeticOverflow {
context: "decoder variable-length size offset",
}));
}
};
if after_size > self.limit {
let needed = after_size.saturating_sub(start);
return Some(Err(Error::UnexpectedEndOfPCode {
offset: start,
needed,
}));
}
let byte_count = match read_u16_le(self.bytes, self.pos) {
Ok(v) => v,
Err(e) => return Some(Err(e)),
};
self.pos = after_size;
let payload_end = match self.pos.checked_add(byte_count as usize) {
Some(v) => v,
None => {
return Some(Err(Error::ArithmeticOverflow {
context: "decoder variable-length payload end",
}));
}
};
if payload_end > self.limit {
return Some(Err(Error::InvalidVariableLengthSize {
opcode_name: info.mnemonic,
size: byte_count,
}));
}
self.pos = payload_end;
operands = [
Some(Operand::VariableLength { byte_count }),
None,
None,
None,
];
} else if info.size > 0 {
match operand::decode_operands(
info.operand_format,
self.bytes,
&mut self.pos,
self.limit,
) {
Ok(ops) => operands = ops,
Err(e) => return Some(Err(e)),
}
let lead_extra = opcode_bytes_consumed.saturating_sub(1);
let expected_end = start
.checked_add(lead_extra)
.and_then(|v| v.checked_add(info.size as usize));
if let Some(expected_end) = expected_end
&& self.pos < expected_end
&& expected_end <= self.limit
{
self.pos = expected_end;
}
} else {
operands = [None; 4];
}
let raw_len = u8::try_from(self.pos.saturating_sub(start)).unwrap_or(RAW_LEN_SATURATION);
let offset_u16 = u16::try_from(start).unwrap_or(u16::MAX);
Some(Ok(Instruction {
offset: offset_u16,
raw_len,
info,
operands,
}))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pcode::opcode::{DispatchTable, PRIMARY_TABLE};

    /// Decodes `bytes` as one complete byte stream, panicking on the first decode error.
    fn decode_all(bytes: &[u8]) -> Vec<Instruction> {
        InstructionIterator::new(bytes, bytes.len() as u16)
            .map(|decoded| decoded.expect("decode error"))
            .collect()
    }

    /// Decodes `bytes`, asserts that exactly one instruction came out, and returns it.
    fn decode_single(bytes: &[u8]) -> Instruction {
        let mut insns = decode_all(bytes);
        assert_eq!(insns.len(), 1);
        insns.remove(0)
    }

    /// Runs the decoder over `bytes` and keeps every yielded item, errors included.
    fn decode_raw(bytes: &[u8]) -> Vec<Result<Instruction, Error>> {
        InstructionIterator::new(bytes, bytes.len() as u16).collect()
    }

    #[test]
    fn test_exit_proc() {
        let insn = decode_single(&[0x14]);
        assert_eq!(insn.info.mnemonic, "ExitProc");
        assert_eq!(insn.raw_len, 1);
        assert_eq!(insn.offset, 0);
        assert!(insn.operands[0].is_none());
    }

    #[test]
    fn test_lit_i2() {
        let insn = decode_single(&[0xF3, 0x05, 0x00]);
        assert_eq!(insn.info.mnemonic, "LitI2");
        assert_eq!(insn.raw_len, 3);
        assert_eq!(insn.operands[0], Some(Operand::Int16(5)));
    }

    #[test]
    fn test_branch() {
        let insn = decode_single(&[0x1E, 0x20, 0x00]);
        assert_eq!(insn.info.mnemonic, "Branch");
        assert_eq!(insn.operands[0], Some(Operand::JumpTarget(0x20)));
    }

    #[test]
    fn test_lit_str() {
        let insn = decode_single(&[0x1B, 0x10, 0x00]);
        assert_eq!(insn.info.mnemonic, "LitStr");
        assert_eq!(insn.operands[0], Some(Operand::ConstPoolIndex(0x10)));
    }

    #[test]
    fn test_fld_rf_var() {
        let insn = decode_single(&[0x04, 0x70, 0xFF]);
        assert_eq!(insn.info.mnemonic, "FLdRfVar");
        // 0xFF70 read as a little-endian i16 is -144.
        assert_eq!(insn.operands[0], Some(Operand::StackVar(-144)));
    }

    #[test]
    fn test_lit_i4() {
        let insn = decode_single(&[0xF5, 0x78, 0x56, 0x34, 0x12]);
        assert_eq!(insn.info.mnemonic, "LitI4");
        assert_eq!(insn.operands[0], Some(Operand::Int32(0x12345678)));
    }

    #[test]
    fn test_multiple_instructions() {
        let insns = decode_all(&[0xF3, 0x05, 0x00, 0xF3, 0x0A, 0x00, 0xA9, 0x14]);
        assert_eq!(insns.len(), 4);

        // Expected (mnemonic, offset) for each decoded instruction, in stream order.
        let expected = [
            ("LitI2", 0_u16),
            ("LitI2", 3),
            ("AddI2", 6),
            ("ExitProc", 7),
        ];
        for (insn, (mnemonic, offset)) in insns.iter().zip(expected) {
            assert_eq!(insn.info.mnemonic, mnemonic);
            assert_eq!(insn.offset, offset);
        }
    }

    #[test]
    fn test_extended_opcode_lead0() {
        let insn = decode_single(&[0xFB, 0x00]);
        assert_eq!(insn.info.table, DispatchTable::Lead0);
        assert_eq!(insn.raw_len, 2);
    }

    #[test]
    fn test_ffree_var_variable_length() {
        // Opcode, little-endian byte count (6), then six payload bytes.
        let insn = decode_single(&[0x36, 0x06, 0x00, 0x70, 0xFF, 0x68, 0xFF, 0x60, 0xFF]);
        assert_eq!(insn.info.mnemonic, "FFreeVar");
        assert_eq!(
            insn.operands[0],
            Some(Operand::VariableLength { byte_count: 6 })
        );
        // 1 opcode byte + 2 count bytes + 6 payload bytes.
        assert_eq!(insn.raw_len, 9);
    }

    #[test]
    fn test_ffree_str_variable_length() {
        // Opcode, little-endian byte count (2), then two payload bytes.
        let insn = decode_single(&[0x32, 0x02, 0x00, 0x80, 0xFF]);
        assert_eq!(insn.info.mnemonic, "FFreeStr");
    }

    #[test]
    fn test_truncated_instruction() {
        // LitI2 with only one of its two operand bytes present.
        let results = decode_raw(&[0xF3, 0x05]);
        assert!(results.iter().any(Result::is_err));
    }

    #[test]
    fn test_truncated_lead_byte() {
        // A lone 0xFB with no byte following it.
        let results = decode_raw(&[0xFB]);
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_empty_stream() {
        let no_bytes: &[u8] = &[];
        assert!(decode_all(no_bytes).is_empty());
    }

    #[test]
    fn test_data_type_and_operand_type() {
        /// Checks that the first decoded instruction and its operand 0 both report `expected`.
        fn assert_typed(bytes: &[u8], expected: PCodeDataType) {
            let decoded = decode_all(bytes);
            let insn = &decoded[0];
            assert_eq!(insn.data_type(), Some(expected));
            assert_eq!(insn.operand_type(0), Some(expected));
        }

        assert_typed(&[0xF5, 0x78, 0x56, 0x34, 0x12], PCodeDataType::I4);
        assert_typed(&[0xF3, 0x05, 0x00], PCodeDataType::I2);

        // ExitProc carries no data type and no operands; index 7 is out of range.
        let decoded = decode_all(&[0x14]);
        let exit_proc = &decoded[0];
        assert_eq!(exit_proc.data_type(), None);
        assert_eq!(exit_proc.operand_type(0), None);
        assert_eq!(exit_proc.operand_type(7), None);
    }

    #[test]
    fn test_position_tracking() {
        let bytes = [0x14, 0x14];
        let mut iter = InstructionIterator::new(&bytes, bytes.len() as u16);
        assert_eq!(iter.position(), 0);

        // Each single-byte instruction moves the cursor forward by one.
        for expected_pos in 1..=2 {
            let _ = iter.next();
            assert_eq!(iter.position(), expected_pos);
        }
        assert!(iter.next().is_none());
    }

    #[test]
    fn test_invalid_variable_length_size() {
        // Byte count 0x00FF, but no payload bytes follow.
        let results = decode_raw(&[0x36, 0xFF, 0x00]);
        assert_eq!(results.len(), 1);
        assert!(results[0].is_err());
    }

    #[test]
    fn test_decode_all_single_byte_primary_opcodes() {
        for opcode in 0..=0xFA_u8 {
            let info = &PRIMARY_TABLE[usize::from(opcode)];
            if info.size != 1 || !info.is_implemented() {
                continue;
            }

            let insns = decode_all(&[opcode]);
            assert_eq!(
                insns.len(),
                1,
                "Opcode 0x{:02X} ({}) should decode to 1 instruction",
                opcode,
                info.mnemonic
            );
            assert_eq!(insns[0].raw_len, 1);
        }
    }
}