use crate::{
arith::ArithError,
core::ArmVersion,
core::ItState,
instructions::{self, Encoding, Instruction, InstructionSize},
};
use std::{fmt::Display, rc::Rc};
/// Common interface of the instruction decoders defined in this module.
pub trait InstructionDecode {
/// Tries to decode the raw instruction word `ins`.
///
/// * `ins` - raw instruction bits.
/// * `size` - whether `ins` is a 16-bit or a 32-bit instruction.
/// * `state` - IT state that is forwarded to the instruction-specific
///   decoding function.
///
/// Returns the decoded instruction behind an `Rc`, or an
/// [`InstructionDecodeError`] when decoding fails.
fn try_decode(
&self,
ins: u32,
size: InstructionSize,
state: ItState,
) -> Result<Rc<dyn Instruction>, InstructionDecodeError>;
}
/// Error reported by [`InstructionDecode::try_decode`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InstructionDecodeError {
/// No registered pattern produced an instruction for the given bits.
Unknown,
/// NOTE(review): never produced by the decoders in this file; presumably
/// reserved for architecturally UNDEFINED encodings - confirm.
Undefined,
/// The opcode bits of a pattern matched but at least one parenthesised
/// ("should be") bit had an unexpected value.
Unpredictable,
}
/// One bit position of an [`InstructionPattern`].
#[derive(Clone, Copy)]
pub enum InstructionPatternBit {
/// Bit written `0` in the pattern: must be 0 for the pattern to match.
OpcodeZero,
/// Bit written `1` in the pattern: must be 1 for the pattern to match.
OpcodeOne,
/// Bit written `x` in the pattern: ignored when matching.
Arg,
/// Bit written `(0)` in the pattern: ignored when matching, but a value
/// of 1 makes the match report `Unpredictable`.
ArgZero,
/// Bit written `(1)` in the pattern: ignored when matching, but a value
/// of 0 makes the match report `Unpredictable`.
ArgOne,
}
/// Error returned by an instruction-specific decoding function
/// (see `InstructionDecodingFunction`).
///
/// The decoders in this file treat every variant the same way: the failing
/// candidate is skipped and the next matching pattern is tried.
#[derive(Debug)]
pub enum DecodeError {
/// NOTE(review): presumably "the bits do not belong to this instruction
/// after all" - confirm against the instruction implementations.
Other,
/// Produced, among others, when converting `ArithError::Unpredictable`.
Unpredictable,
/// NOTE(review): not constructed in this file; presumably signals an
/// UNDEFINED encoding - confirm.
Undefined,
}
/// Lets decoding functions use `?` on results carrying an `ArithError`.
impl From<ArithError> for DecodeError {
/// Maps each `ArithError` variant to the `DecodeError` variant of the same name.
fn from(value: ArithError) -> Self {
// Exhaustive on purpose: a new `ArithError` variant must be mapped here.
match value {
ArithError::Unpredictable => DecodeError::Unpredictable,
}
}
}
/// Signature of a type-erased, instruction-specific decoding function: it
/// receives the encoding of the matched pattern, the raw instruction bits and
/// the IT state, and returns the decoded instruction or a `DecodeError`.
type InstructionDecodingFunction =
fn(Encoding, u32, ItState) -> Result<Rc<dyn Instruction>, DecodeError>;
/// A parsed bit pattern describing one encoding of an instruction, together
/// with the masks derived from it for fast matching.
#[derive(Clone)]
pub struct InstructionPattern {
// Pattern bits in the order they were written; the first entry ends up as
// the most significant bit of the masks below.
bits: Vec<InstructionPatternBit>,
// 1 at every opcode (`0` / `1`) position.
test_mask: u32,
// Expected value of the opcode positions selected by `test_mask`.
test_value: u32,
// 1 at every parenthesised (`(0)` / `(1)`) position.
unp_mask: u32,
// Expected value of the positions selected by `unp_mask`.
unp_value: u32,
}
impl InstructionPattern {
    /// Parses a textual bit pattern, most significant bit first.
    ///
    /// Accepted tokens:
    /// * `0` / `1` - opcode bit that must have exactly that value,
    /// * `x` - argument bit, ignored when matching,
    /// * `(0)` / `(1)` - argument bit that should have that value; any other
    ///   value makes [`InstructionPattern::test`] report `Unpredictable`.
    ///
    /// # Panics
    ///
    /// Panics if `pattern` contains any other character, leaves a `(` group
    /// unterminated, or does not describe exactly 16 or 32 bits.
    pub fn new(pattern: &str) -> Self {
        // Position of the parser relative to a `(b)` group.
        #[derive(Clone, Copy)]
        enum Group {
            Outside,
            Opened,
            AwaitingClose,
        }

        let bit_count = pattern
            .chars()
            .filter(|&c| matches!(c, '0' | '1' | 'x'))
            .count();
        let mut bits = Vec::with_capacity(bit_count);
        let mut group = Group::Outside;
        for c in pattern.chars() {
            group = match (group, c) {
                (Group::Outside, '0') => {
                    bits.push(InstructionPatternBit::OpcodeZero);
                    Group::Outside
                }
                (Group::Outside, '1') => {
                    bits.push(InstructionPatternBit::OpcodeOne);
                    Group::Outside
                }
                (Group::Outside, 'x') => {
                    bits.push(InstructionPatternBit::Arg);
                    Group::Outside
                }
                (Group::Outside, '(') => Group::Opened,
                (Group::Opened, '0') => {
                    bits.push(InstructionPatternBit::ArgZero);
                    Group::AwaitingClose
                }
                (Group::Opened, '1') => {
                    bits.push(InstructionPatternBit::ArgOne);
                    Group::AwaitingClose
                }
                (Group::AwaitingClose, ')') => Group::Outside,
                (_, other) => panic!(
                    "unexpected character {other:?} in instruction pattern {pattern:?}"
                ),
            };
        }
        assert!(
            matches!(group, Group::Outside),
            "unterminated `(` group in instruction pattern {pattern:?}"
        );
        assert!(
            bits.len() == 16 || bits.len() == 32,
            "instruction pattern {pattern:?} describes {} bits, expected 16 or 32",
            bits.len()
        );

        // Fold the bits into masks; the first pattern bit becomes the most
        // significant bit because every step shifts the accumulators left.
        let mut test_mask = 0;
        let mut test_value = 0;
        let mut unp_mask = 0;
        let mut unp_value = 0;
        for bit in &bits {
            let (tm, tv, um, uv) = match bit {
                InstructionPatternBit::OpcodeZero => (1, 0, 0, 0),
                InstructionPatternBit::OpcodeOne => (1, 1, 0, 0),
                InstructionPatternBit::Arg => (0, 0, 0, 0),
                InstructionPatternBit::ArgZero => (0, 0, 1, 0),
                InstructionPatternBit::ArgOne => (0, 0, 1, 1),
            };
            test_mask = (test_mask << 1) | tm;
            test_value = (test_value << 1) | tv;
            unp_mask = (unp_mask << 1) | um;
            unp_value = (unp_value << 1) | uv;
        }

        Self {
            bits,
            test_mask,
            test_value,
            unp_mask,
            unp_value,
        }
    }

    /// Checks whether `ins`, an instruction of the given `size`, matches this
    /// pattern.
    ///
    /// Returns `Ok(false)` when an opcode bit differs or the size is not the
    /// size of this pattern, `Ok(true)` on a clean match, and
    /// `Err(InstructionDecodeError::Unpredictable)` when the opcode bits and
    /// the size match but a parenthesised bit has an unexpected value.
    pub fn test(&self, ins: u32, size: InstructionSize) -> Result<bool, InstructionDecodeError> {
        if ins & self.test_mask != self.test_value {
            return Ok(false);
        }
        if size != self.size() {
            return Ok(false);
        }
        // Only checked once we know the pattern applies, so that unrelated
        // instructions are never reported as unpredictable.
        if ins & self.unp_mask != self.unp_value {
            return Err(InstructionDecodeError::Unpredictable);
        }
        Ok(true)
    }

    /// Returns the instruction size described by this pattern (16 or 32 bits).
    pub fn size(&self) -> InstructionSize {
        match self.bits.len() {
            16 => InstructionSize::Ins16,
            32 => InstructionSize::Ins32,
            other => unreachable!(
                "instruction pattern holds {other} bits; `new` only accepts 16 or 32"
            ),
        }
    }
}
/// Renders the pattern back into the textual syntax accepted by
/// `InstructionPattern::new` (`0`, `1`, `x`, `(0)`, `(1)`).
impl Display for InstructionPattern {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered: String = self
            .bits
            .iter()
            .map(|bit| match bit {
                InstructionPatternBit::OpcodeZero => "0",
                InstructionPatternBit::OpcodeOne => "1",
                InstructionPatternBit::Arg => "x",
                InstructionPatternBit::ArgZero => "(0)",
                InstructionPatternBit::ArgOne => "(1)",
            })
            .collect();
        f.write_str(&rendered)
    }
}
/// All patterns of one instruction type that apply to the selected
/// architecture version, together with the function that decodes them.
#[derive(Clone)]
pub struct BasicDecoderEntry {
/// Pairs of (encoding, pattern); the encoding is handed to `decoder` when
/// its pattern matches.
pub patterns: Vec<(Encoding, InstructionPattern)>,
/// Type-erased decoding function of the instruction type.
pub decoder: InstructionDecodingFunction,
}
/// Decoder that tests the patterns of every registered instruction in order
/// until one of them decodes successfully.
pub struct BasicInstructionDecoder {
/// Registered instructions, in lookup order.
pub entries: Vec<BasicDecoderEntry>,
}
/// Adapts `T::try_decode` to the type-erased `InstructionDecodingFunction`
/// signature by wrapping the decoded instruction in an `Rc<dyn Instruction>`.
/// Errors are passed through unchanged.
fn rc_decoder<T: 'static + Instruction>(
    encoding: Encoding,
    ins: u32,
    state: ItState,
) -> Result<Rc<dyn Instruction>, DecodeError> {
    let decoded = T::try_decode(encoding, ins, state)?;
    Ok(Rc::new(decoded))
}
impl BasicInstructionDecoder {
pub fn new(version: ArmVersion) -> Self {
let mut dec = Self {
entries: Vec::new(),
};
dec.insert::<instructions::adc::AdcImm>(version);
dec.insert::<instructions::adc::AdcReg>(version);
dec.insert::<instructions::add::AddImm>(version);
dec.insert::<instructions::add::AddReg>(version);
dec.insert::<instructions::add::AddSpPlusImm>(version);
dec.insert::<instructions::add::AddSpPlusReg>(version);
dec.insert::<instructions::adr::Adr>(version);
dec.insert::<instructions::and::AndImm>(version);
dec.insert::<instructions::and::AndReg>(version);
dec.insert::<instructions::asr::AsrImm>(version);
dec.insert::<instructions::asr::AsrReg>(version);
dec.insert::<instructions::b::B>(version);
dec.insert::<instructions::bfc::Bfc>(version);
dec.insert::<instructions::bfi::Bfi>(version);
dec.insert::<instructions::bic::BicImm>(version);
dec.insert::<instructions::bic::BicReg>(version);
dec.insert::<instructions::bkpt::Bkpt>(version);
dec.insert::<instructions::bl::Bl>(version);
dec.insert::<instructions::blx::Blx>(version);
dec.insert::<instructions::bx::Bx>(version);
dec.insert::<instructions::cbnz::Cbnz>(version);
dec.insert::<instructions::cdp::Cdp>(version);
dec.insert::<instructions::clrex::Clrex>(version);
dec.insert::<instructions::clz::Clz>(version);
dec.insert::<instructions::cmn::CmnImm>(version);
dec.insert::<instructions::cmn::CmnReg>(version);
dec.insert::<instructions::cmp::CmpImm>(version);
dec.insert::<instructions::cmp::CmpReg>(version);
dec.insert::<instructions::cps::Cps>(version);
dec.insert::<instructions::csdb::Csdb>(version);
dec.insert::<instructions::dbg::Dbg>(version);
dec.insert::<instructions::dmb::Dmb>(version);
dec.insert::<instructions::dsb::Dsb>(version);
dec.insert::<instructions::eor::EorImm>(version);
dec.insert::<instructions::eor::EorReg>(version);
dec.insert::<instructions::isb::Isb>(version);
dec.insert::<instructions::it::It>(version);
dec.insert::<instructions::ldc::LdcImm>(version);
dec.insert::<instructions::ldc::LdcLit>(version);
dec.insert::<instructions::ldm::Ldm>(version);
dec.insert::<instructions::ldmdb::Ldmdb>(version);
dec.insert::<instructions::ldr::LdrImm>(version);
dec.insert::<instructions::ldr::LdrImm>(version);
dec.insert::<instructions::ldr::LdrLit>(version);
dec.insert::<instructions::ldr::LdrReg>(version);
dec.insert::<instructions::ldrb::LdrbImm>(version);
dec.insert::<instructions::ldrb::LdrbLit>(version);
dec.insert::<instructions::ldrb::LdrbReg>(version);
dec.insert::<instructions::ldrbt::Ldrbt>(version);
dec.insert::<instructions::ldrd::LdrdImm>(version);
dec.insert::<instructions::ldrd::LdrdLit>(version);
dec.insert::<instructions::ldrex::Ldrex>(version);
dec.insert::<instructions::ldrexb::Ldrexb>(version);
dec.insert::<instructions::ldrexh::Ldrexh>(version);
dec.insert::<instructions::ldrh::LdrhImm>(version);
dec.insert::<instructions::ldrh::LdrhLit>(version);
dec.insert::<instructions::ldrh::LdrhReg>(version);
dec.insert::<instructions::ldrht::Ldrht>(version);
dec.insert::<instructions::ldrsb::LdrsbImm>(version);
dec.insert::<instructions::ldrsb::LdrsbLit>(version);
dec.insert::<instructions::ldrsb::LdrsbReg>(version);
dec.insert::<instructions::ldrsbt::Ldrsbt>(version);
dec.insert::<instructions::ldrsh::LdrshImm>(version);
dec.insert::<instructions::ldrsh::LdrshLit>(version);
dec.insert::<instructions::ldrsh::LdrshReg>(version);
dec.insert::<instructions::ldrsht::Ldrsht>(version);
dec.insert::<instructions::ldrt::Ldrt>(version);
dec.insert::<instructions::lsl::LslImm>(version);
dec.insert::<instructions::lsl::LslReg>(version);
dec.insert::<instructions::lsr::LsrImm>(version);
dec.insert::<instructions::lsr::LsrReg>(version);
dec.insert::<instructions::mcr::Mcr>(version);
dec.insert::<instructions::mcrr::Mcrr>(version);
dec.insert::<instructions::mla::Mla>(version);
dec.insert::<instructions::mls::Mls>(version);
dec.insert::<instructions::mov::MovImm>(version);
dec.insert::<instructions::mov::MovReg>(version);
dec.insert::<instructions::mov::MovRegShiftReg>(version);
dec.insert::<instructions::movt::Movt>(version);
dec.insert::<instructions::mrc::Mrc>(version);
dec.insert::<instructions::mrrc::Mrrc>(version);
dec.insert::<instructions::mrs::Mrs>(version);
dec.insert::<instructions::msr::Msr>(version);
dec.insert::<instructions::mul::Mul>(version);
dec.insert::<instructions::mvn::MvnImm>(version);
dec.insert::<instructions::mvn::MvnReg>(version);
dec.insert::<instructions::nop::Nop>(version);
dec.insert::<instructions::orn::OrnImm>(version);
dec.insert::<instructions::orn::OrnReg>(version);
dec.insert::<instructions::orr::OrrImm>(version);
dec.insert::<instructions::orr::OrrReg>(version);
dec.insert::<instructions::pop::Pop>(version);
dec.insert::<instructions::pld::PldImm>(version);
dec.insert::<instructions::pld::PldLit>(version);
dec.insert::<instructions::pld::PldReg>(version);
dec.insert::<instructions::pli::PliImmLit>(version);
dec.insert::<instructions::pli::PliReg>(version);
dec.insert::<instructions::push::Push>(version);
dec.insert::<instructions::pssbb::Pssbb>(version);
dec.insert::<instructions::qadd16::Qadd16>(version);
dec.insert::<instructions::qadd8::Qadd8>(version);
dec.insert::<instructions::qadd::Qadd>(version);
dec.insert::<instructions::qdadd::Qdadd>(version);
dec.insert::<instructions::qdsub::Qdsub>(version);
dec.insert::<instructions::qsub16::Qsub16>(version);
dec.insert::<instructions::qsub8::Qsub8>(version);
dec.insert::<instructions::qsub::Qsub>(version);
dec.insert::<instructions::r#yield::Yield>(version);
dec.insert::<instructions::rbit::Rbit>(version);
dec.insert::<instructions::rev16::Rev16>(version);
dec.insert::<instructions::rev::Rev>(version);
dec.insert::<instructions::revsh::Revsh>(version);
dec.insert::<instructions::ror::RorImm>(version);
dec.insert::<instructions::ror::RorReg>(version);
dec.insert::<instructions::rrx::Rrx>(version);
dec.insert::<instructions::rsb::RsbImm>(version);
dec.insert::<instructions::rsb::RsbReg>(version);
dec.insert::<instructions::sadd16::Sadd16>(version);
dec.insert::<instructions::sadd8::Sadd8>(version);
dec.insert::<instructions::sbc::SbcImm>(version);
dec.insert::<instructions::sbc::SbcReg>(version);
dec.insert::<instructions::sbfx::Sbfx>(version);
dec.insert::<instructions::sdiv::Sdiv>(version);
dec.insert::<instructions::sev::Sev>(version);
dec.insert::<instructions::smlal::Smlal>(version);
dec.insert::<instructions::smull::Smull>(version);
dec.insert::<instructions::ssat::Ssat>(version);
dec.insert::<instructions::stc::Stc>(version);
dec.insert::<instructions::stm::Stm>(version);
dec.insert::<instructions::stmdb::Stmdb>(version);
dec.insert::<instructions::str::StrImm>(version);
dec.insert::<instructions::str::StrReg>(version);
dec.insert::<instructions::strb::StrbImm>(version);
dec.insert::<instructions::strb::StrbReg>(version);
dec.insert::<instructions::strbt::Strbt>(version);
dec.insert::<instructions::strd::StrdImm>(version);
dec.insert::<instructions::strex::Strex>(version);
dec.insert::<instructions::strexb::Strexb>(version);
dec.insert::<instructions::strexh::Strexh>(version);
dec.insert::<instructions::strh::StrhImm>(version);
dec.insert::<instructions::strh::StrhReg>(version);
dec.insert::<instructions::strht::Strht>(version);
dec.insert::<instructions::strt::Strt>(version);
dec.insert::<instructions::sub::SubImm>(version);
dec.insert::<instructions::sub::SubReg>(version);
dec.insert::<instructions::sub::SubSpMinusImm>(version);
dec.insert::<instructions::sub::SubSpMinusReg>(version);
dec.insert::<instructions::ssbb::Ssbb>(version);
dec.insert::<instructions::svc::Svc>(version);
dec.insert::<instructions::sxtb::Sxtb>(version);
dec.insert::<instructions::sxth::Sxth>(version);
dec.insert::<instructions::tbb::Tbb>(version);
dec.insert::<instructions::teq::TeqImm>(version);
dec.insert::<instructions::teq::TeqReg>(version);
dec.insert::<instructions::tst::TstImm>(version);
dec.insert::<instructions::tst::TstReg>(version);
dec.insert::<instructions::ubfx::Ubfx>(version);
dec.insert::<instructions::udf::Udf>(version);
dec.insert::<instructions::udiv::Udiv>(version);
dec.insert::<instructions::umlal::Umlal>(version);
dec.insert::<instructions::umull::Umull>(version);
dec.insert::<instructions::usat16::Usat16>(version);
dec.insert::<instructions::usat::Usat>(version);
dec.insert::<instructions::uxtb::Uxtb>(version);
dec.insert::<instructions::uxth::Uxth>(version);
dec.insert::<instructions::wfe::Wfe>(version);
dec.insert::<instructions::wfi::Wfi>(version);
dec
}
pub fn insert<T: 'static + Instruction>(&mut self, version: ArmVersion) {
let mut patterns = Vec::new();
for pattern in T::patterns().iter() {
if pattern.versions.iter().any(|&v| v == version) {
patterns.push((
pattern.encoding,
InstructionPattern::new(pattern.expression),
));
}
}
if !patterns.is_empty() {
self.entries.push(BasicDecoderEntry {
patterns,
decoder: rc_decoder::<T>,
});
}
}
}
impl InstructionDecode for BasicInstructionDecoder {
    /// Walks every entry and every pattern in registration order and returns
    /// the first instruction whose pattern matches and whose decoding
    /// function succeeds.
    ///
    /// A pattern reporting `Unpredictable` aborts the search with that error.
    /// A decoding function that fails is skipped, whatever its error.
    /// `Unknown` is returned once all candidates are exhausted.
    fn try_decode(
        &self,
        ins: u32,
        size: InstructionSize,
        state: ItState,
    ) -> Result<Rc<dyn Instruction>, InstructionDecodeError> {
        for entry in self.entries.iter() {
            for (encoding, pattern) in &entry.patterns {
                if !pattern.test(ins, size)? {
                    continue;
                }
                match (entry.decoder)(*encoding, ins, state) {
                    Ok(decoded) => return Ok(decoded),
                    // The candidate rejected the bits: keep looking.
                    Err(_) => continue,
                }
            }
        }
        Err(InstructionDecodeError::Unknown)
    }
}
/// Decoder that answers 16-bit lookups from a precomputed table and defers
/// everything else to a [`BasicInstructionDecoder`].
pub struct Lut16InstructionDecoder {
// Decoder used to fill the table and as a fallback at decode time.
base_decoder: BasicInstructionDecoder,
// Decode result for each of the 65536 possible 16-bit instruction values,
// computed with a freshly created `ItState`.
lut16: Vec<Result<Rc<dyn Instruction>, InstructionDecodeError>>,
}
impl Lut16InstructionDecoder {
    /// Builds the decoder for `version` and eagerly decodes every possible
    /// 16-bit instruction value (with `ItState::new()`) into the lookup table.
    pub fn new(version: ArmVersion) -> Self {
        let base_decoder = BasicInstructionDecoder::new(version);
        let mut lut16 = Vec::with_capacity(usize::from(u16::MAX) + 1);
        for halfword in 0..=u16::MAX {
            let decoded = base_decoder.try_decode(
                u32::from(halfword),
                InstructionSize::Ins16,
                ItState::new(),
            );
            lut16.push(decoded);
        }
        Self {
            base_decoder,
            lut16,
        }
    }
}
impl InstructionDecode for Lut16InstructionDecoder {
    /// Serves 16-bit instructions from the lookup table when the raw IT state
    /// value is zero; every other request goes to the base decoder.
    ///
    /// The table is indexed with `ins` directly, so a 16-bit request is
    /// expected to carry no bits above bit 15.
    fn try_decode(
        &self,
        ins: u32,
        size: InstructionSize,
        state: ItState,
    ) -> Result<Rc<dyn Instruction>, InstructionDecodeError> {
        match size {
            // The table was built with a fresh IT state, so it is only used
            // when the raw state value is zero.
            InstructionSize::Ins16 if state.0 == 0 => self.lut16[ins as usize].clone(),
            InstructionSize::Ins16 | InstructionSize::Ins32 => {
                self.base_decoder.try_decode(ins, size, state)
            }
        }
    }
}
/// Decoder that buckets patterns by the value of their leading opcode bits,
/// so that a lookup only scans the patterns of one bucket.
pub struct GroupedInstructionDecoder {
// Number of leading pattern bits that form the bucket index.
head_bit_count: u8,
// One bucket per possible head value (`1 << head_bit_count` buckets); each
// holds (pattern, encoding, decoding function) in insertion order.
entries: Vec<Vec<(InstructionPattern, Encoding, InstructionDecodingFunction)>>,
}
impl GroupedInstructionDecoder {
    /// Creates an empty decoder that groups patterns by their first
    /// `head_bit_count` bits, allocating `1 << head_bit_count` empty groups.
    ///
    /// `head_bit_count` must lie in `1..32`; this is only checked in debug
    /// builds.
    pub fn new(head_bit_count: u8) -> Self {
        debug_assert!((head_bit_count > 0) && (head_bit_count < 32));
        let mut entries = Vec::new();
        entries.resize_with(1 << head_bit_count, Vec::new);
        Self {
            head_bit_count,
            entries,
        }
    }

    /// Builds a grouped decoder from every pattern that a
    /// `BasicInstructionDecoder` registers for `version`.
    ///
    /// # Errors
    ///
    /// Returns `GroupingError` as soon as one pattern cannot be grouped
    /// (see [`GroupedInstructionDecoder::try_insert`]).
    pub fn try_from_basic_decoder(
        head_bit_count: u8,
        version: ArmVersion,
    ) -> Result<Self, GroupingError> {
        let mut result = Self::new(head_bit_count);
        let basic_decoder = BasicInstructionDecoder::new(version);
        for entry in basic_decoder.entries.iter() {
            result.try_insert_from_decoder_entry(entry)?;
        }
        Ok(result)
    }

    /// Adds `pattern` to the group selected by its first `head_bit_count`
    /// bits, remembering `encoding` and the decoding function `f` with it.
    ///
    /// # Errors
    ///
    /// Returns `GroupingError` when one of the head bits is not a fixed
    /// opcode bit, or when the pattern is shorter than the head.
    pub fn try_insert(
        &mut self,
        pattern: &InstructionPattern,
        encoding: Encoding,
        f: InstructionDecodingFunction,
    ) -> Result<(), GroupingError> {
        // `get` instead of slicing: a pattern shorter than the head must be
        // reported as an error rather than panic.
        let head = pattern
            .bits
            .get(..usize::from(self.head_bit_count))
            .ok_or(GroupingError {})?;
        let group = head.iter().try_fold(0usize, |acc, bit| match bit {
            InstructionPatternBit::OpcodeZero => Ok(acc << 1),
            InstructionPatternBit::OpcodeOne => Ok((acc << 1) | 1),
            InstructionPatternBit::Arg
            | InstructionPatternBit::ArgZero
            | InstructionPatternBit::ArgOne => Err(GroupingError {}),
        })?;
        self.entries[group].push((pattern.clone(), encoding, f));
        Ok(())
    }

    /// Adds every pattern of `entry`, in order, via
    /// [`GroupedInstructionDecoder::try_insert`].
    ///
    /// # Errors
    ///
    /// Stops at the first pattern that cannot be grouped; patterns inserted
    /// before the failing one stay in the decoder.
    pub fn try_insert_from_decoder_entry(
        &mut self,
        entry: &BasicDecoderEntry,
    ) -> Result<(), GroupingError> {
        entry
            .patterns
            .iter()
            .try_for_each(|(encoding, pattern)| self.try_insert(pattern, *encoding, entry.decoder))
    }
}
impl InstructionDecode for GroupedInstructionDecoder {
    /// Selects the group addressed by the leading `head_bit_count` bits of
    /// `ins` and returns the first instruction in it whose pattern matches
    /// and whose decoding function succeeds.
    ///
    /// A pattern reporting `Unpredictable` aborts the search with that error;
    /// a failing decoding function is skipped. `Unknown` is returned when the
    /// group has no successful candidate, when the head is wider than an
    /// instruction of `size`, or when `ins` has bits set above `size` so that
    /// no group corresponds to it.
    fn try_decode(
        &self,
        ins: u32,
        size: InstructionSize,
        state: ItState,
    ) -> Result<Rc<dyn Instruction>, InstructionDecodeError> {
        // A head wider than the instruction cannot address any group; report
        // that instead of underflowing the shift amount.
        let Some(shift) = size
            .bit_count()
            .checked_sub(usize::from(self.head_bit_count))
        else {
            return Err(InstructionDecodeError::Unknown);
        };
        let group = (ins >> shift) as usize;
        // Out of range only if `ins` carries bits above `size`.
        let Some(candidates) = self.entries.get(group) else {
            return Err(InstructionDecodeError::Unknown);
        };
        for (pattern, encoding, decode) in candidates {
            if pattern.test(ins, size)? {
                if let Ok(decoded) = (decode)(*encoding, ins, state) {
                    return Ok(decoded);
                }
            }
        }
        Err(InstructionDecodeError::Unknown)
    }
}
/// Returned by `GroupedInstructionDecoder` when a pattern cannot be assigned
/// to a group because one of its head bits is not a fixed opcode bit.
#[derive(Debug)]
pub struct GroupingError {}
/// Decoder combining a lookup table for 16-bit instructions with a grouped
/// decoder for 32-bit instructions.
pub struct Lut16AndGrouped32InstructionDecoder {
// Handles every 16-bit request.
lut_decoder: Lut16InstructionDecoder,
// Handles every 32-bit request; only holds 32-bit patterns.
group_decoder: GroupedInstructionDecoder,
}
impl Lut16AndGrouped32InstructionDecoder {
    /// Builds the 16-bit lookup decoder for `version`, then groups all of its
    /// 32-bit patterns by their five leading bits.
    ///
    /// # Panics
    ///
    /// Panics if a 32-bit pattern cannot be grouped, i.e. if one of its five
    /// leading bits is not a fixed opcode bit.
    pub fn new(version: ArmVersion) -> Self {
        let lut_decoder = Lut16InstructionDecoder::new(version);
        let mut group_decoder = GroupedInstructionDecoder::new(5);
        let candidates = lut_decoder.base_decoder.entries.iter().flat_map(|entry| {
            entry
                .patterns
                .iter()
                .map(move |(encoding, pattern)| (pattern, *encoding, entry.decoder))
        });
        for (pattern, encoding, decoder) in candidates {
            // 16-bit patterns are served by the lookup table instead.
            if pattern.size() == InstructionSize::Ins32 {
                group_decoder
                    .try_insert(pattern, encoding, decoder)
                    .unwrap();
            }
        }
        Self {
            lut_decoder,
            group_decoder,
        }
    }
}
impl InstructionDecode for Lut16AndGrouped32InstructionDecoder {
    /// Routes 16-bit instructions to the lookup-table decoder and 32-bit
    /// instructions to the grouped decoder.
    fn try_decode(
        &self,
        ins: u32,
        size: InstructionSize,
        state: ItState,
    ) -> Result<Rc<dyn Instruction>, InstructionDecodeError> {
        let delegate: &dyn InstructionDecode = match size {
            InstructionSize::Ins16 => &self.lut_decoder,
            InstructionSize::Ins32 => &self.group_decoder,
        };
        delegate.try_decode(ins, size, state)
    }
}
#[cfg(test)]
mod tests {
    use super::{
        BasicInstructionDecoder, Lut16AndGrouped32InstructionDecoder, Lut16InstructionDecoder,
    };
    use crate::{
        core::ItState,
        core::{ArmVersion::V7EM, Config, Processor},
        decoder::InstructionDecode,
        instructions::{InstructionSize, Mnemonic},
    };
    use rand::Rng;
    use std::{
        any::Any,
        fs::File,
        io::{BufRead, BufReader},
    };

    /// Decodes every instruction listed in `src/test_decoder.txt` and checks
    /// that the generated mnemonic equals the expected one.
    ///
    /// Each non-comment line holds the instruction bytes in hexadecimal,
    /// followed by the expected mnemonic starting at column 9. Lines starting
    /// with `#` and empty lines are skipped. Every instruction is also
    /// executed so that the IT state of the processor follows the listing.
    #[test]
    fn test_dissassembly() {
        let file = File::open("src/test_decoder.txt").unwrap();
        let buf_reader = BufReader::new(file);
        let decoder = BasicInstructionDecoder::new(V7EM);
        let mut proc = Processor::new(Config::v7em());
        let mut pc = 0x1000;
        for line in buf_reader.lines().map(|l| l.unwrap()) {
            // `starts_with` instead of slicing: an empty line must not panic.
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            let pos = line.find(" ").unwrap();
            let bytes = hex::decode(&line[..pos]).unwrap();
            let mnemonic = &line[9..];
            let halfword = u16::from_le_bytes(bytes[..2].try_into().unwrap());
            let size = InstructionSize::from_halfword(halfword);
            let ins: u32 = match size {
                InstructionSize::Ins16 => {
                    assert_eq!(bytes.len(), 2);
                    halfword as u32
                }
                InstructionSize::Ins32 => {
                    assert_eq!(bytes.len(), 4);
                    // The first halfword forms the upper 16 bits.
                    (halfword as u32) << 16
                        | u16::from_le_bytes(bytes[2..4].try_into().unwrap()) as u32
                }
            };
            let mut state = proc.registers.psr.it_state();
            let cond = state.current_condition();
            let Ok(ins) = decoder.try_decode(ins, size, state) else {
                println!(
                    "Failed to decode instruction 0x{:08x} (size {})",
                    ins,
                    size.byte_count()
                );
                println!("  Mnemonic: {mnemonic}");
                panic!("instruction could not be decoded");
            };
            let got_mnemonic = ins.mnemonic(pc, cond);
            if got_mnemonic != mnemonic {
                println!("Mnemonic generation failed:");
                println!("  Expected: {mnemonic}");
                println!("  Got     : {got_mnemonic}");
                panic!("generated mnemonic differs from the expected one");
            }
            state.advance();
            proc.registers.psr.set_it_state(state);
            let _result = ins.execute(&mut proc);
            pc += size.byte_count() as u32;
        }
    }

    /// Asserts that decoders `a` and `b` agree on `ins`: either both fail
    /// with the same error, or both succeed with the same instruction type.
    fn test_decoder(
        a: &dyn InstructionDecode,
        b: &dyn InstructionDecode,
        ins: u32,
        size: InstructionSize,
        it: ItState,
    ) {
        let ins_a = a.try_decode(ins, size, it);
        let ins_b = b.try_decode(ins, size, it);
        match (ins_a, ins_b) {
            (Ok(ins_a), Ok(ins_b)) => {
                // Dereference first: `type_id()` called on the `Rc` itself
                // resolves to the `TypeId` of `Rc<dyn Instruction>`, which is
                // the same for every instruction and makes the check vacuous.
                // NOTE(review): this yields the concrete instruction type only
                // if `Instruction` has `Any` as a supertrait - confirm.
                assert_eq!(
                    (*ins_a).type_id(),
                    (*ins_b).type_id(),
                    "decoders produced different instruction types for 0x{ins:08x}"
                )
            }
            (Err(err_a), Err(err_b)) => {
                assert_eq!(
                    err_a, err_b,
                    "decoders reported different errors for 0x{ins:08x}"
                )
            }
            (Ok(_), Err(err_b)) => {
                panic!("only the first decoder decoded 0x{ins:08x}; the second failed with {err_b:?}")
            }
            (Err(err_a), Ok(_)) => {
                panic!("only the second decoder decoded 0x{ins:08x}; the first failed with {err_a:?}")
            }
        }
    }

    /// Cross-checks the optimised decoders against `BasicInstructionDecoder`:
    /// exhaustively for 16-bit instructions and on random samples for 32-bit
    /// instructions.
    #[test]
    fn test_instruction_decoders() {
        let dec_a = BasicInstructionDecoder::new(V7EM);
        let dec_b = Lut16InstructionDecoder::new(V7EM);
        let dec_c = Lut16AndGrouped32InstructionDecoder::new(V7EM);
        let it = ItState::new();
        for i in 0..=u16::MAX {
            test_decoder(&dec_a, &dec_b, i as u32, InstructionSize::Ins16, it);
            test_decoder(&dec_a, &dec_c, i as u32, InstructionSize::Ins16, it);
        }
        let mut rng = rand::rng();
        for _ in 0..=100000 {
            let ins = rng.random();
            test_decoder(&dec_a, &dec_b, ins, InstructionSize::Ins32, it);
            test_decoder(&dec_a, &dec_c, ins, InstructionSize::Ins32, it);
        }
    }
}