use std::{
collections::{HashMap, HashSet},
sync::Arc,
};
use crate::{
assembly::{
BasicBlock, FlowType, HandlerEntryInfo, Immediate, Instruction, Operand, OperandType,
StackBehavior, INSTRUCTIONS, INSTRUCTIONS_FE,
},
file::{parser::Parser, File},
metadata::{
method::{ExceptionHandler, ExceptionHandlerFlags, Method},
token::Token,
},
utils::VisitedMap,
Result,
};
/// Working state used while turning a stretch of CIL bytecode into basic blocks.
struct Decoder<'a> {
/// Basic blocks discovered so far; `block_id` indexes into this list while decoding.
blocks: Vec<BasicBlock>,
/// Exception handler table of the code being decoded, if one was supplied.
exceptions: Option<&'a [ExceptionHandler]>,
/// Map of byte offsets that have already been decoded (held behind an `Arc`).
visited: Arc<VisitedMap>,
/// Parser the instruction bytes are read from.
parser: &'a mut Parser<'a>,
/// Index of the block currently being decoded.
block_id: usize,
/// Parser offset of the first instruction.
offset_start: usize,
/// RVA assigned to the first instruction.
rva_start: usize,
}
impl<'a> Decoder<'a> {
pub fn new(
parser: &'a mut Parser<'a>,
offset: usize,
rva: usize,
exceptions: Option<&'a [ExceptionHandler]>,
visited: Arc<VisitedMap>,
) -> Result<Self> {
if offset > parser.len() {
return Err(out_of_bounds_error!());
}
Ok(Decoder {
blocks: Vec::new(),
exceptions,
visited,
parser,
block_id: 0,
offset_start: offset,
rva_start: rva,
})
}
/// Returns the basic blocks currently held by the decoder as a borrowed slice.
pub fn blocks(&self) -> &[BasicBlock] {
&self.blocks
}
/// Consumes the decoder and hands back ownership of its basic blocks.
pub fn into_blocks(self) -> Vec<BasicBlock> {
self.blocks
}
fn decode_blocks(&mut self) -> Result<()> {
let mut entry_points: HashSet<u64> = HashSet::new();
self.blocks
.push(BasicBlock::new(0, self.rva_start as u64, self.offset_start));
entry_points.insert(self.rva_start as u64);
if let Some(exceptions) = self.exceptions {
let rva_base = self.rva_start as u64;
let mut candidates: Vec<(u64, u32)> =
Vec::with_capacity(exceptions.len().saturating_mul(3));
for handler in exceptions {
let handler_rva = rva_base
.checked_add(u64::from(handler.handler_offset))
.ok_or(out_of_bounds_error!())?;
candidates.push((handler_rva, handler.handler_offset));
if handler.filter_offset > 0 {
let filter_rva = rva_base
.checked_add(u64::from(handler.filter_offset))
.ok_or(out_of_bounds_error!())?;
candidates.push((filter_rva, handler.filter_offset));
}
let try_rva = rva_base
.checked_add(u64::from(handler.try_offset))
.ok_or(out_of_bounds_error!())?;
candidates.push((try_rva, handler.try_offset));
}
for (entry_rva, entry_offset_u32) in candidates {
if entry_points.contains(&entry_rva) {
continue;
}
let entry_offset = self
.offset_start
.checked_add(entry_offset_u32 as usize)
.ok_or(out_of_bounds_error!())?;
if entry_offset < self.parser.len() && !self.visited.get(entry_offset) {
self.blocks
.push(BasicBlock::new(self.blocks.len(), entry_rva, entry_offset));
entry_points.insert(entry_rva);
}
}
}
while self.block_id < self.blocks.len() {
self.decode_single_block(&mut entry_points)?;
self.block_id = self.block_id.checked_add(1).ok_or(out_of_bounds_error!())?;
}
self.blocks.retain(|b| !b.instructions.is_empty());
self.blocks.sort_by_key(|b| b.rva);
for (idx, block) in self.blocks.iter_mut().enumerate() {
block.id = idx;
}
self.process_exception_handlers()?;
self.wire_control_flow_edges();
self.wire_exception_edges()?;
Ok(())
}
/// Decodes the block at index `self.block_id`, appending instructions until a
/// terminator, another known entry point, or the end of the data is reached.
///
/// Branch targets and fall-through addresses found at a terminator are
/// registered through `add_entry_point`, which may append or split blocks.
/// A block whose start offset is already marked visited is left untouched.
///
/// # Errors
///
/// Returns an out-of-bounds error when the block index or offset is invalid,
/// and a malformed error on size/RVA arithmetic overflow. Errors from
/// `decode_instruction` and `Parser::seek` are propagated.
fn decode_single_block(&mut self, entry_points: &mut HashSet<u64>) -> Result<()> {
    let block_id = self.block_id;
    let (block_offset, block_rva) = {
        let block = self.blocks.get(block_id).ok_or(out_of_bounds_error!())?;
        (block.offset, block.rva)
    };

    if block_offset > self.parser.len() {
        return Err(out_of_bounds_error!());
    }
    if self.visited.get(block_offset) {
        return Ok(());
    }

    self.parser.seek(block_offset)?;
    let mut current_offset = block_offset;
    let mut current_rva = block_rva;

    loop {
        if current_offset >= self.parser.len() {
            break;
        }
        // Running into the start of another block ends this one (the block's
        // own start is excluded, otherwise nothing would ever be decoded).
        if current_rva != block_rva && entry_points.contains(&current_rva) {
            break;
        }

        let instruction = decode_instruction(self.parser, current_rva)?;
        let flow_type = instruction.flow_type;
        let size = instruction.size;
        let instr_size = usize::try_from(size).map_err(|_| {
            malformed_error!(format!(
                "instruction size {} exceeds platform limits at RVA 0x{:x}",
                size, current_rva
            ))
        })?;
        self.visited.set_range(current_offset, true, instr_size);

        // Only terminators need their targets after the instruction has been
        // moved into the block, so only those are copied.
        let targets = match flow_type {
            FlowType::ConditionalBranch
            | FlowType::UnconditionalBranch
            | FlowType::Leave
            | FlowType::Switch => instruction.branch_targets.clone(),
            FlowType::Return
            | FlowType::Throw
            | FlowType::EndFinally
            | FlowType::Sequential
            | FlowType::Call => Vec::new(),
        };

        // The instruction must be in the block before entry points are added:
        // a backward branch into this very block is resolved by splitting it.
        {
            let block = self
                .blocks
                .get_mut(block_id)
                .ok_or(out_of_bounds_error!())?;
            block.size = block
                .size
                .checked_add(instr_size)
                .ok_or_else(|| malformed_error!("block size overflow"))?;
            block.instructions.push(instruction);
        }

        match flow_type {
            FlowType::ConditionalBranch | FlowType::Switch => {
                for target_rva in targets {
                    self.add_entry_point(target_rva, entry_points);
                }
                let fall_through_rva = current_rva
                    .checked_add(size)
                    .ok_or_else(|| malformed_error!("fall-through RVA overflow"))?;
                self.add_entry_point(fall_through_rva, entry_points);
                break;
            }
            FlowType::UnconditionalBranch | FlowType::Leave => {
                for target_rva in targets {
                    self.add_entry_point(target_rva, entry_points);
                }
                break;
            }
            FlowType::Return | FlowType::Throw | FlowType::EndFinally => {
                break;
            }
            FlowType::Sequential | FlowType::Call => {}
        }

        current_offset = current_offset
            .checked_add(instr_size)
            .ok_or_else(|| malformed_error!("instruction offset overflow"))?;
        current_rva = current_rva
            .checked_add(size)
            .ok_or_else(|| malformed_error!("instruction RVA overflow"))?;
    }

    Ok(())
}
/// Registers `rva` as the start of a basic block if it is not one already.
///
/// Targets that precede the start RVA, cannot be mapped to a parser offset, or
/// map past the end of the data are ignored. When `rva` falls on an
/// instruction inside an existing block, that block is split; otherwise a new,
/// still-empty block is appended.
fn add_entry_point(&mut self, rva: u64, entry_points: &mut HashSet<u64>) {
    if entry_points.contains(&rva) {
        return;
    }

    // `checked_sub` yields `None` for targets that lie before the start RVA.
    let mapped_offset = rva
        .checked_sub(self.rva_start as u64)
        .and_then(|delta| usize::try_from(delta).ok())
        .and_then(|relative| self.offset_start.checked_add(relative))
        .filter(|&candidate| candidate < self.parser.len());
    let Some(offset) = mapped_offset else {
        return;
    };

    match self.find_block_containing_rva(rva) {
        Some((block_idx, split_instr_idx)) => {
            self.split_block_at(block_idx, split_instr_idx, rva, offset);
        }
        None => {
            let next_id = self.blocks.len();
            self.blocks.push(BasicBlock::new(next_id, rva, offset));
        }
    }
    entry_points.insert(rva);
}
/// Looks for an already-decoded instruction at `rva` that sits strictly inside
/// a block (i.e. not at the block's first RVA).
///
/// Returns `(block index, instruction index)` on a hit. Returns `None` when a
/// block starts exactly at `rva`, when a block's end RVA overflows, or when no
/// block holds an instruction beginning at `rva`.
fn find_block_containing_rva(&self, rva: u64) -> Option<(usize, usize)> {
    for (block_idx, block) in self.blocks.iter().enumerate() {
        let start = block.rva;
        if start == rva {
            return None;
        }

        let end = start.checked_add(block.size as u64)?;
        // `start == rva` was ruled out above, so this is the open interval.
        if !(start..end).contains(&rva) {
            continue;
        }

        let hit = block.instructions.iter().position(|instr| instr.rva == rva);
        if let Some(instr_idx) = hit {
            return Some((block_idx, instr_idx));
        }
    }
    None
}
/// Splits block `block_idx` so that the instruction at `split_instr_idx`
/// becomes the first instruction of a new block starting at `rva`/`offset`.
///
/// The new block receives the tail instructions and a copy of the original
/// block's exception list, and is appended to `self.blocks`. Sizes of both
/// halves are recomputed. Nothing happens when `split_instr_idx` is zero, when
/// `block_idx` is out of range, or when `split_instr_idx` exceeds the number
/// of instructions in the block.
fn split_block_at(
    &mut self,
    block_idx: usize,
    split_instr_idx: usize,
    rva: u64,
    offset: usize,
) {
    if split_instr_idx == 0 {
        return;
    }

    let new_id = self.blocks.len();
    let Some(orig) = self.blocks.get_mut(block_idx) else {
        return;
    };
    // `split_off` panics past the end, so keep the no-op for that case.
    if split_instr_idx > orig.instructions.len() {
        return;
    }

    // Move the tail out instead of cloning it and truncating afterwards.
    let tail = orig.instructions.split_off(split_instr_idx);
    orig.size = Self::compute_instructions_size(&orig.instructions);

    let mut new_block = BasicBlock::new(new_id, rva, offset);
    new_block.size = Self::compute_instructions_size(&tail);
    new_block.instructions = tail;
    new_block.exceptions.clone_from(&orig.exceptions);

    self.blocks.push(new_block);
}
/// Sums the encoded sizes of `instructions`, saturating at `usize::MAX`.
///
/// A size that does not fit in `usize` is counted as `usize::MAX`.
fn compute_instructions_size(instructions: &[Instruction]) -> usize {
    let mut total = 0usize;
    for instr in instructions {
        let width = usize::try_from(instr.size).unwrap_or(usize::MAX);
        total = total.saturating_add(width);
    }
    total
}
/// Annotates blocks with exception-handler information.
///
/// For every handler, each block whose start RVA lies in the handler's try
/// range gets the handler index appended to `exceptions`. The block starting
/// at the handler RVA is marked as that handler's entry. For filter clauses
/// with a non-zero filter offset, the block starting at the filter RVA is
/// marked as a `FILTER` entry. Does nothing when there is no handler table.
///
/// # Errors
///
/// Returns a malformed error when any handler-derived RVA overflows.
fn process_exception_handlers(&mut self) -> Result<()> {
    let Some(exceptions) = self.exceptions else {
        return Ok(());
    };

    let rva_to_block: HashMap<u64, usize> = self
        .blocks
        .iter()
        .map(|block| block.rva)
        .zip(0usize..)
        .collect();
    let base_rva = self.rva_start as u64;

    for (handler_idx, handler) in exceptions.iter().enumerate() {
        let try_start = base_rva
            .checked_add(u64::from(handler.try_offset))
            .ok_or_else(|| malformed_error!("try_offset RVA overflow"))?;
        let try_end = try_start
            .checked_add(u64::from(handler.try_length))
            .ok_or_else(|| malformed_error!("try region end RVA overflow"))?;

        // Every block starting inside the protected region is covered.
        let protected = try_start..try_end;
        self.blocks
            .iter_mut()
            .filter(|block| protected.contains(&block.rva))
            .for_each(|block| block.exceptions.push(handler_idx));

        let handler_rva = base_rva
            .checked_add(u64::from(handler.handler_offset))
            .ok_or_else(|| malformed_error!("handler_offset RVA overflow"))?;
        let handler_block = match rva_to_block.get(&handler_rva) {
            Some(&idx) => self.blocks.get_mut(idx),
            None => None,
        };
        if let Some(block) = handler_block {
            block.handler_entry = Some(HandlerEntryInfo::new(handler_idx, handler.flags));
        }

        let is_filter =
            handler.flags == ExceptionHandlerFlags::FILTER && handler.filter_offset > 0;
        if !is_filter {
            continue;
        }

        let filter_rva = base_rva
            .checked_add(u64::from(handler.filter_offset))
            .ok_or_else(|| malformed_error!("filter_offset RVA overflow"))?;
        let filter_block = match rva_to_block.get(&filter_rva) {
            Some(&idx) => self.blocks.get_mut(idx),
            None => None,
        };
        if let Some(block) = filter_block {
            block.handler_entry = Some(HandlerEntryInfo::new(
                handler_idx,
                ExceptionHandlerFlags::FILTER,
            ));
        }
    }

    Ok(())
}
/// Adds an exception edge from every block that starts inside a try range to
/// the block that starts at the corresponding handler RVA.
///
/// Handlers whose handler RVA does not match the start of any block are
/// skipped. An edge is recorded at most once per block. Does nothing when
/// there is no handler table.
///
/// # Errors
///
/// Returns a malformed error when a handler or try RVA overflows.
fn wire_exception_edges(&mut self) -> Result<()> {
    let Some(exceptions) = self.exceptions else {
        return Ok(());
    };

    let rva_to_block: HashMap<u64, usize> = self
        .blocks
        .iter()
        .map(|block| block.rva)
        .zip(0usize..)
        .collect();
    let base_rva = self.rva_start as u64;

    for handler in exceptions {
        let handler_rva = base_rva
            .checked_add(u64::from(handler.handler_offset))
            .ok_or_else(|| malformed_error!("handler_offset RVA overflow"))?;
        let Some(&handler_block_idx) = rva_to_block.get(&handler_rva) else {
            continue;
        };

        let try_start = base_rva
            .checked_add(u64::from(handler.try_offset))
            .ok_or_else(|| malformed_error!("try_offset RVA overflow"))?;
        let try_end = try_start
            .checked_add(u64::from(handler.try_length))
            .ok_or_else(|| malformed_error!("try region end RVA overflow"))?;

        let protected = try_start..try_end;
        for block in self
            .blocks
            .iter_mut()
            .filter(|block| protected.contains(&block.rva))
        {
            let already_linked = block.exception_successors.contains(&handler_block_idx);
            if !already_linked {
                block.exception_successors.push(handler_block_idx);
            }
        }
    }

    Ok(())
}
/// Computes the successor list of every block and records the matching
/// predecessor entry on each successor.
///
/// Successors replace whatever a block held before; predecessors are appended.
fn wire_control_flow_edges(&mut self) {
    let rva_to_block: HashMap<u64, usize> = self
        .blocks
        .iter()
        .map(|block| block.rva)
        .zip(0usize..)
        .collect();

    let block_count = self.blocks.len();
    for block_idx in 0..block_count {
        let successors = self.compute_block_successors(block_idx, &rva_to_block);

        for &succ_idx in &successors {
            if let Some(target) = self.blocks.get_mut(succ_idx) {
                target.predecessors.push(block_idx);
            }
        }
        if let Some(source) = self.blocks.get_mut(block_idx) {
            source.successors = successors;
        }
    }
}
/// Returns the indices of the blocks that control can reach from `block_idx`,
/// based on the flow type of the block's last instruction.
///
/// * return / throw / endfinally: no successors;
/// * unconditional branch / leave: the branch targets;
/// * conditional branch / switch: the branch targets followed by the
///   fall-through block;
/// * sequential / call: the fall-through block only.
///
/// Targets and fall-through addresses that do not match the start RVA of a
/// block in `rva_to_block` are dropped. An unknown index or an empty block
/// yields an empty list.
fn compute_block_successors(
    &self,
    block_idx: usize,
    rva_to_block: &HashMap<u64, usize>,
) -> Vec<usize> {
    let Some(block) = self.blocks.get(block_idx) else {
        return Vec::new();
    };
    let Some(terminator) = block.instructions.last() else {
        return Vec::new();
    };

    // Block that starts right after this one, if there is one.
    let fall_through = block
        .rva
        .checked_add(block.size as u64)
        .and_then(|rva| rva_to_block.get(&rva).copied());
    // Lazily resolved branch targets; consumed by at most one arm below.
    let targets = terminator
        .branch_targets
        .iter()
        .filter_map(|target_rva| rva_to_block.get(target_rva).copied());

    match terminator.flow_type {
        FlowType::Return | FlowType::Throw | FlowType::EndFinally => Vec::new(),
        FlowType::UnconditionalBranch | FlowType::Leave => targets.collect(),
        FlowType::ConditionalBranch | FlowType::Switch => targets.chain(fall_through).collect(),
        FlowType::Sequential | FlowType::Call => fall_through.into_iter().collect(),
    }
}
}
/// Decodes the body of `method` into basic blocks and stores them in
/// `method.blocks`.
///
/// Methods without an RVA are skipped, and a body with `size_code == 0` gets
/// an empty block list. The result of `method.blocks.set(..)` is deliberately
/// ignored.
///
/// NOTE(review): the parser is created over the whole file, so decoding is
/// bounded by the file length rather than by `size_code` — confirm this is
/// intended.
///
/// # Errors
///
/// Returns a malformed error when the method offset or header size is not
/// inside the file, when the method has no body, or when offset arithmetic
/// overflows. Errors from RVA translation and block decoding are propagated.
pub(crate) fn decode_method(
    method: &Method,
    file: &File,
    shared_visited: Arc<VisitedMap>,
) -> Result<()> {
    let Some(method_rva) = method.rva else {
        return Ok(());
    };
    let rva = method_rva as usize;

    let method_offset = file.rva_to_offset(rva)?;
    if method_offset >= file.data().len() {
        return Err(malformed_error!("Invalid method offset: {}", method_offset));
    }

    let body = method
        .body
        .get()
        .ok_or_else(|| malformed_error!("Method does not have a valid body"))?;
    if body.size_header >= file.data().len() {
        return Err(malformed_error!(
            "MethodHeader size exceeds file size - {}",
            body.size_header
        ));
    }

    let code_start = match method_offset.checked_add(body.size_header) {
        Some(start) => start,
        None => {
            return Err(malformed_error!(
                "Integer overflow size_header ({}) + method_offset ({})",
                body.size_header,
                method_offset
            ));
        }
    };

    if body.size_code == 0 {
        let _ = method.blocks.set(Vec::new());
        return Ok(());
    }

    let mut parser = Parser::new(file.data());
    let rva_start = rva
        .checked_add(body.size_header)
        .ok_or_else(|| malformed_error!("rva + size_header overflow"))?;

    let mut decoder = Decoder::new(
        &mut parser,
        code_start,
        rva_start,
        Some(&body.exception_handlers),
        shared_visited,
    )?;
    decoder.decode_blocks()?;
    let _ = method.blocks.set(decoder.into_blocks());

    Ok(())
}
/// Decodes the bytes of `data` starting at `offset` into basic blocks, with
/// the first instruction labelled `rva`.
///
/// When `max_size` is given, at most that many bytes (clamped to the end of
/// `data`) are considered. Decoding runs over the selected sub-slice only and
/// uses no exception handler table.
///
/// # Errors
///
/// Returns a malformed error when `offset` is not inside `data`, and
/// propagates any decoding error.
pub fn decode_blocks(
    data: &[u8],
    offset: usize,
    rva: usize,
    max_size: Option<usize>,
) -> Result<Vec<BasicBlock>> {
    let data_len = data.len();
    if offset >= data_len {
        return Err(malformed_error!(
            "Starting offset {} exceeds data length {}",
            offset,
            data.len()
        ));
    }

    let end_offset = match max_size {
        Some(size) => offset.saturating_add(size).min(data_len),
        None => data_len,
    };
    let effective_data = data.get(offset..end_offset).ok_or(out_of_bounds_error!())?;

    let visited = Arc::new(VisitedMap::new(effective_data.len()));
    let mut parser = Parser::new(effective_data);
    let mut decoder = Decoder::new(&mut parser, 0, rva, None, visited)?;
    decoder.decode_blocks()?;
    Ok(decoder.into_blocks())
}
/// Decodes instructions back to back from the parser's current position until
/// no data is left, assigning consecutive RVAs starting at `rva`.
///
/// # Errors
///
/// Propagates errors from `decode_instruction` and returns a malformed error
/// when the parser position moves backwards or the RVA overflows.
pub fn decode_stream(parser: &mut Parser, rva: u64) -> Result<Vec<Instruction>> {
    let mut decoded = Vec::new();
    let mut next_rva = rva;

    loop {
        if !parser.has_more_data() {
            return Ok(decoded);
        }

        let start = parser.pos();
        decoded.push(decode_instruction(parser, next_rva)?);

        // Advance the RVA by the number of bytes the parser actually consumed.
        let consumed = parser
            .pos()
            .checked_sub(start)
            .ok_or_else(|| malformed_error!("parser position regressed during decode"))?
            as u64;
        next_rva = next_rva
            .checked_add(consumed)
            .ok_or_else(|| malformed_error!("instruction stream RVA overflow"))?;
    }
}
/// Decodes a single CIL instruction at the parser's current position.
///
/// `rva` is stored on the instruction and used as the base for branch target
/// computation. A first byte of `0xFE` selects the two-byte opcode table
/// (`INSTRUCTIONS_FE`); every other value is looked up in `INSTRUCTIONS`.
/// Branch targets are computed relative to the address of the *next*
/// instruction (`rva + size`).
///
/// # Errors
///
/// Returns a malformed error for opcodes missing from the tables, for reserved
/// opcodes (entries with an empty mnemonic), and on size/RVA arithmetic
/// failures. Read errors from the parser (e.g. truncated operands) are
/// propagated.
pub fn decode_instruction(parser: &mut Parser, rva: u64) -> Result<Instruction> {
    let offset = parser.pos() as u64;
    let first_byte = parser.read_le::<u8>()?;

    let (cil_instruction, prefix, opcode) = match first_byte {
        0xFE => {
            let second_byte = parser.read_le::<u8>()?;
            match INSTRUCTIONS_FE.get(second_byte as usize) {
                Some(instr) => (instr, 0xFE, second_byte),
                None => return Err(malformed_error!("Invalid opcode: FE {:02X}", second_byte)),
            }
        }
        _ => match INSTRUCTIONS.get(first_byte as usize) {
            Some(instr) => (instr, 0, first_byte),
            None => return Err(malformed_error!("Invalid opcode: {:X}", first_byte)),
        },
    };

    if cil_instruction.instr.is_empty() {
        return Err(malformed_error!("Reserved opcode: {:04X}", opcode));
    }

    let operand = match cil_instruction.op_type {
        OperandType::None => Operand::None,
        OperandType::Int8 => Operand::Immediate(Immediate::Int8(parser.read_le::<i8>()?)),
        OperandType::UInt8 => Operand::Immediate(Immediate::UInt8(parser.read_le::<u8>()?)),
        OperandType::Int16 => Operand::Immediate(Immediate::Int16(parser.read_le::<i16>()?)),
        OperandType::UInt16 => Operand::Immediate(Immediate::UInt16(parser.read_le::<u16>()?)),
        OperandType::Int32 => Operand::Immediate(Immediate::Int32(parser.read_le::<i32>()?)),
        OperandType::UInt32 => Operand::Immediate(Immediate::UInt32(parser.read_le::<u32>()?)),
        OperandType::Int64 => Operand::Immediate(Immediate::Int64(parser.read_le::<i64>()?)),
        OperandType::UInt64 => Operand::Immediate(Immediate::UInt64(parser.read_le::<u64>()?)),
        OperandType::Float32 => Operand::Immediate(Immediate::Float32(parser.read_le::<f32>()?)),
        OperandType::Float64 => Operand::Immediate(Immediate::Float64(parser.read_le::<f64>()?)),
        OperandType::Token => Operand::Token(Token::new(parser.read_le::<u32>()?)),
        OperandType::Switch => {
            let case_count = parser.read_le::<u32>()? as usize;
            // `case_count` comes straight from the input. Never reserve more
            // entries than the remaining bytes could hold; a bogus count then
            // fails on the read below instead of attempting a huge allocation.
            let remaining = parser.len().saturating_sub(parser.pos());
            let max_entries = remaining / std::mem::size_of::<i32>();
            let mut targets = Vec::with_capacity(case_count.min(max_entries));
            for _ in 0..case_count {
                targets.push(parser.read_le::<i32>()?);
            }
            Operand::Switch(targets)
        }
    };

    let size = (parser.pos() as u64)
        .checked_sub(offset)
        .ok_or_else(|| malformed_error!("instruction size underflow"))?;

    let mut instruction = Instruction {
        rva,
        offset,
        size,
        opcode,
        prefix,
        mnemonic: cil_instruction.instr,
        category: cil_instruction.category,
        flow_type: cil_instruction.flow,
        stack_behavior: StackBehavior {
            pops: cil_instruction.stack_pops,
            pushes: cil_instruction.stack_pushes,
            #[allow(clippy::cast_possible_wrap)]
            net_effect: (cil_instruction.stack_pushes as i8)
                .wrapping_sub(cil_instruction.stack_pops as i8),
        },
        branch_targets: Vec::new(),
        operand,
    };

    match instruction.flow_type {
        FlowType::ConditionalBranch | FlowType::UnconditionalBranch | FlowType::Leave => {
            if let Operand::Immediate(value) = instruction.operand {
                let next_instruction_rva = rva
                    .checked_add(instruction.size)
                    .ok_or_else(|| malformed_error!("branch instruction RVA overflow"))?;
                // NOTE(review): backward branches rely on the `Immediate -> u64`
                // conversion sign-extending negative values so that the
                // wrapping add subtracts — confirm against that impl.
                let branch_offset = <Immediate as Into<u64>>::into(value);
                instruction
                    .branch_targets
                    .push(next_instruction_rva.wrapping_add(branch_offset));
            }
        }
        FlowType::Switch => {
            if let Operand::Switch(targets) = &instruction.operand {
                let next_instruction_rva = rva
                    .checked_add(instruction.size)
                    .ok_or_else(|| malformed_error!("switch instruction RVA overflow"))?;
                for &target in targets {
                    let offset = i64::from(target);
                    #[allow(clippy::cast_sign_loss)]
                    let abs_target = next_instruction_rva.cast_signed().wrapping_add(offset) as u64;
                    instruction.branch_targets.push(abs_target);
                }
            }
        }
        _ => {}
    }

    Ok(instruction)
}
// Unit tests for single-instruction decoding, stream decoding and basic-block
// construction. Fixtures are raw CIL byte sequences; RVAs start at 0x1000.
#[cfg(test)]
mod tests {
use crate::{
assembly::{
decode_blocks, decode_instruction, decode_stream, FlowType, Immediate,
InstructionCategory, Operand,
},
Parser,
};
// One-byte opcode with a one-byte operand: 0x11 0x10 decodes as `ldloc.s 0x10`.
#[test]
fn decode_instruction_basic() {
let mut parser = Parser::new(&[0x11, 0x10]);
let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.rva, rva);
assert_eq!(result.offset, 0);
assert_eq!(result.size, 2);
assert_eq!(result.opcode, 0x11);
assert_eq!(result.prefix, 0);
assert_eq!(result.mnemonic, "ldloc.s");
assert_eq!(result.category, InstructionCategory::LoadStore);
assert_eq!(result.flow_type, FlowType::Sequential);
match &result.operand {
Operand::Immediate(Immediate::UInt8(val)) => assert_eq!(*val, 0x10),
_ => panic!("Expected Operand::Immediate(Immediate::UInt8)"),
}
}
// 0xFE-prefixed opcode: FE 01 decodes as `ceq` with prefix 0xFE and opcode 0x01.
#[test]
fn decode_instruction_two_byte() {
let mut parser = Parser::new(&[0xFE, 0x01]);
let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.opcode, 0x01);
assert_eq!(result.prefix, 0xFE);
assert_eq!(result.mnemonic, "ceq");
assert_eq!(result.category, InstructionCategory::Comparison);
assert_eq!(result.flow_type, FlowType::Sequential);
}
// `br.s +0x0A` at 0x1000 (2 bytes): target = 0x1000 + 2 + 0x0A = 0x100C.
#[test]
fn decode_instruction_branch() {
let mut parser = Parser::new(&[0x2B, 0x0A]);
let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "br.s");
assert_eq!(result.flow_type, FlowType::UnconditionalBranch);
assert_eq!(result.branch_targets.len(), 1);
assert_eq!(result.branch_targets[0], 0x100C); }
// `switch` with two cases (+10, +20); the instruction is 13 bytes long, so the
// targets are 0x100D + 10 = 0x1017 and 0x100D + 20 = 0x1021.
#[test]
fn decode_instruction_switch() {
let mut parser = Parser::new(&[
0x45, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
]);
let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "switch");
assert_eq!(result.flow_type, FlowType::Switch);
assert_eq!(result.branch_targets.len(), 2);
assert_eq!(result.branch_targets[0], 0x1017); assert_eq!(result.branch_targets[1], 0x1021); }
// 0xFF is expected to be rejected as an opcode.
#[test]
fn decode_instruction_invalid_opcode() {
let mut parser = Parser::new(&[0xFF, 0xFF]);
let rva = 0x1000;
let result = decode_instruction(&mut parser, rva);
assert!(result.is_err(), "Expected error for invalid opcode");
}
// `ldtoken` carries a little-endian 32-bit metadata token operand.
#[test]
fn decode_instruction_token() {
let mut parser = Parser::new(&[0xD0, 0x01, 0x00, 0x00, 0x02]);
let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "ldtoken");
match &result.operand {
Operand::Token(token) => assert_eq!(token.value(), 0x02000001),
_ => panic!("Expected Operand::Token"),
}
}
// Linear decode of 10 bytes: nop, brfalse.s, nop, br.s, nop, ret, nop, ret
// gives 8 instructions (branches are not followed by `decode_stream`).
#[test]
fn decode_stream_complex() {
let code = vec![
0x00, 0x2C, 0x05, 0x00, 0x2B, 0x03, 0x00, 0x2A, 0x00, 0x2A, ];
let mut parser = Parser::new(&code);
let result = decode_stream(&mut parser, 0x1000).unwrap();
assert_eq!(result.len(), 8);
}
// `nop; ret` forms exactly one block.
#[test]
fn decode_blocks_simple() {
let code = [0x00, 0x2A]; let result = super::decode_blocks(&code, 0, 0x1000, None).unwrap();
assert_eq!(
result.len(),
1,
"Expected single basic block for linear code"
);
assert_eq!(
result[0].instructions.len(),
2,
"Expected 2 instructions in block"
);
assert_eq!(result[0].rva, 0x1000, "Expected correct starting RVA");
}
// A conditional branch ends the first block after `nop; brfalse.s`.
#[test]
fn decode_blocks_with_conditional_branch() {
let code = [
0x00, 0x2C, 0x02, 0x2A, 0x2A, ];
let result = super::decode_blocks(&code, 0, 0x1000, None).unwrap();
assert!(
result.len() >= 2,
"Expected multiple basic blocks due to branching"
);
let first_block = &result[0];
assert_eq!(
first_block.instructions.len(),
2,
"First block should have nop + brfalse.s"
);
assert_eq!(first_block.instructions[0].mnemonic, "nop");
assert_eq!(first_block.instructions[1].mnemonic, "brfalse.s");
}
// An unconditional branch ends the first block after `nop; br.s`.
#[test]
fn decode_blocks_with_unconditional_branch() {
let code = [
0x00, 0x2B, 0x01, 0x2A, 0x2A, ];
let result = super::decode_blocks(&code, 0, 0x1000, None).unwrap();
assert!(
result.len() >= 2,
"Expected multiple basic blocks due to branching, got {}",
result.len()
);
let first_block = &result[0];
assert_eq!(
first_block.instructions.len(),
2,
"First block should have nop + br.s"
);
assert_eq!(first_block.instructions[1].mnemonic, "br.s");
}
// A `switch` ends the first block after `nop; switch`.
#[test]
fn decode_blocks_with_switch() {
let code = [
0x00, 0x45, 0x02, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x02, 0x00, 0x00,
0x00, 0x2A, 0x2A, ];
let result = super::decode_blocks(&code, 0, 0x1000, None).unwrap();
assert!(
result.len() >= 2,
"Expected multiple basic blocks due to switch"
);
let first_block = &result[0];
assert_eq!(first_block.instructions.len(), 2);
assert_eq!(first_block.instructions[0].mnemonic, "nop");
assert_eq!(first_block.instructions[1].mnemonic, "switch");
}
// Decoding starts at byte 3, so the leading 0xFF bytes are never read.
#[test]
fn decode_blocks_with_offset() {
let code = [
0xFF, 0xFF, 0xFF, 0x00, 0x2A, ];
let result = super::decode_blocks(&code, 3, 0x1000, None).unwrap();
assert_eq!(result.len(), 1, "Expected single basic block");
assert_eq!(result[0].instructions.len(), 2, "Expected 2 instructions");
assert_eq!(result[0].instructions[0].mnemonic, "nop");
assert_eq!(result[0].instructions[1].mnemonic, "ret");
}
// `max_size = 2` restricts decoding to the first `nop; ret` pair.
#[test]
fn decode_blocks_with_max_size() {
let code = [
0x00, 0x2A, 0x00, 0x2A, ];
let result = super::decode_blocks(&code, 0, 0x1000, Some(2)).unwrap();
assert_eq!(result.len(), 1, "Expected single basic block");
assert_eq!(
result[0].instructions.len(),
2,
"Expected only 2 instructions due to max_size"
);
assert_eq!(result[0].instructions[0].mnemonic, "nop");
assert_eq!(result[0].instructions[1].mnemonic, "ret");
}
// A start offset past the end of the data is an error.
#[test]
fn decode_blocks_invalid_offset() {
let code = [0x00, 0x2A];
let result = super::decode_blocks(&code, 10, 0x1000, None);
assert!(result.is_err(), "Expected error for invalid offset");
}
// Offset 0 on empty data is rejected (offset >= data length).
#[test]
fn decode_blocks_empty_data() {
let code = [];
let result = super::decode_blocks(&code, 0, 0x1000, None);
assert!(
result.is_err(),
"Expected error for empty data with offset 0"
);
}
// FE FF is expected to be rejected as a two-byte opcode.
#[test]
fn decode_invalid_fe_instruction() {
let code = [0xFE, 0xFF]; let mut parser = Parser::new(&code);
let result = decode_instruction(&mut parser, 0x1000);
assert!(result.is_err());
}
// NOTE(review): same scenario as `decode_blocks_invalid_offset`, called through
// the `crate::assembly` import instead of `super::`.
#[test]
fn decode_blocks_offset_out_of_bounds() {
let code = [0x00, 0x2A]; let result = decode_blocks(&code, 10, 0x1000, None); assert!(result.is_err());
}
// NOTE(review): this accepts both `Ok` and `Err`, whereas
// `decode_blocks_empty_data` requires `Err` for the same input; as written this
// test passes vacuously when an error is returned.
#[test]
fn decode_empty_data() {
let code = [];
let result = decode_blocks(&code, 0, 0x1000, None);
if let Ok(blocks) = result {
assert!(blocks.is_empty());
}
}
// Checks that the fall-through path after a conditional branch (offset 0x03)
// and the later offset 0x08 are both decoded. The `eprintln!` calls only dump
// the decoded blocks for debugging.
#[test]
fn decode_blocks_conditional_fall_through() {
let code = [
0x16, 0x31, 0x05, 0x06, 0x16, 0x2B, 0x01, 0x16, 0x0B, 0x2A, ];
let result = decode_blocks(&code, 0, 0x1000, None);
assert!(result.is_ok(), "decode_blocks failed: {:?}", result.err());
let blocks = result.unwrap();
eprintln!("=== Decoded blocks ===");
for block in &blocks {
eprintln!(
"Block {}: RVA 0x{:04X}, {} instructions",
block.id,
block.rva,
block.instructions.len()
);
for instr in &block.instructions {
eprintln!(" 0x{:04X}: {}", instr.offset, instr.mnemonic);
}
}
let has_fall_through = blocks.iter().any(|b| {
b.instructions.iter().any(|i| i.offset == 0x0003) });
assert!(
has_fall_through,
"Fall-through instruction at IL offset 0x03 was not decoded!"
);
let all_offsets: Vec<u64> = blocks
.iter()
.flat_map(|b| b.instructions.iter().map(|i| i.offset))
.collect();
eprintln!("All instruction offsets: {:?}", all_offsets);
assert!(all_offsets.contains(&0x0001), "ble.s at IL 0x01 missing");
assert!(
all_offsets.contains(&0x0003),
"fall-through ldloc.0 at IL 0x03 missing"
);
assert!(
all_offsets.contains(&0x0008),
"branch target at IL 0x08 missing"
);
}
// Maximum value of a one-byte unsigned operand.
#[test]
fn decode_instruction_uint8_operand() {
let mut parser = Parser::new(&[0x11, 0xFF]); let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
match &result.operand {
Operand::Immediate(Immediate::UInt8(val)) => assert_eq!(*val, 255), _ => panic!("Expected Operand::Immediate(Immediate::UInt8)"),
}
}
// NOTE(review): empty placeholder — it always passes and exercises nothing.
#[test]
fn decode_instruction_uint16_operand() {
}
// FE 09 FF FF decodes as `ldarg` with an Int16 operand of -1.
#[test]
fn decode_instruction_int16_operand() {
let mut parser = Parser::new(&[0xFE, 0x09, 0xFF, 0xFF]); let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "ldarg");
match &result.operand {
Operand::Immediate(Immediate::Int16(val)) => assert_eq!(*val, -1),
_ => panic!("Expected Operand::Immediate(Immediate::Int16)"),
}
}
// NOTE(review): despite the name, this exercises a one-case `switch` (whose
// case count is read as a u32), not a UInt32 immediate operand.
#[test]
fn decode_instruction_uint32_operand() {
let mut parser = Parser::new(&[
0x45, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, ]);
let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "switch");
assert_eq!(result.flow_type, FlowType::Switch);
assert_eq!(result.branch_targets.len(), 1);
}
// NOTE(review): despite the name, the asserted operand is a signed Int64
// (`ldc.i8` with all bits set is -1).
#[test]
fn decode_instruction_uint64_operand() {
let mut parser = Parser::new(&[0x21, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]); let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "ldc.i8");
match &result.operand {
Operand::Immediate(Immediate::Int64(val)) => assert_eq!(*val, -1),
_ => panic!("Expected Operand::Immediate(Immediate::Int64)"),
}
}
// Start offset beyond a one-byte buffer is an error.
#[test]
fn decode_bounds_error() {
let data = [0x00];
let result = decode_blocks(&data, 10, 0x1000, None);
assert!(result.is_err());
}
// `nop; ret` yields exactly one block.
#[test]
fn decode_blocks_access() {
let data = [0x00, 0x2A];
let blocks = decode_blocks(&data, 0, 0x1000, None).unwrap();
assert!(!blocks.is_empty());
assert_eq!(blocks.len(), 1); }
// Checks the RVA, offset and non-zero size of the single decoded block.
#[test]
fn decode_blocks_basic_coverage() {
let data = [
0x00, 0x2A, ];
let blocks = decode_blocks(&data, 0, 0x1000, Some(2)).unwrap();
assert!(!blocks.is_empty());
assert_eq!(blocks.len(), 1);
let block = &blocks[0];
assert_eq!(block.rva, 0x1000);
assert_eq!(block.offset, 0);
assert!(block.size > 0);
}
// With `max_size = 2` the decoded blocks never cover more than two bytes.
#[test]
fn decode_blocks_max_size_limit() {
let data = [0x00, 0x00, 0x00, 0x2A];
let blocks = decode_blocks(&data, 0, 0x1000, Some(2)).unwrap();
assert!(!blocks.is_empty());
let total_size: usize = blocks.iter().map(|b| b.size).sum();
assert!(total_size <= 2);
}
// An empty stream decodes to an empty instruction list without error.
#[test]
fn decode_stream_empty() {
let data = [];
let mut parser = Parser::new(&data);
let result = decode_stream(&mut parser, 0x1000).unwrap();
assert_eq!(result.len(), 0);
}
// NOTE(review): a lone `nop` without a terminator; the assertion only runs when
// decoding succeeds, so an error result would pass silently.
#[test]
fn decode_blocks_invalid_method_body() {
let data = [0x00];
let result = decode_blocks(&data, 0, 0x1000, None);
if let Ok(blocks) = result {
assert!(!blocks.is_empty());
}
}
// `leave.s +5` at 0x1000 (2 bytes): target = 0x1002 + 5 = 0x1007.
#[test]
fn decode_instruction_leave_s() {
let mut parser = Parser::new(&[0xDE, 0x05]); let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "leave.s");
assert_eq!(result.flow_type, FlowType::Leave);
assert_eq!(result.size, 2);
assert_eq!(result.branch_targets.len(), 1);
assert_eq!(result.branch_targets[0], 0x1007);
}
// `leave +10` at 0x1000 (5 bytes): target = 0x1005 + 10 = 0x100F.
#[test]
fn decode_instruction_leave() {
let mut parser = Parser::new(&[0xDD, 0x0A, 0x00, 0x00, 0x00]); let rva = 0x1000;
let result = decode_instruction(&mut parser, rva).unwrap();
assert_eq!(result.mnemonic, "leave");
assert_eq!(result.flow_type, FlowType::Leave);
assert_eq!(result.size, 5);
assert_eq!(result.branch_targets.len(), 1);
assert_eq!(result.branch_targets[0], 0x100F);
}
// A block ending in `leave.s` must have its target wired as a successor.
#[test]
fn decode_blocks_with_leave() {
let code = [
0x00, 0xDE, 0x01, 0x00, 0x2A, ];
let blocks = decode_blocks(&code, 0, 0x1000, None).unwrap();
assert!(
blocks.len() >= 2,
"Expected at least 2 blocks, got {}",
blocks.len()
);
let leave_block = blocks.iter().find(|b| {
b.instructions
.last()
.is_some_and(|i| i.mnemonic == "leave.s")
});
assert!(
leave_block.is_some(),
"Should have a block ending with leave.s"
);
let leave_block = leave_block.unwrap();
assert!(
!leave_block.successors.is_empty(),
"Block ending with leave.s should have successors"
);
}
}