use super::{categorize_instruction, DisassemblyConfig};
use crate::{
types::{Architecture, ControlFlow as FlowType, Instruction},
BinaryError, Result,
};
use iced_x86::*;
pub fn disassemble(
data: &[u8],
address: u64,
architecture: Architecture,
config: &DisassemblyConfig,
) -> Result<Vec<Instruction>> {
let bitness = match architecture {
Architecture::X86 => 32,
Architecture::X86_64 => 64,
_ => {
return Err(BinaryError::unsupported_arch(format!(
"iced-x86 only supports x86/x64, got {:?}",
architecture
)));
}
};
let mut decoder = create_decoder(bitness, data, address)?;
let mut formatter = create_formatter();
let mut result = Vec::new();
let max_instructions = config.max_instructions;
let mut instr = iced_x86::Instruction::default();
let mut count = 0;
while decoder.can_decode() && count < max_instructions {
decoder.decode_out(&mut instr);
if config.skip_invalid && instr.code() == Code::INVALID {
continue;
}
let mut output = String::new();
formatter.format(&instr, &mut output);
let (mnemonic, operands) = parse_formatted_instruction(&output);
let category = categorize_instruction(&mnemonic);
let flow = if config.analyze_control_flow {
analyze_iced_control_flow(&instr, &operands)
} else {
FlowType::Sequential
};
let instruction_bytes = data
[((instr.ip() - address) as usize)..((instr.ip() - address) as usize + instr.len())]
.to_vec();
let instruction = Instruction {
address: instr.ip(),
bytes: instruction_bytes,
mnemonic,
operands,
category,
flow,
size: instr.len(),
};
result.push(instruction);
count += 1;
}
Ok(result)
}
fn create_decoder(bitness: u32, data: &[u8], address: u64) -> Result<Decoder<'_>> {
let decoder_options = DecoderOptions::NONE;
let decoder = match bitness {
16 => Decoder::with_ip(16, data, address, decoder_options),
32 => Decoder::with_ip(32, data, address, decoder_options),
64 => Decoder::with_ip(64, data, address, decoder_options),
_ => {
return Err(BinaryError::unsupported_arch(format!(
"Unsupported bitness: {}",
bitness
)));
}
};
Ok(decoder)
}
fn create_formatter() -> NasmFormatter {
NasmFormatter::new()
}
fn parse_formatted_instruction(formatted: &str) -> (String, String) {
let parts: Vec<&str> = formatted.trim().splitn(2, ' ').collect();
let mnemonic = parts[0].to_string();
let operands = if parts.len() > 1 {
parts[1].to_string()
} else {
String::new()
};
(mnemonic, operands)
}
fn analyze_iced_control_flow(instr: &iced_x86::Instruction, _operands: &str) -> FlowType {
match instr.flow_control() {
FlowControl::Next => FlowType::Sequential,
FlowControl::UnconditionalBranch => {
if let Some(target) = get_branch_target(instr) {
FlowType::Jump(target)
} else {
FlowType::Unknown
}
}
FlowControl::ConditionalBranch => {
if let Some(target) = get_branch_target(instr) {
FlowType::ConditionalJump(target)
} else {
FlowType::Unknown
}
}
FlowControl::Call => {
if let Some(target) = get_branch_target(instr) {
FlowType::Call(target)
} else {
FlowType::Unknown
}
}
FlowControl::Return => FlowType::Return,
FlowControl::Interrupt => FlowType::Interrupt,
FlowControl::IndirectBranch | FlowControl::IndirectCall => FlowType::Unknown,
FlowControl::Exception => FlowType::Interrupt,
FlowControl::XbeginXabortXend => FlowType::Unknown,
}
}
fn get_branch_target(instr: &iced_x86::Instruction) -> Option<u64> {
for i in 0..instr.op_count() {
match instr.op_kind(i) {
OpKind::NearBranch16 | OpKind::NearBranch32 | OpKind::NearBranch64 => {
return Some(instr.near_branch_target());
}
_ => continue,
}
}
None
}
#[allow(dead_code)]
pub fn analyze_instruction_details(instr: &iced_x86::Instruction) -> InstructionDetails {
let mut operands = Vec::new();
let mut memory_accesses = Vec::new();
let mut registers_read = Vec::new();
let mut registers_written = Vec::new();
for i in 0..instr.op_count() {
let operand_info = format_operand_info(instr, i);
operands.push(operand_info);
if matches!(instr.op_kind(i), OpKind::Memory) {
memory_accesses.push(format!("mem_access_{}", i));
}
}
for i in 0..instr.op_count() {
if let OpKind::Register = instr.op_kind(i) {
let reg = instr.op_register(i);
let reg_name = format!("{:?}", reg);
registers_read.push(reg_name.clone());
registers_written.push(reg_name);
}
}
InstructionDetails {
operands,
memory_accesses,
registers_read,
registers_written,
encoding: format!("{:?}", instr.encoding()),
cpuid_features: get_cpuid_features(instr),
stack_pointer_increment: instr.stack_pointer_increment(),
}
}
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct InstructionDetails {
pub operands: Vec<String>,
pub memory_accesses: Vec<String>,
pub registers_read: Vec<String>,
pub registers_written: Vec<String>,
pub encoding: String,
pub cpuid_features: Vec<String>,
pub stack_pointer_increment: i32,
}
#[allow(dead_code)]
fn format_operand_info(instr: &iced_x86::Instruction, operand_index: u32) -> String {
match instr.op_kind(operand_index) {
OpKind::Register => {
format!("reg:{:?}", instr.op_register(operand_index))
}
OpKind::NearBranch16 | OpKind::NearBranch32 | OpKind::NearBranch64 => {
format!("branch:0x{:x}", instr.near_branch_target())
}
OpKind::FarBranch16 | OpKind::FarBranch32 => {
format!(
"far_branch:0x{:x}:0x{:x}",
instr.far_branch_selector(),
instr.far_branch32()
)
}
OpKind::Immediate8 => {
format!("imm8:0x{:x}", instr.immediate8())
}
OpKind::Immediate16 => {
format!("imm16:0x{:x}", instr.immediate16())
}
OpKind::Immediate32 => {
format!("imm32:0x{:x}", instr.immediate32())
}
OpKind::Immediate64 => {
format!("imm64:0x{:x}", instr.immediate64())
}
OpKind::Immediate8to16 => {
format!("imm8to16:0x{:x}", instr.immediate8to16())
}
OpKind::Immediate8to32 => {
format!("imm8to32:0x{:x}", instr.immediate8to32())
}
OpKind::Immediate8to64 => {
format!("imm8to64:0x{:x}", instr.immediate8to64())
}
OpKind::Immediate32to64 => {
format!("imm32to64:0x{:x}", instr.immediate32to64())
}
OpKind::Memory => {
format!(
"mem:[{:?}+{:?}*{}+0x{:x}]",
instr.memory_base(),
instr.memory_index(),
instr.memory_index_scale(),
instr.memory_displacement64()
)
}
_ => format!("operand_{}", operand_index),
}
}
#[allow(dead_code)]
fn get_cpuid_features(instr: &iced_x86::Instruction) -> Vec<String> {
let mut features = Vec::new();
for feature in instr.cpuid_features() {
features.push(format!("{:?}", feature));
}
features
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{ControlFlow as FlowType, InstructionCategory};
#[test]
fn test_iced_engine_x86_64() {
let config = DisassemblyConfig::default();
let data = &[0x90];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config);
assert!(result.is_ok());
let instructions = result.unwrap();
assert!(!instructions.is_empty());
assert_eq!(instructions[0].mnemonic, "nop");
assert_eq!(instructions[0].address, 0x1000);
}
#[test]
fn test_iced_engine_x86() {
let config = DisassemblyConfig::default();
let data = &[0x90];
let result = disassemble(data, 0x1000, Architecture::X86, &config);
assert!(result.is_ok());
let instructions = result.unwrap();
assert!(!instructions.is_empty());
}
#[test]
fn test_unsupported_architecture() {
let config = DisassemblyConfig::default();
let data = &[0x90];
let result = disassemble(data, 0x1000, Architecture::Arm, &config);
assert!(result.is_err());
}
#[test]
fn test_instruction_parsing() {
let (mnemonic, operands) = parse_formatted_instruction("mov eax, ebx");
assert_eq!(mnemonic, "mov");
assert_eq!(operands, "eax, ebx");
let (mnemonic, operands) = parse_formatted_instruction("nop");
assert_eq!(mnemonic, "nop");
assert_eq!(operands, "");
}
#[test]
fn test_arithmetic_instructions_x86_64() {
let config = DisassemblyConfig::default();
let data = &[0x01, 0xd8];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Arithmetic);
assert_eq!(result[0].mnemonic, "add");
let data = &[0x29, 0xd8];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Arithmetic);
assert_eq!(result[0].mnemonic, "sub");
let data = &[0xff, 0xc0];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Arithmetic);
assert_eq!(result[0].mnemonic, "inc");
let data = &[0xff, 0xc8];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Arithmetic);
assert_eq!(result[0].mnemonic, "dec");
}
#[test]
fn test_logic_instructions_x86_64() {
let config = DisassemblyConfig::default();
let data = &[0x21, 0xd8];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Logic);
assert_eq!(result[0].mnemonic, "and");
let data = &[0x09, 0xd8];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Logic);
assert_eq!(result[0].mnemonic, "or");
let data = &[0x31, 0xc0];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Logic);
assert_eq!(result[0].mnemonic, "xor");
}
#[test]
fn test_memory_instructions_x86_64() {
let config = DisassemblyConfig::default();
let data = &[0x89, 0xd8];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Memory);
assert_eq!(result[0].mnemonic, "mov");
let data = &[0x50];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Memory);
assert_eq!(result[0].mnemonic, "push");
let data = &[0x58];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Memory);
assert_eq!(result[0].mnemonic, "pop");
}
#[test]
fn test_control_instructions_x86_64() {
let config = DisassemblyConfig {
analyze_control_flow: true,
..DisassemblyConfig::default()
};
let data = &[0xeb, 0x05];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Control);
assert_eq!(result[0].mnemonic, "jmp");
if let FlowType::Jump(target) = result[0].flow {
assert_eq!(target, 0x1007); } else {
panic!("Expected Jump flow type");
}
let data = &[0x74, 0x03];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Control);
assert_eq!(result[0].mnemonic, "je");
if let FlowType::ConditionalJump(target) = result[0].flow {
assert_eq!(target, 0x1005); } else {
panic!("Expected ConditionalJump flow type");
}
let data = &[0xe8, 0x00, 0x00, 0x00, 0x00];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Control);
assert_eq!(result[0].mnemonic, "call");
if let FlowType::Call(target) = result[0].flow {
assert_eq!(target, 0x1005); } else {
panic!("Expected Call flow type");
}
let data = &[0xc3];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Control);
assert_eq!(result[0].mnemonic, "ret");
assert_eq!(result[0].flow, FlowType::Return);
}
#[test]
fn test_system_instructions_x86_64() {
let config = DisassemblyConfig {
analyze_control_flow: true,
..DisassemblyConfig::default()
};
let data = &[0xcc];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::System);
assert_eq!(result[0].mnemonic, "int3");
assert_eq!(result[0].flow, FlowType::Interrupt);
let data = &[0x0f, 0x05];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::System);
assert_eq!(result[0].mnemonic, "syscall");
assert!(matches!(
result[0].flow,
FlowType::Interrupt | FlowType::Unknown
));
}
#[test]
fn test_float_instructions_x86_64() {
let config = DisassemblyConfig::default();
let data = &[0xd8, 0xc1];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Float);
assert!(result[0].mnemonic.starts_with("fadd"));
}
#[test]
fn test_control_flow_analysis_disabled() {
let config = DisassemblyConfig {
analyze_control_flow: false,
..DisassemblyConfig::default()
};
let data = &[0xeb, 0x05];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].flow, FlowType::Sequential);
}
#[test]
fn test_skip_invalid_instructions() {
let config = DisassemblyConfig {
skip_invalid: true,
..DisassemblyConfig::default()
};
let data = &[0x90, 0xff, 0xff, 0xff, 0x90]; let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert!(!result.is_empty());
assert_eq!(result[0].mnemonic, "nop");
}
#[test]
fn test_max_instructions_limit() {
let config = DisassemblyConfig {
max_instructions: 2,
..DisassemblyConfig::default()
};
let data = &[0x90, 0x90, 0x90, 0x90, 0x90]; let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].mnemonic, "nop");
assert_eq!(result[1].mnemonic, "nop");
}
#[test]
fn test_empty_data() {
let config = DisassemblyConfig::default();
let data = &[];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result.len(), 0);
}
#[test]
fn test_single_byte_instruction() {
let config = DisassemblyConfig::default();
let data = &[0x90];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].mnemonic, "nop");
assert_eq!(result[0].size, 1);
assert_eq!(result[0].bytes, vec![0x90]);
}
#[test]
fn test_multi_byte_instruction() {
let config = DisassemblyConfig::default();
let data = &[0xb8, 0x00, 0x10, 0x00, 0x00];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].mnemonic, "mov");
assert_eq!(result[0].size, 5);
assert_eq!(result[0].bytes, vec![0xb8, 0x00, 0x10, 0x00, 0x00]);
}
#[test]
fn test_instruction_addressing() {
let config = DisassemblyConfig::default();
let base_addr = 0x401000;
let data = &[0x90, 0x90, 0x90]; let result = disassemble(data, base_addr, Architecture::X86_64, &config).unwrap();
assert_eq!(result.len(), 3);
assert_eq!(result[0].address, base_addr);
assert_eq!(result[1].address, base_addr + 1);
assert_eq!(result[2].address, base_addr + 2);
}
#[test]
fn test_x86_32bit_mode() {
let config = DisassemblyConfig::default();
let data = &[0x90]; let result = disassemble(data, 0x1000, Architecture::X86, &config).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].mnemonic, "nop");
}
#[test]
fn test_create_decoder_bitness() {
let result = create_decoder(16, &[0x90], 0x1000);
assert!(result.is_ok());
let result = create_decoder(32, &[0x90], 0x1000);
assert!(result.is_ok());
let result = create_decoder(64, &[0x90], 0x1000);
assert!(result.is_ok());
let result = create_decoder(128, &[0x90], 0x1000);
assert!(result.is_err());
}
#[test]
fn test_create_formatter() {
let formatter = create_formatter();
assert!(std::mem::size_of_val(&formatter) > 0);
}
#[test]
fn test_parse_formatted_instruction_edge_cases() {
let (mnemonic, operands) = parse_formatted_instruction("add eax, ebx, ecx");
assert_eq!(mnemonic, "add");
assert_eq!(operands, "eax, ebx, ecx");
let (mnemonic, operands) = parse_formatted_instruction(" mov eax, ebx ");
assert_eq!(mnemonic, "mov");
assert_eq!(operands, " eax, ebx");
let (mnemonic, operands) = parse_formatted_instruction("");
assert_eq!(mnemonic, "");
assert_eq!(operands, "");
let (mnemonic, operands) = parse_formatted_instruction(" ret ");
assert_eq!(mnemonic, "ret");
assert_eq!(operands, "");
}
#[test]
fn test_analyze_iced_control_flow_variants() {
use iced_x86::*;
let mut decoder = Decoder::with_ip(64, &[0x90], 0x1000, DecoderOptions::NONE);
let mut instr = Instruction::default();
decoder.decode_out(&mut instr);
let flow = analyze_iced_control_flow(&instr, "");
assert_eq!(flow, FlowType::Sequential); }
#[test]
fn test_get_branch_target() {
use iced_x86::*;
let data = &[0xeb, 0x05]; let mut decoder = Decoder::with_ip(64, data, 0x1000, DecoderOptions::NONE);
let mut instr = Instruction::default();
decoder.decode_out(&mut instr);
let target = get_branch_target(&instr);
assert!(target.is_some());
assert_eq!(target.unwrap(), 0x1007);
let data = &[0x90]; let mut decoder = Decoder::with_ip(64, data, 0x1000, DecoderOptions::NONE);
let mut instr = Instruction::default();
decoder.decode_out(&mut instr);
let target = get_branch_target(&instr);
assert!(target.is_none());
}
#[test]
fn test_analyze_instruction_details() {
use iced_x86::*;
let data = &[0x89, 0xd8];
let mut decoder = Decoder::with_ip(64, data, 0x1000, DecoderOptions::NONE);
let mut instr = Instruction::default();
decoder.decode_out(&mut instr);
let details = analyze_instruction_details(&instr);
assert!(!details.operands.is_empty());
assert!(!details.encoding.is_empty());
}
#[test]
fn test_format_operand_info() {
use iced_x86::*;
let data = &[0x89, 0xd8]; let mut decoder = Decoder::with_ip(64, data, 0x1000, DecoderOptions::NONE);
let mut instr = Instruction::default();
decoder.decode_out(&mut instr);
let operand_info = format_operand_info(&instr, 0);
assert!(operand_info.contains("reg:"));
let data = &[0xb8, 0x10, 0x00, 0x00, 0x00]; let mut decoder = Decoder::with_ip(64, data, 0x1000, DecoderOptions::NONE);
let mut instr = Instruction::default();
decoder.decode_out(&mut instr);
let operand_info = format_operand_info(&instr, 1);
assert!(operand_info.contains("imm"));
}
#[test]
fn test_get_cpuid_features() {
use iced_x86::*;
let data = &[0x90]; let mut decoder = Decoder::with_ip(64, data, 0x1000, DecoderOptions::NONE);
let mut instr = Instruction::default();
decoder.decode_out(&mut instr);
let features = get_cpuid_features(&instr);
assert!(features.is_empty() || !features.is_empty()); }
#[test]
fn test_instruction_details_struct() {
let details = InstructionDetails {
operands: vec!["eax".to_string(), "ebx".to_string()],
memory_accesses: vec!["mem_access_0".to_string()],
registers_read: vec!["eax".to_string()],
registers_written: vec!["eax".to_string()],
encoding: "Legacy".to_string(),
cpuid_features: vec!["FPU".to_string()],
stack_pointer_increment: 0,
};
assert_eq!(details.operands.len(), 2);
assert_eq!(details.memory_accesses.len(), 1);
assert_eq!(details.stack_pointer_increment, 0);
}
#[test]
fn test_complex_instruction_sequence() {
let config = DisassemblyConfig {
analyze_control_flow: true,
..DisassemblyConfig::default()
};
let data = &[0x55, 0x89, 0xe5, 0xc3]; let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result.len(), 3);
assert_eq!(result[0].mnemonic, "push");
assert_eq!(result[1].mnemonic, "mov");
assert_eq!(result[2].mnemonic, "ret");
assert_eq!(result[2].flow, FlowType::Return);
}
#[test]
fn test_vector_instructions() {
let config = DisassemblyConfig::default();
let data = &[0x0f, 0x10, 0xc1];
let result = disassemble(data, 0x1000, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].category, InstructionCategory::Memory); assert_eq!(result[0].mnemonic, "movups");
}
#[test]
fn test_instruction_bytes_accuracy() {
let config = DisassemblyConfig::default();
let base_addr = 0x1000;
let test_cases = vec![
(&[0x90][..], 1), (&[0x89, 0xd8][..], 2), (&[0x0f, 0x10, 0xc1][..], 3), (&[0xb8, 0x00, 0x10, 0x00, 0x00][..], 5), ];
for (data, expected_size) in test_cases {
let result = disassemble(data, base_addr, Architecture::X86_64, &config).unwrap();
assert_eq!(result[0].size, expected_size);
assert_eq!(result[0].bytes.len(), expected_size);
assert_eq!(result[0].bytes, data);
}
}
}