use crate::{
types::{Architecture, Instruction, InstructionCategory},
AnalysisConfig, BinaryError, BinaryFile, Result,
};
#[cfg(feature = "disasm-capstone")]
use crate::types::ControlFlow as FlowType;
#[cfg(feature = "disasm-capstone")]
mod capstone_engine;
#[cfg(feature = "disasm-iced")]
mod iced_engine;
/// Disassembly backend that a `Disassembler` can be configured to use.
///
/// The `Capstone` and `Iced` variants only exist when the matching Cargo
/// feature is enabled; `Auto` is always available.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DisassemblyEngine {
/// Dispatch to the `capstone_engine` module (requires the `disasm-capstone` feature).
#[cfg(feature = "disasm-capstone")]
Capstone,
/// Dispatch to the `iced_engine` module (requires the `disasm-iced` feature).
#[cfg(feature = "disasm-iced")]
Iced,
/// Let `Disassembler::select_engine` pick a backend from the target
/// architecture and the features that were compiled in.
Auto,
}
/// Options that accompany every disassembly request.
///
/// A reference to the whole struct is passed to the selected engine module.
/// Apart from `engine`, how each option is honoured is decided in those
/// modules and is not visible in this file.
#[derive(Debug, Clone)]
pub struct DisassemblyConfig {
/// Backend to use; `DisassemblyEngine::Auto` defers the choice to the disassembler.
pub engine: DisassemblyEngine,
/// Instruction budget for a run (presumably an upper bound enforced by the
/// engine modules; confirm there).
pub max_instructions: usize,
/// Presumably requests detailed per-instruction information; confirm in the engine modules.
pub detailed: bool,
/// Presumably enables control-flow classification of decoded instructions;
/// confirm in the engine modules.
pub analyze_control_flow: bool,
/// Presumably skips undecodable bytes instead of failing; confirm in the engine modules.
pub skip_invalid: bool,
}
impl Default for DisassemblyConfig {
fn default() -> Self {
Self {
engine: DisassemblyEngine::Auto,
max_instructions: 10000,
detailed: true,
analyze_control_flow: true,
skip_invalid: true,
}
}
}
/// Disassembler bound to one target architecture and one configuration.
///
/// Both values are fixed at construction and forwarded to the engine module
/// on every `disassemble` call.
pub struct Disassembler {
// Options passed by reference to the engine; `engine` also drives backend selection.
config: DisassemblyConfig,
// Target architecture; used to resolve `DisassemblyEngine::Auto` and passed to the engine.
architecture: Architecture,
}
impl Disassembler {
    /// Creates a disassembler for `architecture` using `DisassemblyConfig::default()`.
    ///
    /// The current implementation always returns `Ok`.
    pub fn new(architecture: Architecture) -> Result<Self> {
        Ok(Self {
            config: DisassemblyConfig::default(),
            architecture,
        })
    }

    /// Creates a disassembler for `architecture` with a caller-supplied `config`.
    ///
    /// The current implementation always returns `Ok`; the configuration is
    /// stored as given and is not validated here.
    pub fn with_config(architecture: Architecture, config: DisassemblyConfig) -> Result<Self> {
        Ok(Self {
            config,
            architecture,
        })
    }

    /// Disassembles `data` with the engine resolved by `select_engine`.
    ///
    /// `address`, the target architecture and the configuration are forwarded
    /// unchanged to the engine module (`address` is presumably the address of
    /// the first byte of `data`; confirm in the engine modules).
    ///
    /// # Errors
    ///
    /// Propagates the error from engine selection and any error returned by
    /// the engine itself.
    pub fn disassemble(&self, data: &[u8], address: u64) -> Result<Vec<Instruction>> {
        let engine = self.select_engine()?;
        match engine {
            #[cfg(feature = "disasm-capstone")]
            DisassemblyEngine::Capstone => {
                capstone_engine::disassemble(data, address, self.architecture, &self.config)
            }
            #[cfg(feature = "disasm-iced")]
            DisassemblyEngine::Iced => {
                iced_engine::disassemble(data, address, self.architecture, &self.config)
            }
            // `select_engine` resolves `Auto` to a concrete engine or an error,
            // so this arm is a fallback that keeps the match exhaustive.
            DisassemblyEngine::Auto => {
                #[cfg(feature = "disasm-capstone")]
                {
                    capstone_engine::disassemble(data, address, self.architecture, &self.config)
                }
                #[cfg(all(feature = "disasm-iced", not(feature = "disasm-capstone")))]
                {
                    iced_engine::disassemble(data, address, self.architecture, &self.config)
                }
                #[cfg(not(any(feature = "disasm-capstone", feature = "disasm-iced")))]
                {
                    Err(BinaryError::feature_not_available(
                        "No disassembly engine available. Enable 'disasm-capstone' or 'disasm-iced' feature.",
                    ))
                }
            }
        }
    }

    /// Disassembles the first section of `binary` whose name equals `section_name`.
    ///
    /// The section's bytes are decoded starting at the section's own address.
    ///
    /// # Errors
    ///
    /// Returns an `invalid_data` error when no section has that name or when
    /// the matching section carries no data; otherwise propagates the result
    /// of [`Disassembler::disassemble`].
    pub fn disassemble_section(
        &self,
        binary: &BinaryFile,
        section_name: &str,
    ) -> Result<Vec<Instruction>> {
        for section in binary.sections() {
            if section.name != section_name {
                continue;
            }
            // Only the first section with a matching name is considered.
            return match &section.data {
                Some(data) => self.disassemble(data, section.address),
                None => Err(BinaryError::invalid_data(
                    "Section data not available for disassembly",
                )),
            };
        }
        Err(BinaryError::invalid_data(format!(
            "Section '{}' not found",
            section_name
        )))
    }

    /// Disassembles only the first `length` bytes of `data`, located at `address`.
    ///
    /// # Errors
    ///
    /// Returns an `invalid_data` error when `data` holds fewer than `length`
    /// bytes; otherwise propagates the result of [`Disassembler::disassemble`].
    pub fn disassemble_at(
        &self,
        data: &[u8],
        address: u64,
        length: usize,
    ) -> Result<Vec<Instruction>> {
        if data.len() < length {
            return Err(BinaryError::invalid_data(
                "Insufficient data for disassembly",
            ));
        }
        // The guard above makes this slice in-bounds.
        self.disassemble(&data[..length], address)
    }

    /// Resolves the configured engine to a concrete backend.
    ///
    /// An explicit `Capstone` or `Iced` choice is returned as-is. For `Auto`:
    /// * `X86` / `X86_64` use Iced when `disasm-iced` is enabled, otherwise
    ///   Capstone when `disasm-capstone` is enabled, otherwise a
    ///   `feature_not_available` error;
    /// * every other architecture uses Capstone, or an `unsupported_arch`
    ///   error when `disasm-capstone` is not enabled.
    fn select_engine(&self) -> Result<DisassemblyEngine> {
        match self.config.engine {
            #[cfg(feature = "disasm-capstone")]
            DisassemblyEngine::Capstone => Ok(DisassemblyEngine::Capstone),
            #[cfg(feature = "disasm-iced")]
            DisassemblyEngine::Iced => Ok(DisassemblyEngine::Iced),
            DisassemblyEngine::Auto => {
                match self.architecture {
                    Architecture::X86 | Architecture::X86_64 => {
                        #[cfg(feature = "disasm-iced")]
                        {
                            Ok(DisassemblyEngine::Iced)
                        }
                        #[cfg(all(feature = "disasm-capstone", not(feature = "disasm-iced")))]
                        {
                            Ok(DisassemblyEngine::Capstone)
                        }
                        #[cfg(not(any(feature = "disasm-capstone", feature = "disasm-iced")))]
                        {
                            Err(BinaryError::feature_not_available(
                                "No disassembly engine available",
                            ))
                        }
                    }
                    _ => {
                        #[cfg(feature = "disasm-capstone")]
                        {
                            Ok(DisassemblyEngine::Capstone)
                        }
                        #[cfg(not(feature = "disasm-capstone"))]
                        {
                            Err(BinaryError::unsupported_arch(format!(
                                "Architecture {:?} requires Capstone engine",
                                self.architecture
                            )))
                        }
                    }
                }
            }
        }
    }
}
/// Disassembles every executable section of `binary` and returns the decoded
/// instructions concatenated in section order.
///
/// The engine comes from `config.disassembly_engine`, the instruction budget
/// is `config.max_analysis_size / 16`, and all boolean options are enabled.
/// Sections without execute permission or without data are ignored.
///
/// # Errors
///
/// Only construction of the [`Disassembler`] can fail the call. A section
/// that fails to disassemble is skipped, so the result may be partial or empty.
pub fn disassemble_binary(
    binary: &BinaryFile,
    config: &AnalysisConfig,
) -> Result<Vec<Instruction>> {
    let disasm_config = DisassemblyConfig {
        engine: config.disassembly_engine,
        // Instruction budget: the analysis size limit divided by 16.
        max_instructions: config.max_analysis_size / 16,
        detailed: true,
        analyze_control_flow: true,
        skip_invalid: true,
    };
    let disassembler = Disassembler::with_config(binary.architecture(), disasm_config)?;

    let mut all_instructions = Vec::new();
    for section in binary.sections() {
        if !section.permissions.execute {
            continue;
        }
        if let Some(data) = &section.data {
            // Best effort: one undecodable section must not discard the
            // instructions already collected from the others.
            if let Ok(mut instructions) = disassembler.disassemble(data, section.address) {
                all_instructions.append(&mut instructions);
            }
        }
    }
    Ok(all_instructions)
}
/// Maps an instruction mnemonic to a coarse [`InstructionCategory`].
///
/// The mnemonic is lower-cased and then tested against prefix/substring
/// tables in a fixed order. The first table that matches decides the
/// category; `InstructionCategory::Unknown` is returned when none matches.
fn categorize_instruction(mnemonic: &str) -> InstructionCategory {
    const ARITHMETIC_PREFIXES: &[&str] = &["add", "sub", "mul", "div", "inc", "dec"];
    const LOGIC_PREFIXES: &[&str] = &["and", "or", "xor", "not", "shl", "shr"];
    const MEMORY_PREFIXES: &[&str] = &["mov", "lea", "push", "pop", "load", "store"];
    const CONTROL_PREFIXES: &[&str] = &[
        "jmp", "je", "jne", "jz", "jnz", "call", "ret", "br", "bl",
    ];
    const SYSTEM_PREFIXES: &[&str] = &["int", "syscall", "sysenter", "sysexit"];
    const CRYPTO_FRAGMENTS: &[&str] = &["aes", "sha", "crypto"];
    const FLOAT_PREFIXES: &[&str] = &["fadd", "fsub", "fmul", "fdiv"];
    const VECTOR_FRAGMENTS: &[&str] = &["xmm", "ymm", "zmm"];

    let lowered = mnemonic.to_lowercase();
    let name = lowered.as_str();
    let starts_with_any =
        |prefixes: &[&str]| prefixes.iter().any(|prefix| name.starts_with(*prefix));
    let contains_any =
        |fragments: &[&str]| fragments.iter().any(|fragment| name.contains(*fragment));

    // The order of these checks is significant: earlier categories win.
    if starts_with_any(ARITHMETIC_PREFIXES) {
        return InstructionCategory::Arithmetic;
    }
    if starts_with_any(LOGIC_PREFIXES) {
        return InstructionCategory::Logic;
    }
    if starts_with_any(MEMORY_PREFIXES) {
        return InstructionCategory::Memory;
    }
    if starts_with_any(CONTROL_PREFIXES) {
        return InstructionCategory::Control;
    }
    if starts_with_any(SYSTEM_PREFIXES) {
        return InstructionCategory::System;
    }
    if contains_any(CRYPTO_FRAGMENTS) {
        return InstructionCategory::Crypto;
    }
    if starts_with_any(FLOAT_PREFIXES) {
        return InstructionCategory::Float;
    }
    if contains_any(VECTOR_FRAGMENTS) || name.starts_with("v") {
        return InstructionCategory::Vector;
    }
    InstructionCategory::Unknown
}
#[cfg(feature = "disasm-capstone")]
/// Classifies how an instruction affects control flow, based on its
/// lower-cased mnemonic and its operand text.
///
/// * `ret` / `retn` yield `Return`.
/// * `call`, `jmp*` and any other `j*` mnemonic longer than one character
///   yield `Call`, `Jump` and `ConditionalJump` respectively when the operand
///   parses as an address, and `Unknown` when it does not.
/// * `int` / `syscall` yield `Interrupt`.
/// * Everything else yields `Sequential`.
fn analyze_control_flow(mnemonic: &str, operands: &str) -> FlowType {
    let lowered = mnemonic.to_lowercase();
    // Evaluated lazily: only branch-like mnemonics need the operand parsed.
    let target = || extract_address_from_operands(operands);

    match lowered.as_str() {
        "ret" | "retn" => FlowType::Return,
        "call" => target().map_or(FlowType::Unknown, FlowType::Call),
        // `jmp` must be tested before the generic `j*` arm below.
        name if name.starts_with("jmp") => target().map_or(FlowType::Unknown, FlowType::Jump),
        name if name.starts_with('j') && name.len() > 1 => {
            target().map_or(FlowType::Unknown, FlowType::ConditionalJump)
        }
        "int" | "syscall" => FlowType::Interrupt,
        _ => FlowType::Sequential,
    }
}
#[cfg(feature = "disasm-capstone")]
/// Parses an operand string that consists solely of an address.
///
/// A `0x`-prefixed operand is read as hexadecimal; if that fails, or there is
/// no prefix, the whole operand is read as a decimal `u64`. Returns `None`
/// when neither form parses (for example a register name).
fn extract_address_from_operands(operands: &str) -> Option<u64> {
    operands
        .strip_prefix("0x")
        .and_then(|hex_digits| u64::from_str_radix(hex_digits, 16).ok())
        // Fall back to decimal on the original, unstripped text.
        .or_else(|| operands.parse::<u64>().ok())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;

    /// Building a disassembler with the default configuration succeeds.
    #[test]
    fn test_disassembler_creation() {
        assert!(Disassembler::new(Architecture::X86_64).is_ok());
    }

    /// The default configuration selects `Auto`, a 10000-instruction budget,
    /// and enables detail and control-flow analysis.
    #[test]
    fn test_config_default() {
        let DisassemblyConfig {
            engine,
            max_instructions,
            detailed,
            analyze_control_flow: control_flow_enabled,
            ..
        } = DisassemblyConfig::default();

        assert_eq!(engine, DisassemblyEngine::Auto);
        assert_eq!(max_instructions, 10000);
        assert!(detailed);
        assert!(control_flow_enabled);
    }

    /// One representative mnemonic per checked category.
    #[test]
    fn test_instruction_categorization() {
        let expectations = [
            ("add", InstructionCategory::Arithmetic),
            ("mov", InstructionCategory::Memory),
            ("jmp", InstructionCategory::Control),
            ("and", InstructionCategory::Logic),
            ("syscall", InstructionCategory::System),
        ];
        for (mnemonic, expected) in expectations.iter() {
            assert_eq!(categorize_instruction(mnemonic), *expected);
        }
    }

    /// Return, call, jump, conditional jump and sequential flow are recognised.
    #[test]
    #[cfg(feature = "disasm-capstone")]
    fn test_control_flow_analysis() {
        let expectations = [
            ("ret", "", FlowType::Return),
            ("call", "0x1000", FlowType::Call(0x1000)),
            ("jmp", "0x2000", FlowType::Jump(0x2000)),
            ("je", "0x3000", FlowType::ConditionalJump(0x3000)),
            ("mov", "eax, ebx", FlowType::Sequential),
        ];
        for (mnemonic, operands, expected) in expectations.iter() {
            assert_eq!(analyze_control_flow(mnemonic, operands), *expected);
        }
    }

    /// Hexadecimal and decimal operands parse; a register name does not.
    #[test]
    #[cfg(feature = "disasm-capstone")]
    fn test_address_extraction() {
        let parse = extract_address_from_operands;
        assert_eq!(parse("0x1000"), Some(0x1000));
        assert_eq!(parse("4096"), Some(4096));
        assert_eq!(parse("eax"), None);
    }
}