use crate::{BinaryInfo, DisassemblyReport, DisassemblyResult, FileArchitecture, Result};
use iced_x86::{FlowControl, Formatter, IntelFormatter, Mnemonic, OpKind, Register};
use std::collections::HashMap;
/// Creates an [`IntelFormatter`] with the option set this crate uses for all
/// textual instruction output.
///
/// The configuration produces lowercase text, `0x`-prefixed lowercase hex
/// numbers without leading zeros or suffix, a space after operand separators
/// and around `+`/`-` in memory operands (but not around `*`), and always
/// prints the memory size keyword.
///
/// NOTE(review): judging by the name, the goal is output that matches
/// Capstone's formatting — confirm before changing any option.
#[must_use]
pub fn capstone_compat_formatter() -> IntelFormatter {
    let mut formatter = IntelFormatter::new();
    {
        let options = formatter.options_mut();

        // Number rendering: `0x1f`, never `1Fh`, `0x0a` or decimal shortcuts.
        options.set_hex_prefix("0x");
        options.set_hex_suffix("");
        options.set_uppercase_hex(false);
        options.set_small_hex_numbers_in_decimal(false);
        options.set_add_leading_zero_to_hex_numbers(false);

        // Casing: everything lowercase.
        options.set_uppercase_mnemonics(false);
        options.set_uppercase_registers(false);
        options.set_uppercase_keywords(false);
        options.set_uppercase_decorators(false);
        options.set_uppercase_prefixes(false);

        // Spacing and memory operand layout.
        options.set_space_after_operand_separator(true);
        options.set_space_between_memory_add_operators(true);
        options.set_space_between_memory_mul_operators(false);
        options.set_memory_size_options(iced_x86::MemorySizeOptions::Always);
    }
    formatter
}
/// A decoded instruction together with the context it was decoded in.
///
/// Instances are built from a [`DecodedInsn`] via [`Instruction::new`]; most
/// methods simply forward to the wrapped `iced_x86` instruction.
#[derive(Debug, Clone)]
pub struct Instruction {
/// Architecture of the binary this instruction belongs to.
pub arch: FileArchitecture,
/// Bitness supplied by the caller at construction time
/// (presumably 16, 32 or 64 — confirm against the decoder setup).
pub bitness: u32,
/// Location of the instruction; handed to `BinaryInfo::bytes_at` together
/// with `length` to fetch the raw bytes.
/// NOTE(review): whether this is a virtual address or a file offset is not
/// visible here — confirm against `BinaryInfo::bytes_at`.
pub offset: u64,
/// Number of bytes requested from `BinaryInfo::bytes_at` for this instruction.
pub length: u32,
/// The underlying decoded `iced_x86` instruction.
pub iced: iced_x86::Instruction,
}
impl Instruction {
#[must_use]
pub fn new(arch: FileArchitecture, bitness: u32, ins: &DecodedInsn) -> Self {
Self {
arch,
bitness,
offset: ins.offset,
length: ins.length,
iced: ins.iced,
}
}
#[must_use]
pub fn format_mnemonic(&self) -> String {
let mut fmt = capstone_compat_formatter();
let mut out = String::new();
fmt.format_mnemonic(&self.iced, &mut out);
out
}
#[must_use]
pub fn format_operands(&self) -> Option<String> {
if self.iced.op_count() == 0 {
return None;
}
let mut fmt = capstone_compat_formatter();
let mut out = String::new();
fmt.format_all_operands(&self.iced, &mut out);
Some(out)
}
pub fn bytes_in<'b>(&self, binary_info: &'b BinaryInfo<'_>) -> Result<&'b [u8]> {
binary_info.bytes_at(self.offset, self.length)
}
pub fn bytes_hex(&self, binary_info: &BinaryInfo<'_>) -> Result<String> {
Ok(hex::encode(self.bytes_in(binary_info)?))
}
#[must_use]
pub fn mnemonic_enum(&self) -> Mnemonic {
self.iced.mnemonic()
}
#[must_use]
pub fn code(&self) -> iced_x86::Code {
self.iced.code()
}
#[must_use]
pub fn op_count(&self) -> u32 {
self.iced.op_count()
}
#[must_use]
pub fn op_kind(&self, i: u32) -> OpKind {
self.iced.op_kind(i)
}
#[must_use]
pub fn op_register(&self, i: u32) -> Register {
self.iced.op_register(i)
}
#[must_use]
pub fn memory_base(&self) -> Register {
self.iced.memory_base()
}
#[must_use]
pub fn memory_index(&self) -> Register {
self.iced.memory_index()
}
#[must_use]
pub fn memory_displacement64(&self) -> u64 {
self.iced.memory_displacement64()
}
#[must_use]
pub fn memory_segment(&self) -> Register {
self.iced.memory_segment()
}
#[must_use]
pub fn near_branch_target(&self) -> u64 {
self.iced.near_branch_target()
}
#[must_use]
pub fn flow_control(&self) -> FlowControl {
self.iced.flow_control()
}
#[must_use]
pub fn is_call(&self) -> bool {
matches!(
self.iced.flow_control(),
FlowControl::Call | FlowControl::IndirectCall
)
}
#[must_use]
pub fn is_jmp(&self) -> bool {
matches!(
self.iced.flow_control(),
FlowControl::UnconditionalBranch | FlowControl::IndirectBranch
)
}
#[must_use]
pub fn is_conditional_jmp(&self) -> bool {
matches!(self.iced.flow_control(), FlowControl::ConditionalBranch)
}
#[must_use]
pub fn is_ret(&self) -> bool {
matches!(self.iced.flow_control(), FlowControl::Return)
}
pub fn get_printable_len(&self) -> Result<u64> {
if self.iced.op_count() != 2 {
return Ok(0);
}
let (chars, ascii_len, utf16_len): (Vec<u8>, u64, u64) = match self.iced.op_kind(1) {
OpKind::Immediate8 => (vec![self.iced.immediate8()], 1, 0),
OpKind::Immediate16 => (self.iced.immediate16().to_le_bytes().to_vec(), 2, 1),
OpKind::Immediate32 => (self.iced.immediate32().to_le_bytes().to_vec(), 4, 2),
OpKind::Immediate64 => (self.iced.immediate64().to_le_bytes().to_vec(), 8, 4),
_ => return Ok(0),
};
if is_printable_ascii(&chars)? {
return Ok(ascii_len);
}
if utf16_len > 0 && is_printable_utf16le(&chars)? {
return Ok(utf16_len);
}
Ok(0)
}
pub fn get_data_refs(&self, report: &DisassemblyReport) -> Result<Vec<u64>> {
if !matches!(
self.iced.flow_control(),
FlowControl::Next | FlowControl::Exception
) {
return Ok(vec![]);
}
if matches!(
self.iced.mnemonic(),
Mnemonic::Cmp
| Mnemonic::Cmpsb
| Mnemonic::Cmpsw
| Mnemonic::Cmpsd
| Mnemonic::Cmpsq
| Mnemonic::Test
) {
return Ok(vec![]);
}
let mut res = Vec::new();
for i in 0..self.iced.op_count() {
let value: u64 = match self.iced.op_kind(i) {
OpKind::Immediate8 => self.iced.immediate8() as u64,
OpKind::Immediate16 => self.iced.immediate16() as u64,
OpKind::Immediate32 => self.iced.immediate32() as u64,
OpKind::Immediate64 => self.iced.immediate64(),
OpKind::Memory => self.iced.memory_displacement64(),
_ => 0,
};
if value != 0 && report.is_addr_within_memory_image(&value)? {
res.push(value);
}
}
Ok(res)
}
}
/// A disassembled function: its basic blocks plus the reference and scoring
/// data gathered for it by [`Function::new`].
#[derive(Debug, Clone)]
pub struct Function {
/// Architecture of the containing binary.
pub arch: crate::FileArchitecture,
/// File format of the containing binary.
pub format: crate::FileFormat,
/// Bitness of the containing binary.
pub bitness: u32,
/// Offset identifying the function; also used as the key of its entry block.
pub offset: u64,
/// Basic blocks keyed by block offset, each holding its instructions.
blocks: HashMap<u64, Vec<Instruction>>,
/// API references as returned by `DisassemblyResult::get_api_refs`.
/// NOTE(review): the tuple is presumably (library name, API name) keyed by
/// the referencing address — confirm against the producer.
pub apirefs: HashMap<u64, (Option<String>, Option<String>)>,
/// Control-flow edges between blocks: source block offset -> target block offsets.
pub blockrefs: HashMap<u64, Vec<u64>>,
/// Incoming references as returned by `DisassemblyResult::get_in_refs`.
pub inrefs: Vec<u64>,
/// Outgoing references as returned by `DisassemblyResult::get_out_refs`.
pub outrefs: HashMap<u64, Vec<u64>>,
/// Initialised to `0` by `Function::new`; not updated anywhere in this file.
pub binweight: u32,
/// Characteristics string of the function candidate, or `"-----------"`
/// when no candidate exists for this offset.
characteristics: String,
/// Confidence of the function candidate, or `0.0` when there is none.
confidence: f32,
/// Symbol name for this offset, or the empty string when unknown.
function_name: String,
/// TF-IDF score of the function candidate, or `0.0` when there is none.
tfidf: f32,
/// `true` when an export of the binary resolves to this function's offset.
pub is_exported: bool,
/// Sorted offsets of instructions whose immediate operand looks like
/// printable text (see `Instruction::get_printable_len`).
pub stringrefs: Vec<u64>,
}
impl Function {
    /// Assembles the [`Function`] located at `function_offset` from a finished
    /// disassembly.
    ///
    /// Blocks and references are pulled from `disassembly`; characteristics,
    /// confidence and TF-IDF come from the matching function candidate when
    /// one exists and fall back to `"-----------"`, `0.0` and `0.0` otherwise.
    /// `stringrefs` receives the sorted offsets of all instructions for which
    /// [`Instruction::get_printable_len`] reports a non-zero length (errors
    /// from that check are treated as "not printable").
    ///
    /// # Errors
    /// Propagates errors from the `DisassemblyResult` accessors and from the
    /// candidate getters.
    pub fn new(disassembly: &DisassemblyResult, function_offset: &u64) -> Result<Function> {
        let binary_info = &disassembly.binary_info;

        let blocks = Function::parse_blocks(
            disassembly,
            &disassembly.get_blocks_as_decoded(function_offset)?,
        )?;
        let apirefs = disassembly.get_api_refs(function_offset)?;
        let blockrefs = disassembly.get_block_refs(function_offset)?;
        let inrefs = disassembly.get_in_refs(function_offset)?;
        let outrefs = disassembly.get_out_refs(function_offset)?;

        // Look the candidate up once instead of `contains_key` + index per field.
        let candidate = disassembly.candidates.get(function_offset);
        let characteristics = match candidate {
            Some(c) => c.get_characteristics()?,
            None => "-----------".to_string(),
        };
        let confidence = match candidate {
            Some(c) => c.get_confidence()?,
            None => 0.0,
        };
        let function_name = disassembly
            .function_symbols
            .get(function_offset)
            .cloned()
            .unwrap_or_default();
        let tfidf = match candidate {
            Some(c) => c.get_tfidf()?,
            None => 0.0,
        };

        let base = binary_info.base_addr;
        let is_exported = binary_info
            .exports
            .iter()
            .any(|(_n, rva, _f)| base.checked_add(*rva as u64) == Some(*function_offset));

        let mut stringrefs: Vec<u64> = blocks
            .values()
            .flatten()
            .filter(|ins| ins.get_printable_len().unwrap_or(0) > 0)
            .map(|ins| ins.offset)
            .collect();
        // Block iteration order is unspecified, so sort for a stable result.
        stringrefs.sort_unstable();

        Ok(Function {
            arch: binary_info.file_architecture,
            format: binary_info.file_format,
            bitness: binary_info.bitness,
            offset: *function_offset,
            blocks,
            apirefs,
            blockrefs,
            inrefs,
            outrefs,
            binweight: 0,
            characteristics,
            confidence,
            function_name,
            tfidf,
            is_exported,
            stringrefs,
        })
    }

    /// Converts decoded blocks into blocks of [`Instruction`]s, tagging each
    /// instruction with the architecture and bitness of the binary.
    fn parse_blocks(
        disassembly: &DisassemblyResult,
        block_dict: &HashMap<u64, Vec<DecodedInsn>>,
    ) -> Result<HashMap<u64, Vec<Instruction>>> {
        let arch = disassembly.binary_info.file_architecture;
        let bitness = disassembly.binary_info.bitness;
        let blocks = block_dict
            .iter()
            .map(|(offset, block)| {
                let instructions = block
                    .iter()
                    .map(|ins| Instruction::new(arch, bitness, ins))
                    .collect();
                (*offset, instructions)
            })
            .collect();
        Ok(blocks)
    }

    /// Returns the basic blocks keyed by block offset. Never fails.
    pub fn get_blocks(&self) -> Result<&HashMap<u64, Vec<Instruction>>> {
        Ok(&self.blocks)
    }

    /// Returns references to all instructions of all blocks. Blocks are
    /// visited in unspecified order; instructions within a block keep their
    /// order. Never fails.
    pub fn get_instructions(&self) -> Result<Vec<&Instruction>> {
        Ok(self.blocks.values().flatten().collect())
    }

    /// Total number of instructions across all blocks. Never fails.
    pub fn get_num_instructions(&self) -> Result<usize> {
        Ok(self.blocks.values().map(Vec::len).sum())
    }

    /// Total number of outgoing references across all sources. Never fails.
    pub fn get_num_outrefs(&self) -> Result<usize> {
        Ok(self.outrefs.values().map(Vec::len).sum())
    }

    /// Symbol name of the function, or the empty string when none is known.
    #[must_use]
    pub fn function_name(&self) -> &str {
        &self.function_name
    }

    /// Tells whether the function is a single `jmp`/`call` that references an API.
    ///
    /// Returns `Ok(false)` when the function does not consist of exactly one
    /// instruction, when that instruction is not located at the start of the
    /// entry block, when it is neither `jmp` nor `call`, or when the function
    /// has no API references.
    pub fn is_api_thunk(&self) -> Result<bool> {
        if self.get_num_instructions()? != 1 {
            return Ok(false);
        }
        // The single instruction may live in a block that is not keyed by the
        // function offset; treat that as "not a thunk" instead of panicking.
        let Some(first_ins) = self
            .blocks
            .get(&self.offset)
            .and_then(|block| block.first())
        else {
            return Ok(false);
        };
        if !matches!(first_ins.mnemonic_enum(), Mnemonic::Jmp | Mnemonic::Call) {
            return Ok(false);
        }
        Ok(!self.apirefs.is_empty())
    }

    /// Computes the immediate dominator of every block reachable from the
    /// entry block.
    ///
    /// The result maps `block offset -> immediate dominator offset`. The entry
    /// block and blocks that cannot be reached from it through `blockrefs`
    /// have no entry. An empty map is returned when the function has at most
    /// one block or no block at the function offset.
    ///
    /// Dominator sets are computed with the iterative set-intersection scheme
    /// (`dom(b) = {b} ∪ ⋂ dom(p)` over predecessors `p`), restricted to
    /// reachable blocks so that unreachable code cannot influence the result.
    #[must_use]
    pub fn dominator_tree(&self) -> HashMap<u64, u64> {
        use std::collections::BTreeSet;

        let entry = self.offset;
        if !self.blocks.contains_key(&entry) || self.blocks.len() <= 1 {
            return HashMap::new();
        }

        // Forward reachability from the entry over edges that end in a known
        // block. Reachability is tracked explicitly: a reachable block can
        // legitimately be dominated by *every* block (e.g. the tail of a
        // straight-line chain), so "dominator set equals all blocks" cannot
        // serve as the marker for unreachable code.
        let mut reachable: BTreeSet<u64> = BTreeSet::from([entry]);
        let mut worklist = vec![entry];
        while let Some(block) = worklist.pop() {
            let Some(dsts) = self.blockrefs.get(&block) else {
                continue;
            };
            for dst in dsts {
                if self.blocks.contains_key(dst) && reachable.insert(*dst) {
                    worklist.push(*dst);
                }
            }
        }

        // Predecessors, restricted to edges between reachable blocks.
        let mut preds: HashMap<u64, BTreeSet<u64>> = HashMap::with_capacity(reachable.len());
        for src in &reachable {
            let Some(dsts) = self.blockrefs.get(src) else {
                continue;
            };
            for dst in dsts {
                if reachable.contains(dst) {
                    preds.entry(*dst).or_default().insert(*src);
                }
            }
        }

        // dom(entry) = {entry}; every other block starts with the full set and
        // is narrowed until a fixed point is reached.
        let mut dom: HashMap<u64, BTreeSet<u64>> = HashMap::with_capacity(reachable.len());
        for block in &reachable {
            let initial = if *block == entry {
                BTreeSet::from([entry])
            } else {
                reachable.clone()
            };
            dom.insert(*block, initial);
        }

        let mut changed = true;
        while changed {
            changed = false;
            for block in &reachable {
                if *block == entry {
                    continue;
                }
                let mut meet: Option<BTreeSet<u64>> = None;
                for pred in preds.get(block).into_iter().flatten() {
                    let pred_dom = &dom[pred];
                    meet = Some(match meet {
                        None => pred_dom.clone(),
                        Some(acc) => acc.intersection(pred_dom).copied().collect(),
                    });
                }
                let mut new_dom = meet.unwrap_or_default();
                new_dom.insert(*block);
                if dom[block] != new_dom {
                    dom.insert(*block, new_dom);
                    changed = true;
                }
            }
        }

        // The strict dominators of a block form a chain, so the immediate
        // dominator is the strict dominator with the largest dominator set.
        let mut idom = HashMap::with_capacity(reachable.len().saturating_sub(1));
        for block in &reachable {
            if *block == entry {
                continue;
            }
            let closest = dom[block]
                .iter()
                .copied()
                .filter(|candidate| candidate != block)
                .max_by_key(|candidate| dom.get(candidate).map_or(0, |set| set.len()));
            if let Some(parent) = closest {
                idom.insert(*block, parent);
            }
        }
        idom
    }

    /// Computes a loop nesting depth for every block of the function.
    ///
    /// An edge `src -> dst` between known blocks is a back edge when `dst`
    /// dominates `src` according to [`Function::dominator_tree`] (a block
    /// always dominates itself, so self-loops count). For each back edge the
    /// loop body is the header plus every block that can reach the edge's
    /// source backwards without passing through the header; each block of the
    /// body gets its depth incremented by one.
    ///
    /// Every back edge is counted separately, so two back edges to the same
    /// header raise the depth of their shared blocks by two.
    #[must_use]
    pub fn nesting_depth(&self) -> HashMap<u64, u32> {
        use std::collections::HashSet;
        let idom = self.dominator_tree();
        let entry = self.offset;
        let all_blocks: HashSet<u64> = self.blocks.keys().copied().collect();
        let mut depth: HashMap<u64, u32> = all_blocks.iter().map(|b| (*b, 0u32)).collect();

        // Walks the immediate-dominator chain upwards from `block`; the loop
        // bound guards against malformed (cyclic) idom data.
        let dominates = |dominator: u64, block: u64| -> bool {
            if dominator == block {
                return true;
            }
            let mut cur = block;
            for _ in 0..self.blocks.len() {
                let Some(&parent) = idom.get(&cur) else {
                    return dominator == entry && cur == entry;
                };
                if parent == dominator {
                    return true;
                }
                if parent == cur {
                    return false;
                }
                cur = parent;
            }
            false
        };

        let mut back_edges: Vec<(u64, u64)> = Vec::new();
        for (src, dsts) in &self.blockrefs {
            if !all_blocks.contains(src) {
                continue;
            }
            for d in dsts {
                if all_blocks.contains(d) && dominates(*d, *src) {
                    back_edges.push((*src, *d));
                }
            }
        }
        if back_edges.is_empty() {
            return depth;
        }

        let mut preds: HashMap<u64, Vec<u64>> = HashMap::with_capacity(all_blocks.len());
        for b in &all_blocks {
            preds.insert(*b, Vec::new());
        }
        for (src, dsts) in &self.blockrefs {
            for d in dsts {
                if let Some(p) = preds.get_mut(d) {
                    p.push(*src);
                }
            }
        }

        for (s, h) in back_edges {
            // The header is inserted first so the backwards walk stops there.
            let mut loop_blocks: HashSet<u64> = HashSet::new();
            loop_blocks.insert(h);
            if s != h {
                let mut stack = vec![s];
                loop_blocks.insert(s);
                while let Some(b) = stack.pop() {
                    if let Some(bp) = preds.get(&b) {
                        for p in bp {
                            if loop_blocks.insert(*p) {
                                stack.push(*p);
                            }
                        }
                    }
                }
            }
            for b in loop_blocks {
                if let Some(d) = depth.get_mut(&b) {
                    *d += 1;
                }
            }
        }
        depth
    }

    /// Hashes the function with addresses and displacements left out.
    ///
    /// Blocks are visited in ascending offset order; for every instruction the
    /// signature produced by `pic_signature_into` is fed into SHA-256. The
    /// first eight digest bytes, read as a little-endian `u64`, are returned.
    #[must_use]
    pub fn pic_hash(&self) -> u64 {
        use sha2::{Digest, Sha256};
        let mut hasher = Sha256::new();
        let mut block_offsets: Vec<u64> = self.blocks.keys().copied().collect();
        block_offsets.sort_unstable();
        // One scratch buffer reused for every instruction.
        let mut buf = Vec::with_capacity(32);
        for off in block_offsets {
            let block = &self.blocks[&off];
            for ins in block {
                buf.clear();
                Self::pic_signature_into(&ins.iced, &mut buf);
                hasher.update(&buf);
            }
        }
        let out = hasher.finalize();
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(&out[..8]);
        u64::from_le_bytes(bytes)
    }

    /// Hashes only the sequence of mnemonics of the function.
    ///
    /// Blocks are visited in ascending offset order; each mnemonic is fed into
    /// SHA-256 as a little-endian `u32`. The first eight digest bytes, read as
    /// a little-endian `u64`, are returned.
    #[must_use]
    pub fn opcode_hash(&self) -> u64 {
        use sha2::{Digest, Sha256};
        let mut hasher = Sha256::new();
        let mut block_offsets: Vec<u64> = self.blocks.keys().copied().collect();
        block_offsets.sort_unstable();
        for off in block_offsets {
            let block = &self.blocks[&off];
            for ins in block {
                let m = ins.iced.mnemonic() as u32;
                hasher.update(m.to_le_bytes());
            }
        }
        let out = hasher.finalize();
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(&out[..8]);
        u64::from_le_bytes(bytes)
    }

    /// Appends the signature of one instruction to `out`.
    ///
    /// Layout: code (`u32`, little-endian), operand count (`u8`), then per
    /// operand its kind (`u8`) followed by kind-specific data:
    /// * register operands: the register id (`u16`);
    /// * memory operands: base register, index register (`u16` each) and the
    ///   index scale (`u8`) — the displacement is deliberately left out;
    /// * near branches: nothing, so branch targets do not affect the result;
    /// * immediates: the immediate value in little-endian byte order;
    /// * any other kind: nothing beyond the kind byte.
    ///
    /// The exact byte layout determines `pic_hash`; changing it changes every
    /// hash value produced so far.
    fn pic_signature_into(iced: &iced_x86::Instruction, out: &mut Vec<u8>) {
        out.extend_from_slice(&(iced.code() as u32).to_le_bytes());
        let count = iced.op_count();
        out.push(count as u8);
        for i in 0..count {
            let kind = iced.op_kind(i);
            out.push(kind as u8);
            match kind {
                OpKind::Register => {
                    out.extend_from_slice(&(iced.op_register(i) as u16).to_le_bytes());
                }
                OpKind::Memory => {
                    out.extend_from_slice(&(iced.memory_base() as u16).to_le_bytes());
                    out.extend_from_slice(&(iced.memory_index() as u16).to_le_bytes());
                    out.push(iced.memory_index_scale() as u8);
                }
                OpKind::NearBranch16 | OpKind::NearBranch32 | OpKind::NearBranch64 => {}
                OpKind::Immediate8 => out.push(iced.immediate8()),
                OpKind::Immediate16 => out.extend_from_slice(&iced.immediate16().to_le_bytes()),
                OpKind::Immediate32 => out.extend_from_slice(&iced.immediate32().to_le_bytes()),
                OpKind::Immediate64 => out.extend_from_slice(&iced.immediate64().to_le_bytes()),
                OpKind::Immediate8to16
                | OpKind::Immediate8to32
                | OpKind::Immediate8to64
                | OpKind::Immediate32to64 => {
                    out.extend_from_slice(&iced.immediate(i).to_le_bytes());
                }
                _ => {}
            }
        }
    }
}
/// A decoded instruction together with its location, without any
/// architecture context. [`Instruction::new`] copies all three fields.
#[derive(Debug, Clone, Copy)]
pub struct DecodedInsn {
/// Location of the instruction; passed to `BinaryInfo::bytes_at` together
/// with `length`.
pub offset: u64,
/// Number of bytes requested from `BinaryInfo::bytes_at` for this instruction.
pub length: u32,
/// The decoded `iced_x86` instruction.
pub iced: iced_x86::Instruction,
}
impl DecodedInsn {
    /// Fetches the raw bytes of this instruction from `binary_info`.
    ///
    /// # Errors
    /// Propagates any error from `BinaryInfo::bytes_at`.
    pub fn bytes_in<'b>(&self, binary_info: &'b BinaryInfo<'_>) -> Result<&'b [u8]> {
        let Self { offset, length, .. } = *self;
        binary_info.bytes_at(offset, length)
    }
}
/// Tells whether every byte of `chars` is a printable ASCII character, i.e.
/// a space, digit, letter or punctuation mark (`0x20..=0x7e`).
///
/// Empty input is considered printable. The function never returns `Err`.
pub fn is_printable_ascii(chars: &[u8]) -> Result<bool> {
    let all_printable = chars.iter().all(|byte| matches!(*byte, b' '..=b'~'));
    Ok(all_printable)
}
/// Tells whether `chars` is a sequence of UTF-16LE code units that all encode
/// printable ASCII characters.
///
/// The input is read as little-endian two-byte units. Every unit must have a
/// zero high byte, and the low bytes must pass [`is_printable_ascii`]. A
/// trailing unpaired byte is treated as a low byte. Empty input is considered
/// printable.
///
/// # Errors
/// Propagates errors from [`is_printable_ascii`].
pub fn is_printable_utf16le(chars: &[u8]) -> Result<bool> {
    let mut low_bytes = Vec::with_capacity(chars.len().div_ceil(2));
    // Every unit is inspected, including the very first one.
    for unit in chars.chunks(2) {
        if unit.get(1).is_some_and(|&high| high != 0x00) {
            return Ok(false);
        }
        // `chunks` never yields an empty slice, so index 0 always exists.
        low_bytes.push(unit[0]);
    }
    is_printable_ascii(&low_bytes)
}