use anyhow::Result;
use log::debug;
// because we use zydis data structures throughout our API
// make this dependency public.
// this way, our users can do `use lancelot::analysis::dis::zydis`
// and not have any version conflicts.
pub use zydis;
use crate::{
arch::Arch,
module::{Module, Permissions},
util, VA,
};
/// Build a zydis decoder configured for the architecture of `module`.
///
/// 64-bit modules get a long-mode decoder with 64-bit addresses, 32-bit
/// modules a legacy-mode decoder with 32-bit addresses. Each decoder mode
/// listed in the body is then toggled with `false`.
///
/// # Errors
///
/// Propagates any failure reported by zydis while constructing or
/// configuring the decoder.
pub fn get_disassembler(module: &Module) -> Result<zydis::Decoder> {
    let (machine_mode, address_width) = match module.arch {
        Arch::X64 => (zydis::MachineMode::LONG_64, zydis::AddressWidth::_64),
        Arch::X32 => (zydis::MachineMode::LEGACY_32, zydis::AddressWidth::_32),
    };
    let mut decoder = zydis::Decoder::new(machine_mode, address_width)?;

    // modes described here: https://github.com/zyantific/zydis/blob/5af06d64432aaa3f6af3cd3e120eefa061b790ab/include/Zydis/Decoder.h#L55
    //
    // performance, captured empirically:
    // - minimal mode - 8.7M instructions/second
    // - full mode - 4.5M instructions/second
    let modes_to_disable = [
        zydis::DecoderMode::MINIMAL,
        zydis::DecoderMode::KNC,
        zydis::DecoderMode::MPX,
        zydis::DecoderMode::CET,
        zydis::DecoderMode::LZCNT,
        zydis::DecoderMode::TZCNT,
        zydis::DecoderMode::WBNOINVD,
        zydis::DecoderMode::CLDEMOTE,
    ];
    for mode in modes_to_disable {
        decoder.enable_mode(mode, false)?;
    }

    Ok(decoder)
}
/// Lazily disassemble `buf` from start to end.
///
/// Each item is `(offset, result)`, where `offset` is the position within
/// `buf` at which decoding was attempted and `result` is whatever the decoder
/// returned for the bytes starting there.
///
/// After a successful decode the cursor advances by the instruction length;
/// otherwise it advances by a single byte so that decoding can resume.
pub fn linear_disassemble<'a>(
    decoder: &'a zydis::Decoder,
    buf: &'a [u8],
) -> Box<dyn Iterator<Item = (usize, zydis::Result<Option<zydis::DecodedInstruction>>)> + 'a> {
    let mut cursor = 0usize;
    let mut decoded_count = 0usize;

    Box::new(std::iter::from_fn(move || {
        if cursor >= buf.len() {
            debug!("decoded {} instructions", decoded_count);
            return None;
        }

        let start = cursor;
        let result = decoder.decode(&buf[start..]);

        // see discussion of linear vs thorough disassemble in this module doc
        // for call_targets. thorough disassembly (always stepping one byte)
        // is 4x more expensive, with limited results, so we step linearly.
        let step = match &result {
            Ok(Some(insn)) => {
                decoded_count += 1;
                insn.length as usize
            }
            // nothing decoded here: skip one byte and try again.
            _ => 1,
        };
        cursor += step;

        Some((start, result))
    }))
}
/// Report whether control may continue to the instruction that
/// immediately follows `insn`.
///
/// Unconditional jumps, returns, interrupt returns, `INT3`, and the
/// `INT 0x29` / `INT 0x2C` forms are treated as not falling through;
/// everything else (including `CALL`) is treated as falling through.
pub fn does_insn_fallthrough(insn: &zydis::DecodedInstruction) -> bool {
    match insn.mnemonic {
        zydis::Mnemonic::JMP
        | zydis::Mnemonic::RET
        | zydis::Mnemonic::IRET
        | zydis::Mnemonic::IRETD
        | zydis::Mnemonic::IRETQ => false,

        // we consider an INT3 (breakpoint) to not flow through.
        // we rely on this to deal with non-ret functions, as some
        // compilers may insert a CC byte following the call.
        //
        // really, we'd want to do a real non-ret analysis,
        // but that's still a TODO.
        //
        // see aadtb.dll:0x180001940 for an example.
        zydis::Mnemonic::INT3 => false,

        zydis::Mnemonic::INT => match insn.operands[0].imm.value {
            // 0x29: handled by nt!KiFastFailDispatch on Win8+
            // see: https://doar-e.github.io/blog/2013/10/12/having-a-look-at-the-windows-userkernel-exceptions-dispatcher/
            //
            // 0x2C: handled by nt!KiRaiseAssertion
            // see: http://www.osronline.com/article.cfm%5Earticle=474.htm
            0x29 | 0x2C => false,

            // probably indicates bad code,
            // but this hasn't been thoroughly vetted yet.
            _ => {
                debug!("{:#x?}", insn);
                true
            }
        },

        // this includes CALL.
        // TODO: call may not fallthrough if function is noret.
        // will need another pass to clean this up.
        _ => true,
    }
}
/// Print a debugging line for a decoded operand to stdout.
///
/// Only a fixed placeholder line is printed for now; `_op` is not inspected.
fn print_op(_op: &zydis::DecodedOperand) {
    // a serde_json dump of the operand (`serde_json::to_string(op)`), gated on
    // the `dump_serde` feature, is sketched for this spot but currently
    // disabled, so only the placeholder is emitted.
    println!("op: TODO(print_op)");
}
/// Fetch the first explicit operand of an instruction, if it has one.
///
/// zydis also reports implicit operands, which our analysis does not
/// currently use, so operands whose visibility is not `EXPLICIT` are skipped.
pub fn get_first_operand(insn: &zydis::DecodedInstruction) -> Option<&zydis::DecodedOperand> {
    let mut operands = insn.operands.iter();
    operands.find(|operand| matches!(operand.visibility, zydis::OperandVisibility::EXPLICIT))
}
/// The destination referenced by an instruction operand,
/// as reported by `get_operand_xref`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Target {
/// If direct, the address of the destination.
Direct(VA),
/// If indirect, the VA is the address of the pointer,
/// e.g. 0x401000 in `call [0x401000]`.
///
/// This may very well be zero or other junk
/// (`get_operand_xref` reports register operands as `Indirect(0x0)`).
///
/// This value might be useful to look up against:
/// - imports
/// - jump tables
Indirect(VA),
}
// for a memory operand, like `mov eax, [0x401000]`
// fetch the pointer, rather than the dest,
// so like `0x401000`.
#[allow(clippy::if_same_then_else)]
pub fn get_memory_operand_ptr(
va: VA,
insn: &zydis::DecodedInstruction,
op: &zydis::DecodedOperand,
) -> Result<Option<VA>> {
if op.mem.base == zydis::Register::NONE
&& op.mem.index == zydis::Register::NONE
&& op.mem.scale == 0
&& op.mem.disp.has_displacement
{
// the operand is a deref of a memory address.
// for example: JMP [0x0]
// this means: read the ptr from 0x0, and then jump to it.
//
// we'll have to make some assumptions here:
// - the ptr doesn't change (can detect via mem segment perms)
// - the ptr is fixed up (TODO)
//
// see doctest: [test simple memory ptr operand]()
if op.mem.disp.displacement < 0 {
Ok(None)
} else {
Ok(Some(op.mem.disp.displacement as VA))
}
} else if op.mem.base == zydis::Register::RIP
// only valid on x64
&& op.mem.index == zydis::Register::NONE
&& op.mem.scale == 0
&& op.mem.disp.has_displacement
{
// this is RIP-relative addressing.
// it works like a relative immediate,
// that is: dst = *(rva + displacement + instruction len)
match util::va_add_signed(va + insn.length as u64, op.mem.disp.displacement) {
None => Ok(None),
Some(ptr) => Ok(Some(ptr)),
}
} else if op.mem.base != zydis::Register::NONE {
// this is something like `CALL [eax+4]`
// can't resolve without emulation
// TODO: add test
Ok(None)
} else if op.mem.scale > 0 {
// this is something like `JMP [0x1000+eax*4]` (32-bit)
Ok(None)
} else {
println!("{va:#x}: get mem op xref");
print_op(op);
panic!("not supported");
}
}
/// For a memory operand, like `mov eax, [0x401000]`, fetch what the pointer
/// points to, which is *not* `0x401000` in this example.
///
/// The pointer is computed by `get_memory_operand_ptr`, then the value
/// stored at that address is read from `module`. The result is `Ok(None)`
/// when the pointer cannot be determined, when the read fails, or when the
/// value read is not an address accepted by
/// `module.probe_va(.., Permissions::RWX)`.
///
/// # Errors
///
/// Propagates errors from `get_memory_operand_ptr`.
pub fn get_memory_operand_xref(
    module: &Module,
    va: VA,
    insn: &zydis::DecodedInstruction,
    op: &zydis::DecodedOperand,
) -> Result<Option<VA>> {
    let ptr = match get_memory_operand_ptr(va, insn, op)? {
        Some(ptr) => ptr,
        None => return Ok(None),
    };

    let dst = match module.read_va_at_va(ptr) {
        Ok(dst) => dst,
        Err(_) => return Ok(None),
    };

    // must be mapped; anything else is an invalid address.
    let xref = if module.probe_va(dst, Permissions::RWX) {
        // this is the happy path!
        Some(dst)
    } else {
        None
    };
    Ok(xref)
}
/// Fetch the destination encoded in a far-pointer operand,
/// such as `jmp far ptr 4080h:60B9D233h`.
///
/// Only the offset part of the pointer is returned; the segment is ignored.
/// As written, this always yields `Ok(Some(..))`.
///
/// ref: https://c9x.me/x86/html/file_module_x86_id_147.html
///
/// > Far Jumps in Real-Address or Virtual-8086 Mode.
/// > When executing a far jump in real address or virtual-8086 mode,
/// > the processor jumps to the code segment and offset specified with the
/// > target operand. Here the target operand specifies an absolute far
/// > address either directly with a pointer (ptr16:16 or ptr16:32) or
/// > indirectly with a memory location (m16:16 or m16:32). With the
/// > pointer method, the segment and address of the called procedure is
/// > encoded in the instruction, using a 4-byte (16-bit operand size) or
/// > 6-byte (32-bit operand size) far address immediate.
pub fn get_pointer_operand_xref(op: &zydis::DecodedOperand) -> Result<Option<VA>> {
    // TODO: do something intelligent with the segment.
    let offset = op.ptr.offset as u64;
    Ok(Some(offset))
}
/// Fetch the address referenced by an immediate operand.
///
/// Relative immediates are resolved against the address of the next
/// instruction (`va + insn.length`); absolute immediates are taken as-is,
/// except that negative signed values are rejected.
///
/// Returns `Ok(None)` when no candidate address can be computed, or when
/// the candidate is not accepted by `module.probe_va(.., Permissions::RWX)`.
pub fn get_immediate_operand_xref(
    module: &Module,
    va: VA,
    insn: &zydis::DecodedInstruction,
    op: &zydis::DecodedOperand,
) -> Result<Option<VA>> {
    let candidate = if op.imm.is_relative {
        // the operand is an immediate constant relative to $PC.
        // destination = $pc + immediate + insn.len
        //
        // see doctest: [test relative immediate operand]()
        let delta = if op.imm.is_signed {
            util::u64_i64(op.imm.value)
        } else {
            op.imm.value as i64
        };
        util::va_add_signed(va + insn.length as u64, delta)
    } else if op.imm.is_signed {
        // the operand is an immediate absolute address.
        let imm = util::u64_i64(op.imm.value);
        if imm < 0 {
            // obviously this isn't an address if negative.
            None
        } else {
            Some(imm as u64)
        }
    } else {
        // the operand is an immediate absolute address.
        Some(op.imm.value)
    };

    match candidate {
        // must be mapped
        Some(dst) if module.probe_va(dst, Permissions::RWX) => Ok(Some(dst)),
        // no candidate, or an invalid address
        _ => Ok(None),
    }
}
/// Compute the target referenced by an operand, dispatching on its type.
///
/// Memory and far-pointer operands yield `Target::Indirect`, immediates
/// yield `Target::Direct`, register operands yield `Target::Indirect(0x0)`,
/// and unused operands yield `None`.
///
/// # Errors
///
/// Propagates errors from the per-operand-type helpers.
pub fn get_operand_xref(
    module: &Module,
    va: VA,
    insn: &zydis::DecodedInstruction,
    op: &zydis::DecodedOperand,
) -> Result<Option<Target>> {
    let target = match op.ty {
        // like: .text:0000000180001041 FF 15 D1 78 07 00      call    cs:__imp_RtlVirtualUnwind_0
        //           0x0000000000001041:                       call    [0x0000000000079980]
        zydis::OperandType::MEMORY => {
            get_memory_operand_ptr(va, insn, op)?.map(Target::Indirect)
        }

        // like: EA 33 D2 B9 60 80 40  jmp  far ptr 4080h:60B9D233h
        // "ptr": {
        //    "segment": 16512,
        //    "offset": 1622790707
        // },
        zydis::OperandType::POINTER => get_pointer_operand_xref(op)?.map(Target::Indirect),

        zydis::OperandType::IMMEDIATE => {
            get_immediate_operand_xref(module, va, insn, op)?.map(Target::Direct)
        }

        // like: CALL rax
        // which cannot be resolved without emulation.
        zydis::OperandType::REGISTER => Some(Target::Indirect(0x0)),

        zydis::OperandType::UNUSED => None,
    };
    Ok(target)
}
// Unit tests for the operand cross-reference helpers and for hooking the
// zydis formatter to print symbol names.
#[cfg(test)]
mod tests {
use crate::{analysis::dis::*, rsrc::*, test::*};
// A RIP-relative `call [mem]` resolves to the address of the pointer slot.
#[test]
fn test_get_memory_operand_ptr() {
//```
// .text:00000001800134D4 call cs:KernelBaseGetGlobalData
//```
//
// this should result in a call flow to IAT entry 0x1800773F0
let buf = get_buf(Rsrc::K32);
let pe = crate::loader::pe::PE::from_bytes(&buf).unwrap();
let insn = read_insn(&pe.module, 0x1800134D4);
let op = get_first_operand(&insn).unwrap();
let xref = get_memory_operand_ptr(0x1800134D4, &insn, &op).unwrap();
assert_eq!(xref.is_some(), true);
assert_eq!(xref.unwrap(), 0x1800773F0);
}
// An absolute `jmp [0x6]` (32-bit) reads the pointer stored at 0x6, which is 0x0.
#[test]
fn test_get_memory_operand_xref_simple() {
// 0: ff 25 06 00 00 00 +-> jmp DWORD PTR ds:0x6
// 6: 00 00 00 00 +-- dw 0x0
let module = load_shellcode32(b"\xFF\x25\x06\x00\x00\x00\x00\x00\x00\x00");
let insn = read_insn(&module, 0x0);
let op = get_first_operand(&insn).unwrap();
let xref = get_memory_operand_xref(&module, 0x0, &insn, &op).unwrap();
assert_eq!(xref.is_some(), true);
assert_eq!(xref.unwrap(), 0x0);
}
// A RIP-relative `call [rip+0]` (64-bit) reads the pointer stored right after
// the instruction, which is 0x0.
#[test]
fn test_get_memory_operand_xref_rip_relative() {
// FF 15 00 00 00 00 CALL $+5
// 00 00 00 00 00 00 00 00 dq 0x0
let module = load_shellcode64(b"\xFF\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00");
let insn = read_insn(&module, 0x0);
let op = get_first_operand(&insn).unwrap();
let xref = get_memory_operand_xref(&module, 0x0, &insn, &op).unwrap();
assert_eq!(xref.is_some(), true);
assert_eq!(xref.unwrap(), 0x0);
}
// A far-pointer jump yields the offset encoded in the instruction.
#[test]
fn test_get_pointer_operand_xref() {
// this is a far ptr jump from addr 0x0 to itself:
// JMP FAR PTR 0:00000000
// [ EA ] [ 00 00 00 00 ] [ 00 00 ]
// opcode ptr segment
let module = load_shellcode32(b"\xEA\x00\x00\x00\x00\x00\x00");
let insn = read_insn(&module, 0x0);
let op = get_first_operand(&insn).unwrap();
let xref = get_pointer_operand_xref(&op).unwrap();
assert_eq!(xref.is_some(), true, "has pointer operand xref");
assert_eq!(xref.unwrap(), 0x0, "correct pointer operand xref");
}
// Relative immediates resolve to a target only when that target is mapped.
#[test]
fn test_get_immediate_operand_xref() {
// this is a jump from addr 0x0 to itself:
// JMP $+0;
let module = load_shellcode32(b"\xEB\xFE");
let insn = read_insn(&module, 0x0);
let op = get_first_operand(&insn).unwrap();
let xref = get_immediate_operand_xref(&module, 0x0, &insn, &op).unwrap();
assert_eq!(xref.is_some(), true, "has immediate operand");
assert_eq!(xref.unwrap(), 0x0, "correct immediate operand");
// this is a jump from addr 0x0 to -1, which is unmapped
// JMP $-1;
let module = load_shellcode32(b"\xEB\xFD");
let insn = read_insn(&module, 0x0);
let op = get_first_operand(&insn).unwrap();
let xref = get_immediate_operand_xref(&module, 0x0, &insn, &op).unwrap();
assert_eq!(xref.is_some(), false, "does not have immediate operand");
}
// Demonstrates hooking the zydis formatter so that absolute addresses found in
// a name table are rendered as symbol names, while all other addresses are
// rendered by the formatter's original hook.
#[test]
fn test_format_insn() {
use crate::analysis::dis::zydis;
let buf = get_buf(Rsrc::K32);
let pe = crate::loader::pe::PE::from_bytes(&buf).unwrap();
let mut formatter = zydis::Formatter::new(zydis::FormatterStyle::INTEL).unwrap();
// state handed to the hook on each format call:
// the symbol table, plus the hook that was installed before ours.
struct UserData {
names: std::collections::BTreeMap<VA, String>,
orig_print_address_abs: Option<zydis::Hook>,
}
let mut userdata = Box::new(UserData {
names: Default::default(),
orig_print_address_abs: None,
});
// install our hook; the value returned here is stored below as the original
// hook, which ours falls back to.
let orig = formatter
.set_print_address_abs(Box::new(
|formatter: &zydis::Formatter,
buf: &mut zydis::FormatterBuffer,
ctx: &mut zydis::FormatterContext,
userdata: Option<&mut dyn core::any::Any>|
-> zydis::Result<()> {
// programming error: userdata must be provided.
// TODO: enforce via types.
let userdata = userdata.expect("no userdata");
// programming error: userdata must be a Box<UserData>.
// TODO: enforce via types.
let userdata = userdata.downcast_ref::<Box<UserData>>().expect("incorrect userdata");
let absolute_address = unsafe {
// safety: the insn and operands come from zydis, so we assume they contain
// valid data.
let insn: &zydis::DecodedInstruction = &*ctx.instruction;
let op: &zydis::DecodedOperand = &*ctx.operand;
insn.calc_absolute_address(ctx.runtime_address, op)
.expect("failed to calculate absolute address")
};
if let Some(name) = userdata.names.get(&absolute_address) {
// name is found in map, use that.
return buf.get_string()?.append(name);
} else {
// name is not found, use original formatter.
// programming error: the original hook must be recorded.
// TODO: enforce via types.
let orig = userdata.orig_print_address_abs.as_ref().expect("no original hook");
if let zydis::Hook::PrintAddressAbs(Some(f)) = orig {
// safety: zydis::Formatter <-> zydis::ffi::ZydisFormatter is safe according to
// here: https://docs.rs/zydis/3.1.2/src/zydis/formatter.rs.html#306
let status =
unsafe { f(formatter as *const _ as *const zydis::ffi::ZydisFormatter, buf, ctx) };
if status.is_error() {
return Err(status);
} else {
return Ok(());
}
} else {
// I'm not sure how this could ever be the case, as zydis initializes the hook
// with a default. I suppose if you explicitly set
// the callback to NULL/None? Which we don't do here.
panic!("unexpected original hook");
}
}
},
))
.unwrap();
// record the previous hook so that the closure above can delegate to it.
userdata.orig_print_address_abs = Some(orig);
// format a global address (KernelBaseGetGlobalData).
//
// call to KernelBaseGetGlobalData:
// ```
// .text:00000001800134D0 48 83 EC 48 sub rsp, 48h
// .text:00000001800134D4 FF 15 16 3F 06 00 call cs:KernelBaseGetGlobalData ; .idata:00000001800773F0
// .text:00000001800134DA 0F 10 50 40 movups xmm2, xmmword ptr [rax+40h]
// ```
userdata
.names
.insert(0x1800773F0, String::from("KernelBaseGetGlobalData"));
let mut buffer = [0u8; 200];
let mut buffer = zydis::OutputBuffer::new(&mut buffer[..]);
let insn = read_insn(&pe.module, 0x1800134D4);
formatter
.format_instruction(&insn, &mut buffer, Some(0x1800134D4), Some(&mut userdata))
.unwrap();
assert_eq!(buffer.as_str().unwrap(), "call [KernelBaseGetGlobalData]");
// but fall-back to the original formatter if symbol is not present.
//
// call to BaseFormatObjectAttributes:
// ```
// .text:000000018001995E 45 33 C0 xor r8d, r8d
// .text:0000000180019961 FF 15 D1 D7 05 00 call cs:BaseFormatObjectAttributes_0 ; .idata:0000000180077138
// .text:0000000180019967 85 C0 test eax, eax
// ```
let mut buffer = [0u8; 200];
let mut buffer = zydis::OutputBuffer::new(&mut buffer[..]);
let insn = read_insn(&pe.module, 0x180019961);
formatter
.format_instruction(&insn, &mut buffer, Some(0x180019961), Some(&mut userdata))
.unwrap();
assert_eq!(buffer.as_str().unwrap(), "call [0x0000000180077138]");
}
}