lancelot 0.8.6

binary analysis framework for x32/x64 PE files
use std::{cmp::min, fmt::Write};

use anyhow::Result;

use crate::{analysis::dis::zydis, arch::Arch, aspace::AddressSpace, VA};

use super::Workspace;

/// Hooks recorded by `Formatter::from_options` while it installs its own
/// formatter callbacks.
///
/// The callbacks reach these through `UserData::orig` so that they can fall
/// back to (or build upon) the original zydis behavior.
#[derive(Default, Clone)]
struct OriginalHooks {
    /// original absolute-address printer; called when no name is known for an address.
    print_address_abs: Option<zydis::Hook>,
    /// original mnemonic printer; called first, before the mnemonic padding is computed.
    print_mnemonic:    Option<zydis::Hook>,
    /// original pre-instruction hook (recorded in `from_options`).
    pre_instruction:   Option<zydis::Hook>,

/// Rendering options for [`Formatter`].
///
/// The struct is `Copy`; `Formatter::format_instruction` hands a copy to the
/// formatter callbacks on every call.
#[derive(Clone, Copy)]
pub struct FormatterOptions {
    /// when true, each rendered token is painted with its ANSI color.
    colors:          bool,
    /// show up to the given number of bytes for each instruction,
    /// or ... (ellipsis) when truncated.
    /// use zero to disable this column.
    /// unit: bytes
    /// default: 7
    /// max: 16
    hex_column_size: usize,

    /// pad mnemonics to at least the given width.
    /// won't truncate longer instructions, they just won't be nicely aligned.
    /// you probably don't need to touch this unless you want to.
    /// unit: characters
    /// default: 7
    mnemonic_width: usize,

/// Per-call context built by `Formatter::format_instruction` and passed to
/// zydis as userdata; each formatter callback recovers it by downcasting its
/// `userdata` argument.
struct UserData<'a> {
    /// workspace the instruction is being formatted against.
    ws:      &'a dyn Workspace,
    /// clone of the formatter's recorded original hooks.
    orig:    OriginalHooks,
    /// copy of the formatter's options.
    options: FormatterOptions,

/// Builder for [`Formatter`]: collects [`FormatterOptions`] through its
/// `with_*` methods; `build` returns the `Formatter`.
pub struct FormatterBuilder {
    /// options accumulated so far.
    options: FormatterOptions,

impl FormatterBuilder {
    /// Consume the builder and produce a [`Formatter`].
    pub fn build(self) -> Formatter {

    /// Enable or disable colored output.
    pub fn with_colors(mut self, colors: bool) -> FormatterBuilder {
        self.options.colors = colors;

    /// Set how many instruction bytes the hex column may show.
    ///
    /// Values larger than 0x10 are clamped to 0x10.
    pub fn with_hex_column_size(mut self, hex_column_size: usize) -> FormatterBuilder {
        // 0x10: max instruction length
        self.options.hex_column_size = min(hex_column_size, 0x10);

/// Instruction formatter layered on top of `zydis::Formatter`.
///
/// `format_instruction` renders a decoded instruction into a `String`,
/// optionally with ANSI colors.
pub struct Formatter {
    /// rendering options.
    options: FormatterOptions,
    /// underlying zydis formatter (created with the Intel style).
    inner:   zydis::Formatter,
    /// original hooks recorded by `from_options`; cloned into each `UserData`.
    orig:    OriginalHooks,

/// Custom token type numbered one past `zydis::TOKEN_USER`.
/// Presumably tags symbol names in the output — TODO confirm where it is emitted.
pub const TOKEN_USER_SYMBOLNAME: zydis::Token = zydis::Token(zydis::TOKEN_USER.0 + 1);
/// Custom token type numbered two past `zydis::TOKEN_USER`;
/// rendered with `Formatter::COLOR_HEX`.
pub const TOKEN_USER_HEX: zydis::Token = zydis::Token(zydis::TOKEN_USER.0 + 2);

impl Formatter {
    // default theme
    // these constants are what `get_token_color` returns for each token type.
    // TODO: move this to a struct that can be configured
    const COLOR_ADDRESS_ABS: ansi_term::Color = ansi_term::Color::Blue;
    const COLOR_ADDRESS_REL: ansi_term::Color = ansi_term::Color::Blue;
    const COLOR_DECORATOR: ansi_term::Color = Formatter::GREY;
    const COLOR_DELIMITER: ansi_term::Color = Formatter::GREY;
    const COLOR_DISPLACEMENT: ansi_term::Color = ansi_term::Color::Blue;
    const COLOR_HEX: ansi_term::Color = ansi_term::Color::Cyan;
    const COLOR_IMMEDIATE: ansi_term::Color = ansi_term::Color::Blue;
    const COLOR_INVALID: ansi_term::Color = ansi_term::Color::Red;
    const COLOR_MNEMONIC: ansi_term::Color = ansi_term::Color::Green;
    const COLOR_PARENTHESIS_CLOSE: ansi_term::Color = Formatter::GREY;
    const COLOR_PARENTHESIS_OPEN: ansi_term::Color = Formatter::GREY;
    const COLOR_PREFIX: ansi_term::Color = Formatter::GREY;
    const COLOR_REGISTER: ansi_term::Color = ansi_term::Color::Yellow;
    const COLOR_SYMBOL: ansi_term::Color = Formatter::GREY;
    const COLOR_SYMBOLNAME: ansi_term::Color = ansi_term::Color::Purple;
    const COLOR_TYPECAST: ansi_term::Color = Formatter::GREY;
    const COLOR_USER: ansi_term::Color = Formatter::GREY;
    const COLOR_WHITESPACE: ansi_term::Color = ansi_term::Color::Black;
    // shared grey used for the low-emphasis tokens above (fixed palette entry 242).
    const GREY: ansi_term::Color = ansi_term::Color::Fixed(242);

    /// Create a formatter with the default options: colors enabled,
    /// a 7-byte hex column, and mnemonics padded to 7 characters.
    pub fn new() -> Formatter {
        FormatterBuilder {
            options: FormatterOptions {
                colors:          true,
                hex_column_size: 7,
                mnemonic_width:  7,

    /// Start a [`FormatterBuilder`] pre-populated with the default options
    /// (colors enabled, 7-byte hex column, mnemonic width of 7), ready to be
    /// customized through the builder's `with_*` methods.
    pub fn with_options() -> FormatterBuilder {
        FormatterBuilder {
            options: FormatterOptions {
                colors:          true,
                hex_column_size: 7,
                mnemonic_width:  7,

    /// Create a formatter from explicit options.
    ///
    /// Builds an Intel-style zydis formatter and sets up three callbacks on it,
    /// recording the value obtained for each in [`OriginalHooks`]:
    ///
    /// - pre-instruction: emits the line prefix and, when
    ///   `hex_column_size > 0`, the hex column of instruction bytes.
    /// - print-address-abs: emits a name for the absolute address when one is
    ///   found, otherwise defers to the original hook.
    /// - print-mnemonic: calls the original hook, then computes padding up to
    ///   `mnemonic_width`.
    ///
    /// Every callback expects its userdata to be a `UserData`, which
    /// `format_instruction` supplies.
    ///
    /// # Panics
    ///
    /// Panics if the zydis formatter cannot be created.
    pub fn from_options(options: FormatterOptions) -> Formatter {
        let mut inner = zydis::Formatter::new(zydis::FormatterStyle::INTEL).unwrap();

        let mut orig: OriginalHooks = Default::default();

        // callback 1: pre-instruction (line prefix and hex column).
        let f = inner
                |_formatter: &zydis::Formatter,
                 buf: &mut zydis::FormatterBuffer,
                 ctx: &mut zydis::FormatterContext,
                 userdata: Option<&mut dyn core::any::Any>|
                 -> zydis::Result<()> {
                    // programming error: userdata must be provided. this is guaranteed within
                    // Formatter.
                    let userdata = userdata.expect("no userdata");

                    // programming error: userdata must be a Box<UserData>. this is guaranteed
                    // within Formatter.
                    let userdata = userdata.downcast_ref::<UserData>().expect("incorrect userdata");

                    let va = ctx.runtime_address;

                    // look for the section whose virtual range contains this instruction.
                    if let Some(sec) = userdata
                        .find(|&sec| sec.virtual_range.contains(&va))
                    } else {

                    // the address rendering differs between 32-bit and 64-bit targets.
                    match {
                        Arch::X32 => {
                        Arch::X64 => {

                    buf.get_string()?.append("  ")?;

                    if userdata.options.hex_column_size > 0 {
                        // 0x10 bytes is enough for any single instruction.
                        let mut insn_buf = [0u8; 0x10];
                        // assumes zydis hands us a valid instruction pointer in ctx.
                        let insn_len = (unsafe { &*ctx.instruction }).length as usize;
                        let col_count = userdata.options.hex_column_size;
                            .read_into(va, &mut insn_buf[..insn_len])
                            .expect("failed to read instruction");

                        // build exactly `col_count` slots: bytes, then an ellipsis or blank filler.
                        let mut hex = String::new();
                        for (i, b) in insn_buf.iter().enumerate().take(col_count) {
                            if insn_len > col_count && i == col_count - 1 {
                                // instruction is larger than reserved space,
                                // and this is the final spot for hex,
                                // which is 3 characters wide,
                                // so show "..." instead of the last byte.
                            } else if i < insn_len {
                                // most common case: bytes of the instruction

                                // separate bytes with a single space (none before the first).
                                if i != 0 {
                                    hex.write_str(" ").unwrap();

                            } else {
                                // common case, insn is smaller than reserved space,
                                // so fill with spaces.
                                hex.write_str("   ").unwrap();


                        buf.get_string()?.append("  ")?;

        orig.pre_instruction = Some(f);

        // callback 2: absolute addresses (prefer a known name over the raw address).
        let f = inner
                |formatter: &zydis::Formatter,
                 buf: &mut zydis::FormatterBuffer,
                 ctx: &mut zydis::FormatterContext,
                 userdata: Option<&mut dyn core::any::Any>|
                 -> zydis::Result<()> {
                    // programming error: userdata must be provided. this is guaranteed within
                    // Formatter.
                    let userdata = userdata.expect("no userdata");

                    // programming error: userdata must be a Box<UserData>. this is guaranteed
                    // within Formatter.
                    let userdata = userdata.downcast_ref::<UserData>().expect("incorrect userdata");

                    let absolute_address = unsafe {
                        // safety: the insn and operands come from zydis, so we assume they contain
                        // valid data.
                        let insn: &zydis::DecodedInstruction = &*ctx.instruction;
                        let op: &zydis::DecodedOperand = &*ctx.operand;
                        insn.calc_absolute_address(ctx.runtime_address, op)
                            .expect("failed to calculate absolute address")

                    if let Some(name) = {
                        // name is found in map, use that.
                        return buf.get_string()?.append(name);
                    } else {
                        // name is not found, use original formatter.

                        // programming error: the original hook must be recorded. this is guaranteed
                        // within Formatter.
                        let orig = userdata.orig.print_address_abs.as_ref().expect("no original hook");

                        if let zydis::Hook::PrintAddressAbs(Some(f)) = orig {
                            // safety: zydis::Formatter <-> zydis::ffi::ZydisFormatter is safe according to
                            // here:
                            let status =
                                unsafe { f(formatter as *const _ as *const zydis::ffi::ZydisFormatter, buf, ctx) };
                            if status.is_error() {
                            } else {
                        } else {
                            // I'm not sure how this could ever be the case, as zydis initializes the hook
                            // with a default. I suppose if you explicitly set
                            // the callback to NULL/None? Which we don't do here.
                            panic!("unexpected original hook");
        orig.print_address_abs = Some(f);

        // callback 3: mnemonics (original output, then padding to `mnemonic_width`).
        let f = inner
                |formatter: &zydis::Formatter,
                 buf: &mut zydis::FormatterBuffer,
                 ctx: &mut zydis::FormatterContext,
                 userdata: Option<&mut dyn core::any::Any>|
                 -> zydis::Result<()> {
                    // programming error: userdata must be provided. this is guaranteed within
                    // Formatter.
                    let userdata = userdata.expect("no userdata");

                    // programming error: userdata must be a Box<UserData>. this is guaranteed
                    // within Formatter.
                    let userdata = userdata.downcast_ref::<UserData>().expect("incorrect userdata");

                    let orig = userdata.orig.print_mnemonic.as_ref().expect("no original hook");

                    if let zydis::Hook::PrintMnemonic(Some(f)) = orig {
                        // safety: zydis::Formatter <-> zydis::ffi::ZydisFormatter is safe according to
                        // here:
                        let status = unsafe { f(formatter as *const _ as *const zydis::ffi::ZydisFormatter, buf, ctx) };
                        if status.is_error() {
                            return Err(status);

                        // read back the mnemonic text that the original hook just produced.
                        let (_, mnemonic) = buf.get_token()?.get_value()?;

                        // only pad when the mnemonic is shorter than the configured width.
                        if mnemonic.len() < userdata.options.mnemonic_width {
                            let mut padding = String::new();

                            for _ in 0..userdata.options.mnemonic_width - mnemonic.len() {
                                padding.write_str(" ").unwrap();


                    } else {
                        // I'm not sure how this could ever be the case, as zydis initializes the hook
                        // with a default. I suppose if you explicitly set
                        // the callback to NULL/None? Which we don't do here.
                        panic!("unexpected original hook");
        orig.print_mnemonic = Some(f);

        Formatter { options, inner, orig }

    /// Map a token type to its color in the default theme.
    ///
    /// # Panics
    ///
    /// Panics (via `unimplemented!`) for any token type without an arm below.
    fn get_token_color(token: zydis::Token) -> ansi_term::Color {
        match token {
            zydis::TOKEN_INVALID => Formatter::COLOR_INVALID,
            zydis::TOKEN_WHITESPACE => Formatter::COLOR_WHITESPACE,
            zydis::TOKEN_DELIMITER => Formatter::COLOR_DELIMITER,
            zydis::TOKEN_PREFIX => Formatter::COLOR_PREFIX,
            zydis::TOKEN_MNEMONIC => Formatter::COLOR_MNEMONIC,
            zydis::TOKEN_REGISTER => Formatter::COLOR_REGISTER,
            zydis::TOKEN_ADDRESS_ABS => Formatter::COLOR_ADDRESS_ABS,
            zydis::TOKEN_ADDRESS_REL => Formatter::COLOR_ADDRESS_REL,
            zydis::TOKEN_DISPLACEMENT => Formatter::COLOR_DISPLACEMENT,
            zydis::TOKEN_IMMEDIATE => Formatter::COLOR_IMMEDIATE,
            zydis::TOKEN_TYPECAST => Formatter::COLOR_TYPECAST,
            zydis::TOKEN_DECORATOR => Formatter::COLOR_DECORATOR,
            zydis::TOKEN_SYMBOL => Formatter::COLOR_SYMBOL,
            zydis::TOKEN_USER => Formatter::COLOR_USER,
            // custom token type defined by this module.
            TOKEN_USER_HEX => Formatter::COLOR_HEX,
            _ => unimplemented!("token: {}", token),

    /// Render the text `s` of a single token into `o`, using the color for
    /// `token` when `options.colors` is enabled.
    fn render_token<T: Write>(&self, o: &mut T, token: zydis::Token, s: &str) -> Result<()> {
        if self.options.colors {
            // force this into a string, or else the formatting control codes will not be written.
            // from the documentation:
            // > If you do want to get at the escape codes, then you can convert the ANSIString
            // > to a string as you would any other Display value.
            let s = Formatter::get_token_color(token).paint(s).to_string();
        } else {


    /// Format the decoded instruction `insn`, located at address `va`, into a `String`.
    ///
    /// The instruction is tokenized with `ws`, the recorded original hooks and
    /// the options bundled as userdata for the formatter callbacks; each
    /// resulting token is then passed through `render_token`.
    ///
    /// # Errors
    ///
    /// Propagates errors from tokenizing the instruction and from rendering a token.
    pub fn format_instruction(&self, ws: &dyn Workspace, insn: &zydis::DecodedInstruction, va: VA) -> Result<String> {
        // scratch space handed to the tokenizer.
        let mut buffer = [0u8; 400];

        // we pass our userdata to ZydisFormatterFormatInstruction.
        // but to make it work, we have to play games with the lifetimes:
        // we need to convince the compiler that the userdata pointer lives long enough
        // to be used by the callbacks.
        // we do this by extending the workspace lifetime from '_ to 'static.
        // userdata is passed into ZydisFormatterFormatInstruction,
        // which passes userdata to each of the formatter callbacks.
        // those read strictly from insn/ctx/userdata and write strictly to output
        // buffer. there is no state maintained within these routines.
        // the callbacks won't be invoked beyond the call into FormatInstruction.
        // therefore, i believe it's safe to extend the lifetime here to work with zydis.
        //let x = unsafe { std::mem::transmute::<&'_ PEWorkspace, &'static
        // PEWorkspace>(ws) };
        let x = unsafe { std::mem::transmute::<&'_ dyn Workspace, &'static dyn Workspace>(ws) };

        let mut ud = UserData {
            orig:    self.orig.clone(),
            ws:      x,
            options: self.options,

        // render every token that the tokenizer yields into the output string.
        let mut out = String::new();
        for (token, s) in self
            .tokenize_instruction(insn, &mut buffer, Some(va), Some(&mut ud))?
            self.render_token(&mut out, token, s)?;


/// Allows a `Formatter` to be created through `Default::default()`.
// NOTE(review): presumably equivalent to `Formatter::new()` — confirm against the body.
impl Default for Formatter {
    fn default() -> Self {

// Formatter tests. Each one loads the `Rsrc::NOP` sample into a `PEWorkspace`
// and formats the `call` instruction at 0x401C53.
mod tests {
    use super::{super::*, *};
    use crate::rsrc::*;
    use anyhow::Result;

    // formatting with colors explicitly enabled.
    fn with_colors() -> Result<()> {
        let buf = get_buf(Rsrc::NOP);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;
        let config = config::empty();

        let ws = PEWorkspace::from_pe(config, pe)?;

        let fmt = Formatter::with_options().with_colors(true).build();

        // ```
        //     .text:00401C4E 000 68 F4 61 40 00          push    offset ModuleName ; "mscoree.dll"
        //     .text:00401C53 004 FF 15 00 60 40 00       call    ds:GetModuleHandleA
        //     .text:00401C59 000 85 C0                   test    eax, eax
        // ```
        let insn = crate::test::read_insn(&, 0x401C53);
        let s = fmt.format_instruction(&ws, &insn, 0x401C53)?;


    // plain-text output: section-qualified address, padded mnemonic,
    // and the import name in place of the raw pointer.
    fn no_colors() -> Result<()> {
        let buf = get_buf(Rsrc::NOP);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;
        let config = config::empty();

        let ws = PEWorkspace::from_pe(config, pe)?;

        let fmt = Formatter::with_options()

        // ```
        //     .text:00401C4E 000 68 F4 61 40 00          push    offset ModuleName ; "mscoree.dll"
        //     .text:00401C53 004 FF 15 00 60 40 00       call    ds:GetModuleHandleA
        //     .text:00401C59 000 85 C0                   test    eax, eax
        // ```
        let insn = crate::test::read_insn(&, 0x401C53);
            fmt.format_instruction(&ws, &insn, 0x401C53)?,
            ".text:00401c53  call    [kernel32.dll!GetModuleHandleA]"


    // hex column: the same instruction rendered without and with its bytes.
    fn hex() -> Result<()> {
        let buf = get_buf(Rsrc::NOP);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;
        let config = config::empty();

        let ws = PEWorkspace::from_pe(config, pe)?;

        // ```
        //     .text:00401C4E 000 68 F4 61 40 00          push    offset ModuleName ; "mscoree.dll"
        //     .text:00401C53 004 FF 15 00 60 40 00       call    ds:GetModuleHandleA
        //     .text:00401C59 000 85 C0                   test    eax, eax
        // ```
        let insn = crate::test::read_insn(&, 0x401C53);

        // expected output without any hex bytes.
        let fmt = Formatter::with_options()
            fmt.format_instruction(&ws, &insn, 0x401C53)?,
            ".text:00401c53  call    [kernel32.dll!GetModuleHandleA]"

        // expected output with the six instruction bytes, padded out to the column width.
        let fmt = Formatter::with_options()
            fmt.format_instruction(&ws, &insn, 0x401C53)?,
            ".text:00401c53  FF 15 00 60 40 00     call    [kernel32.dll!GetModuleHandleA]"
