// chipi-core 0.9.1
//
// Core library for chipi: parser, IR, and code generation backends for instruction decoder generation.
// Documentation
//! Binary Ninja Architecture plugin code generation.
//!
//! Generates a self-contained Python Architecture plugin for Binary Ninja
//! from a validated `.chipi` definition and Binary Ninja-specific configuration.

use std::fmt::Write;

use crate::codegen_python::{self, DisplayConfig};
use crate::config::BinjaOptions;
use crate::tree::DecodeNode;
use crate::types::*;

/// ITYPE constant prefix used throughout the generated code.
///
/// Within this module it is joined with an underscore and the upper-cased
/// instruction name to form the per-instruction constant names, and it is
/// passed to `codegen_python::emit_decode_function` and
/// `codegen_python::emit_format_function`.
const ITYPE_PREFIX: &str = "ITYPE";

/// Build the full Python source of a Binary Ninja Architecture plugin.
///
/// The sections are appended to a single buffer in a fixed order: file header,
/// helper functions, display-format helpers, map lookups, sub-decoder
/// dispatchers, `ITYPE_*` constants, the decode function, the format function,
/// and finally the `Architecture` subclass with its registration call.
///
/// * `def`  - the validated definition the code is generated from.
/// * `tree` - the decode tree handed to the shared Python decode emitter.
/// * `opts` - Binary Ninja-specific options (display prefixes, architecture
///   settings, flow configuration).
///
/// Returns the generated Python module as a `String`.
pub fn generate_binja_code(def: &ValidatedDef, tree: &DecodeNode, opts: &BinjaOptions) -> String {
    let mut code = String::new();

    // Preamble and free-standing helper definitions.
    emit_header(&mut code);
    emit_helpers(&mut code, def);
    codegen_python::emit_display_format_helpers(&mut code, def);
    emit_map_functions(&mut code, def);
    emit_subdecoder_functions(&mut code, def);

    // Instruction-type constants, then the decoder and formatter that are
    // given the same constant prefix.
    emit_itype_constants(&mut code, def);
    codegen_python::emit_decode_function(&mut code, def, tree, ITYPE_PREFIX);
    let display_config = DisplayConfig {
        type_prefixes: opts.display_prefixes.clone(),
    };
    codegen_python::emit_format_function(&mut code, def, ITYPE_PREFIX, &display_config);

    // The Architecture subclass goes last.
    emit_architecture_class(&mut code, def, opts);

    code
}

/// Append the generated file's banner comment and its `binaryninja` import
/// list to `out`, followed by two blank lines.
fn emit_header(out: &mut String) {
    // The header is fixed text, so it is kept as a table of lines; each entry
    // is written followed by a newline.
    const HEADER_LINES: &[&str] = &[
        "# Auto-generated by https://github.com/ioncodes/chipi",
        "# Do not edit.",
        "",
        "from binaryninja import (",
        "    Architecture,",
        "    Endianness,",
        "    InstructionInfo,",
        "    InstructionTextToken,",
        "    InstructionTextTokenType,",
        "    BranchType,",
        "    RegisterInfo,",
        "    log_error,",
        ")",
        "",
        "",
    ];

    for line in HEADER_LINES {
        out.push_str(line);
        out.push('\n');
    }
}

/// Emit the shared Python helper functions that `def` requires.
///
/// Each helper group is written to `out` only when the corresponding
/// `codegen_python::needs_*` check returns true for `def`, so nothing is
/// appended when neither check passes.
fn emit_helpers(out: &mut String, def: &ValidatedDef) {
    // Sign-extension helper, only when the definition needs it.
    if codegen_python::needs_sign_extend(def) {
        codegen_python::emit_sign_extend_helper(out);
    }
    // Rotate helpers, only when the definition needs them.
    if codegen_python::needs_rotate_helpers(def) {
        codegen_python::emit_rotate_helpers(out);
    }
}

/// Emit map lookup functions.
///
/// Thin wrapper that forwards the definition's `maps` to the shared Python
/// emitter `codegen_python::emit_map_functions_python`, which appends to `out`.
fn emit_map_functions(out: &mut String, def: &ValidatedDef) {
    codegen_python::emit_map_functions_python(out, &def.maps);
}

/// Emit sub-decoder dispatch functions.
///
/// Calls `codegen_python::emit_subdecoder_python` once for every entry in
/// `def.sub_decoders`, in iteration order, appending to `out`.
fn emit_subdecoder_functions(out: &mut String, def: &ValidatedDef) {
    for sd in &def.sub_decoders {
        codegen_python::emit_subdecoder_python(out, sd);
    }
}

/// Emit one `ITYPE_<NAME> = <index>` assignment per instruction.
///
/// `<NAME>` is the instruction name upper-cased (ASCII only) and `<index>` is
/// the instruction's position in `def.instructions`. Two blank lines follow
/// the block of constants.
fn emit_itype_constants(out: &mut String, def: &ValidatedDef) {
    for (index, instruction) in def.instructions.iter().enumerate() {
        let constant = format!(
            "{}_{}",
            ITYPE_PREFIX,
            instruction.name.to_ascii_uppercase()
        );
        writeln!(out, "{} = {}", constant, index).unwrap();
    }
    out.push_str("\n\n");
}

/// Emit the `Architecture` subclass and its registration call.
///
/// The class is named `<ArchitectureName>Architecture`, where the first
/// letter of `opts.architecture_name` is upper-cased. It carries the class
/// attributes (`name`, `address_size`, `default_int_size`,
/// `max_instr_length`, `endianness`), the `regs` table, an optional
/// `stack_pointer`, and the three instruction callbacks. A module-level
/// `<Class>.register()` call closes the output.
///
/// `max_instr_length` is the unit size in bytes (`def.config.width / 8`)
/// multiplied by the largest `unit_count()` of any instruction, with a
/// multiplier of 1 when there are no instructions.
fn emit_architecture_class(out: &mut String, def: &ValidatedDef, opts: &BinjaOptions) {
    let class_name = format!("{}Architecture", capitalize(&opts.architecture_name));

    let bytes_per_unit = def.config.width / 8;
    let widest_units = def
        .instructions
        .iter()
        .map(|instr| instr.unit_count())
        .max()
        .unwrap_or(1);
    let max_insn_bytes = bytes_per_unit * widest_units;

    // Anything other than the two big-endian spellings is little-endian.
    let endian_enum = if matches!(opts.endianness.as_str(), "BigEndian" | "big") {
        "Endianness.BigEndian"
    } else {
        "Endianness.LittleEndian"
    };

    // Class header and scalar attributes.
    writeln!(out, "class {}(Architecture):", class_name).unwrap();
    writeln!(out, "    name = \"{}\"", opts.architecture_name).unwrap();
    writeln!(out, "    address_size = {}", opts.address_size).unwrap();
    writeln!(out, "    default_int_size = {}", opts.default_int_size).unwrap();
    writeln!(out, "    max_instr_length = {}", max_insn_bytes).unwrap();
    writeln!(out, "    endianness = {}\n", endian_enum).unwrap();

    // Register table: every register shares the configured size.
    writeln!(out, "    regs = {{").unwrap();
    for reg in &opts.register_names {
        writeln!(
            out,
            "        \"{0}\": RegisterInfo(\"{0}\", {1}),",
            reg, opts.register_size
        )
        .unwrap();
    }
    writeln!(out, "    }}\n").unwrap();

    // The stack pointer attribute is only written when one is configured.
    if let Some(sp) = &opts.stack_pointer {
        writeln!(out, "    stack_pointer = \"{}\"\n", sp).unwrap();
    }

    // Instruction callbacks, in the order they appear in the generated class.
    emit_get_instruction_info(out, def, opts, max_insn_bytes);
    emit_get_instruction_text(out, max_insn_bytes);
    emit_get_instruction_low_level_il(out, max_insn_bytes);

    // Module-level registration, separated from the class by a blank line.
    writeln!(out, "\n{}.register()", class_name).unwrap();
}

/// Emit `get_instruction_info`.
///
/// The generated method returns `None` when fewer than one decode unit
/// (`def.config.width / 8` bytes) is available or when `_decode` fails.
/// Otherwise it fills an `InstructionInfo` with the decoded length and, based
/// on `opts.flow`, at most one group of branch annotations.
///
/// The flow groups form a single `if`/`elif` chain, so an instruction listed
/// in several groups is classified by the first one in this order:
/// unconditional branches, conditional branches (entries of `branches` that
/// are not also in `unconditional_branches`), calls, returns, stops.
/// Names that do not match any instruction in `def` are dropped by
/// `collect_itypes` / `collect_itypes_refs`; a group with no remaining names
/// emits nothing.
///
/// * `out` - buffer the Python source is appended to.
/// * `def` - validated definition providing the unit width and instructions.
/// * `opts` - Binary Ninja options; only `opts.flow` is read here.
/// * `max_insn_bytes` - number of leading bytes of `data` passed to `_decode`.
fn emit_get_instruction_info(
    out: &mut String,
    def: &ValidatedDef,
    opts: &BinjaOptions,
    max_insn_bytes: u32,
) {
    writeln!(out, "    def get_instruction_info(self, data, addr):").unwrap();
    writeln!(out, "        if len(data) < {}:", def.config.width / 8).unwrap();
    writeln!(out, "            return None").unwrap();
    writeln!(
        out,
        "        result = _decode(bytes(data[:{}]))",
        max_insn_bytes
    )
    .unwrap();
    writeln!(out, "        if result is None:").unwrap();
    writeln!(out, "            return None").unwrap();
    writeln!(out, "        itype, fields, size = result").unwrap();
    writeln!(out, "        info = InstructionInfo()").unwrap();
    writeln!(out, "        info.length = size").unwrap();

    let flow = &opts.flow;

    // A name listed as both a branch and an unconditional branch is treated
    // as unconditional only.
    let cond_branches: Vec<&String> = flow
        .branches
        .iter()
        .filter(|b| !flow.unconditional_branches.contains(b))
        .collect();

    // Tracks whether the chain has been opened, i.e. whether the next arm
    // must start with `elif` instead of `if`.
    let mut emitted_any = false;

    emit_flow_arm(
        out,
        &mut emitted_any,
        &collect_itypes(&flow.unconditional_branches, def),
        &["info.add_branch(BranchType.UnconditionalBranch)"],
    );
    emit_flow_arm(
        out,
        &mut emitted_any,
        &collect_itypes_refs(&cond_branches, def),
        &[
            "info.add_branch(BranchType.TrueBranch)",
            "info.add_branch(BranchType.FalseBranch, addr + size)",
        ],
    );
    emit_flow_arm(
        out,
        &mut emitted_any,
        &collect_itypes(&flow.calls, def),
        &["info.add_branch(BranchType.CallDestination)"],
    );
    emit_flow_arm(
        out,
        &mut emitted_any,
        &collect_itypes(&flow.returns, def),
        &["info.add_branch(BranchType.FunctionReturn)"],
    );
    // Stops are reported with the same branch type as returns.
    emit_flow_arm(
        out,
        &mut emitted_any,
        &collect_itypes(&flow.stops, def),
        &["info.add_branch(BranchType.FunctionReturn)"],
    );

    writeln!(out, "        return info").unwrap();
    writeln!(out).unwrap();
}

/// Emit one arm of the `itype` classification chain in `get_instruction_info`.
///
/// Writes `if itype in (...,):` (or `elif ...` once an earlier arm has been
/// written) followed by each statement in `body`, and sets `emitted_any`.
/// Does nothing when `itypes` is empty, so an empty group never opens or
/// continues the chain.
fn emit_flow_arm(out: &mut String, emitted_any: &mut bool, itypes: &[String], body: &[&str]) {
    if itypes.is_empty() {
        return;
    }

    let keyword = if *emitted_any { "elif" } else { "if" };
    writeln!(out, "        {} itype in ({},):", keyword, itypes.join(", ")).unwrap();
    for statement in body {
        writeln!(out, "            {}", statement).unwrap();
    }
    *emitted_any = true;
}

/// Emit `get_instruction_text`.
///
/// The generated method decodes the first `max_insn_bytes` bytes of `data`,
/// returns `None` if decoding fails, and otherwise returns a token list plus
/// the decoded size. The token list is the mnemonic as an instruction token
/// followed, when the operand string is non-empty, by a single space and the
/// operand string as text tokens.
fn emit_get_instruction_text(out: &mut String, max_insn_bytes: u32) {
    // Everything after the decode call is fixed text, so it lives in a table;
    // the trailing empty entry is the blank line that ends the method.
    const AFTER_DECODE: &[&str] = &[
        "        if result is None:",
        "            return None",
        "        itype, fields, size = result",
        "        mnemonic, operands = _format_insn(itype, fields)",
        "        tokens = []",
        "        tokens.append(InstructionTextToken(",
        "            InstructionTextTokenType.InstructionToken, mnemonic))",
        "        if operands:",
        "            tokens.append(InstructionTextToken(",
        "                InstructionTextTokenType.TextToken, \" \"))",
        "            tokens.append(InstructionTextToken(",
        "                InstructionTextTokenType.TextToken, operands))",
        "        return tokens, size",
        "",
    ];

    out.push_str("    def get_instruction_text(self, data, addr):\n");
    out.push_str(&format!(
        "        result = _decode(bytes(data[:{}]))\n",
        max_insn_bytes
    ));
    for line in AFTER_DECODE {
        out.push_str(line);
        out.push('\n');
    }
}

/// Emit `get_instruction_low_level_il`.
///
/// The generated method decodes the first `max_insn_bytes` bytes of `data`,
/// returns `None` if decoding fails, and otherwise appends a single `nop` to
/// the IL and returns the decoded size.
fn emit_get_instruction_low_level_il(out: &mut String, max_insn_bytes: u32) {
    // Fixed text after the decode call; the trailing empty entry is the blank
    // line that ends the method.
    const AFTER_DECODE: &[&str] = &[
        "        if result is None:",
        "            return None",
        "        _, _, size = result",
        "        il.append(il.nop())",
        "        return size",
        "",
    ];

    out.push_str("    def get_instruction_low_level_il(self, data, addr, il):\n");
    out.push_str(&format!(
        "        result = _decode(bytes(data[:{}]))\n",
        max_insn_bytes
    ));
    for line in AFTER_DECODE {
        out.push_str(line);
        out.push('\n');
    }
}

/// Map instruction names to their `ITYPE_*` constant names.
///
/// Names are kept in input order. A name that does not exactly match the
/// `name` of some instruction in `def` is skipped; duplicates in `names` are
/// not removed.
fn collect_itypes(names: &[String], def: &ValidatedDef) -> Vec<String> {
    let mut itypes = Vec::new();
    for name in names {
        let is_defined = def.instructions.iter().any(|instr| instr.name == *name);
        if is_defined {
            itypes.push(format!("{}_{}", ITYPE_PREFIX, name.to_ascii_uppercase()));
        }
    }
    itypes
}

/// Map borrowed instruction names to their `ITYPE_*` constant names.
///
/// Names are kept in input order, and any name that does not exactly match
/// the `name` of some instruction in `def` is skipped.
fn collect_itypes_refs(names: &[&String], def: &ValidatedDef) -> Vec<String> {
    let mut itypes = Vec::new();
    for name in names {
        let is_defined = def.instructions.iter().any(|instr| instr.name == **name);
        if is_defined {
            itypes.push(format!("{}_{}", ITYPE_PREFIX, name.to_ascii_uppercase()));
        }
    }
    itypes
}

/// Return `s` with its first character ASCII-upper-cased.
///
/// Only ASCII letters are affected; any other leading character is left as
/// it is. The rest of the string is copied unchanged, and an empty input
/// gives an empty string.
fn capitalize(s: &str) -> String {
    let mut capitalized = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        capitalized.push(first.to_ascii_uppercase());
        capitalized.push_str(rest.as_str());
    }
    capitalized
}