use std::collections::HashMap;
use std::fmt::Write;
use crate::backend::OperandKind;
use crate::codegen_python::{self, DisplayConfig};
use crate::config::IdaOptions;
use crate::tree::DecodeNode;
use crate::types::*;
/// Prefix of the generated instruction-type constants (`ITYPE_<NAME>`); it is
/// also handed to the shared Python decode/format emitters.
const ITYPE_PREFIX: &str = "ITYPE";
/// Generates the complete Python source of an IDA processor module.
///
/// Sections are appended to one buffer in a fixed order: file header, optional
/// helpers, display-format helpers, map functions, sub-decoder functions,
/// `ITYPE_*` constants, the field-name table, the decode and format functions,
/// and finally the processor class together with `PROCESSOR_ENTRY`.
///
/// * `def` - validated definition the code is generated from.
/// * `tree` - decode tree forwarded to the decode-function emitter.
/// * `opts` - IDA-specific options (names, flags, flow info, display prefixes).
/// * `type_map` - alias-to-type mapping consulted when classifying operands.
///
/// Returns the generated source text.
pub fn generate_ida_code(
    def: &ValidatedDef,
    tree: &DecodeNode,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
) -> String {
    let mut code = String::new();

    emit_header(&mut code);
    emit_helpers(&mut code, def);
    codegen_python::emit_display_format_helpers(&mut code, def);
    emit_map_functions(&mut code, def);
    emit_subdecoder_functions(&mut code, def);
    emit_itype_constants(&mut code, def);
    emit_field_names_table(&mut code, def);
    codegen_python::emit_decode_function(&mut code, def, tree, ITYPE_PREFIX);

    // The display configuration is only needed by the format-function emitter.
    let display_config = DisplayConfig {
        type_prefixes: opts.display_prefixes.clone(),
    };
    codegen_python::emit_format_function(&mut code, def, ITYPE_PREFIX, &display_config);

    emit_processor_class(&mut code, def, opts, type_map);
    code
}
/// Writes the "auto-generated" banner and the `ida_*` imports, followed by two
/// blank lines, to `out`.
fn emit_header(out: &mut String) {
    // One entry per emitted line; empty entries are blank separator lines.
    const HEADER_LINES: &[&str] = &[
        "# Auto-generated by https://github.com/ioncodes/chipi",
        "# Do not edit.",
        "",
        "import ida_idp",
        "import ida_ua",
        "import ida_bytes",
        "import ida_idaapi",
        "import ida_nalt",
        "import ida_problems",
        "import ida_xref",
        "",
        "",
    ];
    for line in HEADER_LINES {
        writeln!(out, "{}", line).unwrap();
    }
}
/// Emits the optional Python helper functions, each only when
/// `codegen_python` reports that `def` needs it.
fn emit_helpers(out: &mut String, def: &ValidatedDef) {
    let sign_extend_used = codegen_python::needs_sign_extend(def);
    if sign_extend_used {
        codegen_python::emit_sign_extend_helper(out);
    }

    let rotate_used = codegen_python::needs_rotate_helpers(def);
    if rotate_used {
        codegen_python::emit_rotate_helpers(out);
    }
}
/// Emits the Python functions for the maps declared in `def` by delegating to
/// the shared Python code generator.
fn emit_map_functions(out: &mut String, def: &ValidatedDef) {
    let maps = &def.maps;
    codegen_python::emit_map_functions_python(out, maps);
}
/// Emits one Python sub-decoder per entry of `def.sub_decoders`, in order.
fn emit_subdecoder_functions(out: &mut String, def: &ValidatedDef) {
    def.sub_decoders
        .iter()
        .for_each(|sub_decoder| codegen_python::emit_subdecoder_python(out, sub_decoder));
}
/// Emits one `ITYPE_<NAME> = <index>` constant per instruction, where the
/// index is the instruction's position in `def.instructions`, followed by two
/// blank lines.
fn emit_itype_constants(out: &mut String, def: &ValidatedDef) {
    for (index, instr) in def.instructions.iter().enumerate() {
        let const_name = format!("{}_{}", ITYPE_PREFIX, instr.name.to_ascii_uppercase());
        writeln!(out, "{} = {}", const_name, index).unwrap();
    }
    out.push_str("\n\n");
}
/// Emits the `_FIELD_NAMES` dictionary, which maps each `ITYPE_*` constant to
/// the list of that instruction's resolved field names (in declaration order).
fn emit_field_names_table(out: &mut String, def: &ValidatedDef) {
    writeln!(out, "_FIELD_NAMES = {{").unwrap();
    for instr in &def.instructions {
        let mut quoted_fields: Vec<String> = Vec::new();
        for field in instr.resolved_fields.iter() {
            quoted_fields.push(format!("\"{}\"", field.name));
        }
        writeln!(
            out,
            " {}_{}: [{}],",
            ITYPE_PREFIX,
            instr.name.to_ascii_uppercase(),
            quoted_fields.join(", ")
        )
        .unwrap();
    }
    writeln!(out, "}}").unwrap();
    out.push_str("\n\n");
}
/// Emits the `ida_idp.processor_t` subclass and the module-level
/// `PROCESSOR_ENTRY` factory that instantiates it.
///
/// The class body consists of the processor id and flags, short/long
/// processor names, the register table with its segment-register indices,
/// the instruction table, the assembler description, `__init__`, and the
/// `ev_*` callbacks.
///
/// * `out` - buffer the Python source is appended to.
/// * `def` - validated definition (instructions and unit width).
/// * `opts` - IDA options supplying names, flags and registers.
/// * `type_map` - alias-to-type mapping forwarded to operand classification.
fn emit_processor_class(
    out: &mut String,
    def: &ValidatedDef,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
) {
    let class_name = format!("{}Processor", capitalize(&opts.processor_name));

    // Size of the longest instruction in bytes; this is how much the generated
    // callbacks read before decoding.
    let unit_bytes = def.config.width / 8;
    let max_insn_bytes = unit_bytes
        * def
            .instructions
            .iter()
            .map(|i| i.unit_count())
            .max()
            .unwrap_or(1);

    writeln!(out, "class {}(ida_idp.processor_t):", class_name).unwrap();
    writeln!(out).unwrap();
    writeln!(out, " id = {:#x}", opts.processor_id).unwrap();
    if opts.flags.is_empty() {
        writeln!(out, " flag = ida_idp.PR_DEFSEG32 | ida_idp.PR_USE32").unwrap();
    } else {
        let flag_strs: Vec<String> = opts
            .flags
            .iter()
            .map(|f| format!("ida_idp.{}", f))
            .collect();
        writeln!(out, " flag = {}", flag_strs.join(" | ")).unwrap();
    }
    writeln!(out, " cnbits = 8").unwrap();
    writeln!(out, " dnbits = 8").unwrap();
    writeln!(out).unwrap();

    // Every user-supplied string that ends up inside a Python string literal is
    // escaped, so a quote or backslash cannot break the generated module.
    writeln!(
        out,
        " psnames = [\"{}\"]",
        codegen_python::escape_python_str(&opts.processor_name)
    )
    .unwrap();
    writeln!(
        out,
        " plnames = [\"{}\"]",
        codegen_python::escape_python_str(&opts.processor_long_name)
    )
    .unwrap();
    writeln!(out).unwrap();

    writeln!(out, " reg_names = [").unwrap();
    for name in &opts.register_names {
        writeln!(
            out,
            " \"{}\",",
            codegen_python::escape_python_str(name)
        )
        .unwrap();
    }
    writeln!(out, " ]").unwrap();
    writeln!(out).unwrap();

    // Segment-register indices: position of the first/last configured segment
    // register within `register_names`, falling back to the last two entries
    // of the register table when no match is found.
    let first_sreg_idx = opts
        .register_names
        .iter()
        .position(|n| opts.segment_registers.first().map_or(false, |s| n == s))
        .unwrap_or(opts.register_names.len().saturating_sub(2));
    let last_sreg_idx = opts
        .register_names
        .iter()
        .position(|n| opts.segment_registers.last().map_or(false, |s| n == s))
        .unwrap_or(opts.register_names.len().saturating_sub(1));
    writeln!(out, " reg_first_sreg = {}", first_sreg_idx).unwrap();
    writeln!(out, " reg_last_sreg = {}", last_sreg_idx).unwrap();
    writeln!(out, " segreg_size = 0").unwrap();
    writeln!(out, " reg_code_sreg = {}", first_sreg_idx).unwrap();
    writeln!(out, " reg_data_sreg = {}", last_sreg_idx).unwrap();
    writeln!(out).unwrap();

    emit_instruc_table(out, def, opts, type_map);
    emit_assembler_info(out, opts);

    writeln!(out, " def __init__(self):").unwrap();
    writeln!(out, " ida_idp.processor_t.__init__(self)").unwrap();
    writeln!(out).unwrap();

    emit_ana_method(out, def, opts, type_map, max_insn_bytes);
    emit_out_insn_method(out, def, opts, type_map, max_insn_bytes);
    emit_out_operand_method(out);
    emit_emu_method(out, def, opts);

    writeln!(out).unwrap();
    writeln!(out, "def PROCESSOR_ENTRY():").unwrap();
    writeln!(out, " return {}()", class_name).unwrap();
}
/// Emits the `instruc` list (one `{"name", "feature"}` dict per instruction)
/// together with `instruc_start` and `instruc_end`.
///
/// An instruction without any computed feature flag gets the literal `0`;
/// otherwise the flags are OR-ed together.
fn emit_instruc_table(
    out: &mut String,
    def: &ValidatedDef,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
) {
    writeln!(out, " instruc = [").unwrap();
    for instr in &def.instructions {
        let flags = compute_instruction_features(instr, opts, type_map);
        let feature_expr = if flags.is_empty() {
            String::from("0")
        } else {
            flags.join(" | ")
        };
        writeln!(
            out,
            " {{\"name\": \"{}\", \"feature\": {}}},",
            instr.name, feature_expr
        )
        .unwrap();
    }
    writeln!(out, " ]").unwrap();
    writeln!(out, " instruc_start = 0").unwrap();
    writeln!(out, " instruc_end = len(instruc)").unwrap();
    writeln!(out).unwrap();
}
/// Builds the list of `ida_idp.CF_*` flag expressions for one instruction.
///
/// Flow flags come from `opts.flow` (stop, call, and jump for both conditional
/// and unconditional branches). After them, each of the first six resolved
/// fields contributes `CF_USE<n>`, except that a first field classified as a
/// register with a destination-like name contributes `CF_CHG1` instead.
///
/// Returns the flags in emission order; the list is empty when nothing applies.
fn compute_instruction_features(
    instr: &ValidatedInstruction,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
) -> Vec<String> {
    let flow = &opts.flow;
    let mut flags: Vec<String> = Vec::new();

    if flow.stops.contains(&instr.name) {
        flags.push("ida_idp.CF_STOP".to_string());
    }
    if flow.calls.contains(&instr.name) {
        flags.push("ida_idp.CF_CALL".to_string());
    }
    let is_jump = flow.branches.contains(&instr.name)
        || flow.unconditional_branches.contains(&instr.name);
    if is_jump {
        flags.push("ida_idp.CF_JUMP".to_string());
    }

    // Only the first six fields receive a per-operand flag.
    for (slot, field) in instr.resolved_fields.iter().take(6).enumerate() {
        let is_register = matches!(
            classify_operand(&field.name, &field.resolved_type, opts, type_map),
            OperandKind::Register
        );
        let is_written = is_register && slot == 0 && is_likely_destination(&field.name);
        let access = if is_written { "CHG" } else { "USE" };
        flags.push(format!("ida_idp.CF_{}{}", access, slot + 1));
    }

    flags
}
/// Decides which kind of operand a decoded field represents.
///
/// Resolution order:
/// 1. an explicit entry for `field_name` in `opts.operand_types`;
/// 2. the field's type alias, when `type_map` maps it to a name containing
///    "reg" (case-insensitive) -> register;
/// 3. heuristics on the lower-cased field name (register, address, memory);
/// 4. otherwise immediate.
fn classify_operand(
    field_name: &str,
    resolved: &ResolvedFieldType,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
) -> OperandKind {
    if let Some(&configured) = opts.operand_types.get(field_name) {
        return configured;
    }

    let alias_names_register = resolved
        .alias_name
        .as_ref()
        .and_then(|alias| type_map.get(alias))
        .map_or(false, |mapped| mapped.to_ascii_lowercase().contains("reg"));
    if alias_names_register {
        return OperandKind::Register;
    }

    let lowered = field_name.to_ascii_lowercase();
    match lowered.as_str() {
        "rd" | "rs" | "rt" | "ra" | "rb" | "rc" | "rn" | "rm" | "rz" | "reg" | "dreg"
        | "sreg" => OperandKind::Register,
        // Short names starting with "ar" (at most three bytes) are registers too.
        short if short.starts_with("ar") && short.len() <= 3 => OperandKind::Register,
        "addr" | "target" | "dest" | "address" | "ea" => OperandKind::Address,
        "mem" | "memory" | "disp" | "displacement" => OperandKind::Memory,
        _ => OperandKind::Immediate,
    }
}
/// Returns `true` when `name` (compared ASCII case-insensitively) is one of
/// the field names treated as a destination operand.
fn is_likely_destination(name: &str) -> bool {
    const DESTINATION_NAMES: [&str; 5] = ["rd", "rt", "d", "dreg", "dst"];
    DESTINATION_NAMES
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
/// Emits the `assembler` dictionary of the processor class.
///
/// All entries are fixed text except `"name"`, which is built from the escaped
/// long processor name in `opts`.
fn emit_assembler_info(out: &mut String, opts: &IdaOptions) {
    // Lines written before the computed "name" entry.
    const LEADING_LINES: &[&str] = &[
        " assembler = {",
        " \"flag\": ida_idp.AS_COLON | ida_idp.ASH_HEXF3 | ida_idp.ASB_BINF0 | ida_idp.ASO_OCTF1 | ida_idp.AS_NCMAS,",
        " \"uflag\": 0,",
    ];
    // Lines written after the "name" entry, up to and including the closing brace.
    const TRAILING_LINES: &[&str] = &[
        " \"origin\": \".org\",",
        " \"end\": \".end\",",
        " \"cmnt\": \";\",",
        " \"ascsep\": '\"',",
        " \"accsep\": \"'\",",
        " \"esccodes\": \"\\\"'\",",
        " \"a_ascii\": \".ascii\",",
        " \"a_byte\": \".byte\",",
        " \"a_word\": \".word\",",
        " \"a_dword\": \".dword\",",
        " \"a_qword\": \".quad\",",
        " \"a_float\": \".float\",",
        " \"a_double\": \".double\",",
        " \"a_bss\": \"dfs %s\",",
        " \"a_seg\": \"seg\",",
        " \"a_curip\": \".\",",
        " \"a_public\": \".global\",",
        " \"a_weak\": \"weak\",",
        " \"a_extrn\": \".extern\",",
        " \"a_comdef\": \"\",",
        " \"a_align\": \".align\",",
        " \"lbrace\": \"(\",",
        " \"rbrace\": \")\",",
        " \"a_mod\": \"%\",",
        " \"a_band\": \"&\",",
        " \"a_bor\": \"|\",",
        " \"a_xor\": \"^\",",
        " \"a_bnot\": \"~\",",
        " \"a_shl\": \"<<\",",
        " \"a_shr\": \">>\",",
        " \"a_sizeof_fmt\": \"size %s\",",
        " }",
    ];

    for line in LEADING_LINES {
        writeln!(out, "{}", line).unwrap();
    }

    let assembler_name = codegen_python::escape_python_str(&opts.processor_long_name);
    writeln!(out, " \"name\": \"{} assembler\",", assembler_name).unwrap();

    for line in TRAILING_LINES {
        writeln!(out, "{}", line).unwrap();
    }
    writeln!(out).unwrap();
}
/// Emits the `ev_ana_insn` callback.
///
/// The generated method reads `max_insn_bytes` bytes at `insn.ea`, decodes
/// them with `_decode`, stores the instruction type and size, and fills up to
/// six operands from the decoded fields. Dict-valued fields (sub-decoder
/// results) are skipped and negative integers are masked to 32 bits. The
/// per-field operand typing is produced by `emit_operand_classification`.
///
/// NOTE(review): every emitted line carries a single leading space - confirm
/// that this matches the Python indentation the generated module needs.
fn emit_ana_method(
    out: &mut String,
    def: &ValidatedDef,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
    max_insn_bytes: u32,
) {
    // Fixed lines between the byte fetch and the operand classification.
    const DECODE_AND_LOOP_HEAD: &[&str] = &[
        " if data is None:",
        " return 0",
        " result = _decode(data)",
        " if result is None:",
        " return 0",
        " itype, fields, size = result",
        " insn.itype = itype",
        " insn.size = size",
        " # Populate operands from decoded fields",
        " op_idx = 0",
        " for name, value in fields.items():",
        " if op_idx >= 6:",
        " break",
        " # Skip sub-decoder dicts (extension opcodes) - not operands",
        " if isinstance(value, dict):",
        " continue",
        " # Ensure value is unsigned for IDA's ea_t fields",
        " if isinstance(value, int) and value < 0:",
        " value = value & 0xFFFFFFFF",
        " op = insn.ops[op_idx]",
    ];
    // Fixed lines after the operand classification; the empty entry is the
    // blank line that terminates the method.
    const LOOP_TAIL: &[&str] = &[" op_idx += 1", " return insn.size", ""];

    writeln!(out, " def ev_ana_insn(self, insn):").unwrap();
    writeln!(
        out,
        " data = ida_bytes.get_bytes(insn.ea, {})",
        max_insn_bytes
    )
    .unwrap();
    for line in DECODE_AND_LOOP_HEAD {
        writeln!(out, "{}", line).unwrap();
    }

    emit_operand_classification(out, def, opts, type_map);

    for line in LOOP_TAIL {
        writeln!(out, "{}", line).unwrap();
    }
}
/// Emits the `if`/`elif`/`else` chain that assigns an IDA operand type to the
/// current decoded field inside `ev_ana_insn`.
///
/// Every field name occurring in any instruction is classified once (the
/// first occurrence wins). Register, address and memory names each get their
/// own `name in [...]` branch; everything else is treated as an immediate.
/// Address and memory values are multiplied by `opts.bytes_per_unit` when
/// that is greater than one.
///
/// The name lists are sorted so the generated source is identical from run to
/// run; iterating the `HashMap` directly would yield an unspecified order.
///
/// NOTE(review): every emitted line carries a single leading space - confirm
/// that this matches the Python indentation the generated module needs.
fn emit_operand_classification(
    out: &mut String,
    def: &ValidatedDef,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
) {
    /// Keyword that opens the next branch of the generated chain.
    fn branch_keyword(is_first: bool) -> &'static str {
        if is_first {
            "if "
        } else {
            "elif "
        }
    }

    let mut field_kinds: HashMap<String, OperandKind> = HashMap::new();
    for instr in &def.instructions {
        for field in &instr.resolved_fields {
            field_kinds.entry(field.name.clone()).or_insert_with(|| {
                classify_operand(&field.name, &field.resolved_type, opts, type_map)
            });
        }
    }

    // Quoted, alphabetically sorted names of all fields of the wanted kind.
    let quoted_names_of = |wanted: OperandKind| -> Vec<String> {
        let mut names: Vec<&String> = field_kinds
            .iter()
            .filter(|(_, kind)| **kind == wanted)
            .map(|(name, _)| name)
            .collect();
        names.sort_unstable();
        names
            .into_iter()
            .map(|name| format!("\"{}\"", name))
            .collect()
    };
    let reg_names = quoted_names_of(OperandKind::Register);
    let addr_names = quoted_names_of(OperandKind::Address);
    let mem_names = quoted_names_of(OperandKind::Memory);

    // Address-like operands are scaled from code units to bytes when needed.
    let addr_assignment = if opts.bytes_per_unit > 1 {
        format!(" op.addr = value * {}", opts.bytes_per_unit)
    } else {
        String::from(" op.addr = value")
    };

    let mut first = true;
    if !reg_names.is_empty() {
        writeln!(
            out,
            " {}name in [{}]:",
            branch_keyword(first),
            reg_names.join(", ")
        )
        .unwrap();
        writeln!(out, " op.type = ida_ua.o_reg").unwrap();
        writeln!(out, " op.reg = value").unwrap();
        first = false;
    }
    if !addr_names.is_empty() {
        writeln!(
            out,
            " {}name in [{}]:",
            branch_keyword(first),
            addr_names.join(", ")
        )
        .unwrap();
        writeln!(out, " op.type = ida_ua.o_near").unwrap();
        writeln!(out, "{}", addr_assignment).unwrap();
        first = false;
    }
    if !mem_names.is_empty() {
        writeln!(
            out,
            " {}name in [{}]:",
            branch_keyword(first),
            mem_names.join(", ")
        )
        .unwrap();
        writeln!(out, " op.type = ida_ua.o_mem").unwrap();
        writeln!(out, "{}", addr_assignment).unwrap();
        first = false;
    }

    if first {
        // No typed branch was emitted: every field is an immediate.
        writeln!(out, " op.type = ida_ua.o_imm").unwrap();
        writeln!(out, " op.value = value").unwrap();
    } else {
        writeln!(out, " else:").unwrap();
        writeln!(out, " op.type = ida_ua.o_imm").unwrap();
        writeln!(out, " op.value = value").unwrap();
    }
}
/// Emits the `ev_out_insn` callback.
///
/// The generated method re-decodes the instruction bytes, optionally scales
/// address fields by `opts.bytes_per_unit` (only when that is greater than
/// one and at least one field is classified as an address), formats the
/// instruction with `_format_insn`, and writes mnemonic and operands to the
/// output context.
fn emit_out_insn_method(
    out: &mut String,
    def: &ValidatedDef,
    opts: &IdaOptions,
    type_map: &HashMap<String, String>,
    max_insn_bytes: u32,
) {
    // Fixed lines that follow the byte fetch.
    const DECODE_LINES: &[&str] = &[
        " if data is None:",
        " return False",
        " result = _decode(data)",
        " if result is None:",
        " return False",
        " _, fields, _ = result",
    ];
    // Fixed lines that print the formatted instruction and end the method.
    const OUTPUT_LINES: &[&str] = &[
        " mnemonic, operands = _format_insn(insn.itype, fields)",
        " outctx.out_custom_mnem(mnemonic)",
        " if operands:",
        " outctx.out_line(\" \" + operands)",
        " outctx.flush_outbuf()",
        " return True",
        "",
    ];

    writeln!(out, " def ev_out_insn(self, outctx):").unwrap();
    writeln!(out, " insn = outctx.insn").unwrap();
    writeln!(
        out,
        " # Re-decode to get full fields (including sub-decoder dicts)"
    )
    .unwrap();
    writeln!(
        out,
        " data = ida_bytes.get_bytes(insn.ea, {})",
        max_insn_bytes
    )
    .unwrap();
    for line in DECODE_LINES {
        writeln!(out, "{}", line).unwrap();
    }

    if opts.bytes_per_unit > 1 {
        // Distinct address-field names in first-seen order.
        let mut scaled_fields: Vec<String> = Vec::new();
        let all_fields = def
            .instructions
            .iter()
            .flat_map(|instr| instr.resolved_fields.iter());
        for field in all_fields {
            let kind = classify_operand(&field.name, &field.resolved_type, opts, type_map);
            if kind == OperandKind::Address && !scaled_fields.contains(&field.name) {
                scaled_fields.push(field.name.clone());
            }
        }

        if !scaled_fields.is_empty() {
            let quoted: Vec<String> = scaled_fields
                .iter()
                .map(|name| format!("\"{}\"", name))
                .collect();
            writeln!(out, " _ADDR_FIELDS = {{{}}}", quoted.join(", ")).unwrap();
            writeln!(
                out,
                " fields = {{k: (v * {} if k in _ADDR_FIELDS else v) for k, v in fields.items()}}",
                opts.bytes_per_unit
            )
            .unwrap();
        }
    }

    for line in OUTPUT_LINES {
        writeln!(out, "{}", line).unwrap();
    }
}
/// Emits the `ev_out_operand` callback, whose generated body only returns
/// `True`, followed by a blank line.
fn emit_out_operand_method(out: &mut String) {
    const METHOD_LINES: &[&str] = &[
        " def ev_out_operand(self, outctx, op):",
        " return True",
        "",
    ];
    for line in METHOD_LINES {
        writeln!(out, "{}", line).unwrap();
    }
}
/// Emits the `ev_emu_insn` callback.
///
/// The generated method records a jump problem for `CF_JUMP` instructions,
/// adds a fall-through code reference unless `CF_STOP` is set, and adds code
/// references (call or jump) for `o_near` operands and data references for
/// `o_mem` operands. The text is fixed; `_def` and `_opts` are not consulted.
fn emit_emu_method(out: &mut String, _def: &ValidatedDef, _opts: &IdaOptions) {
    // One entry per emitted line; the empty entry is the trailing blank line.
    const METHOD_LINES: &[&str] = &[
        " def ev_emu_insn(self, insn):",
        " feature = insn.get_canon_feature()",
        " if feature & ida_idp.CF_JUMP:",
        " ida_problems.remember_problem(ida_problems.PR_JUMP, insn.ea)",
        " if feature & ida_idp.CF_STOP == 0:",
        " ida_xref.add_cref(insn.ea, insn.ea + insn.size, ida_xref.fl_F)",
        " # Add xrefs for address operands",
        " for i in range(6):",
        " op = insn.ops[i]",
        " if op.type == ida_ua.o_void:",
        " break",
        " if op.type == ida_ua.o_near:",
        " if feature & ida_idp.CF_CALL:",
        " insn.add_cref(op.addr, op.offb, ida_xref.fl_CN)",
        " else:",
        " insn.add_cref(op.addr, op.offb, ida_xref.fl_JN)",
        " elif op.type == ida_ua.o_mem:",
        " insn.add_dref(op.addr, op.offb, ida_xref.dr_R)",
        " return True",
        "",
    ];
    for line in METHOD_LINES {
        writeln!(out, "{}", line).unwrap();
    }
}
/// Returns `s` with its first character converted to ASCII upper case.
///
/// A non-ASCII first character is kept unchanged, and an empty input yields an
/// empty string.
fn capitalize(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        result.push(first.to_ascii_uppercase());
        result.push_str(rest.as_str());
    }
    result
}