Skip to main content

chipi_core/
codegen_binja.rs

1//! Binary Ninja Architecture plugin code generation.
2//!
3//! Generates a self-contained Python Architecture plugin for Binary Ninja
4//! from a validated `.chipi` definition and Binary Ninja-specific configuration.
5
6use std::fmt::Write;
7
8use crate::backend::binja::BinjaOptions;
9use crate::codegen_python::{self, DisplayConfig};
10use crate::tree::DecodeNode;
11use crate::types::*;
12
/// Prefix of every generated instruction-type constant (`ITYPE_<NAME>`).
///
/// The same value is passed to the shared Python code generator for the decode
/// and format functions and is used locally when emitting the constant table
/// and the control-flow checks, so all of them refer to identical names.
const ITYPE_PREFIX: &str = "ITYPE";
15
16/// Generate a complete Binary Ninja Architecture plugin as a Python string.
17pub fn generate_binja_code(def: &ValidatedDef, tree: &DecodeNode, opts: &BinjaOptions) -> String {
18    let mut out = String::new();
19
20    let display = DisplayConfig {
21        type_prefixes: opts.display_prefixes.clone(),
22    };
23
24    emit_header(&mut out);
25    emit_helpers(&mut out, def);
26    codegen_python::emit_display_format_helpers(&mut out, def);
27    emit_map_functions(&mut out, def);
28    emit_subdecoder_functions(&mut out, def);
29    emit_itype_constants(&mut out, def);
30    codegen_python::emit_decode_function(&mut out, def, tree, ITYPE_PREFIX);
31    codegen_python::emit_format_function(&mut out, def, ITYPE_PREFIX, &display);
32    emit_architecture_class(&mut out, def, opts);
33
34    out
35}
36
/// Append the generated-file banner and the `binaryninja` import block.
///
/// The text is fixed; it ends with two blank lines so that the next
/// top-level Python definition is separated from the imports.
fn emit_header(out: &mut String) {
    // One entry per output line; empty entries are blank lines.
    const HEADER_LINES: &[&str] = &[
        "# Auto-generated by https://github.com/ioncodes/chipi",
        "# Do not edit.",
        "",
        "from binaryninja import (",
        "    Architecture,",
        "    Endianness,",
        "    InstructionInfo,",
        "    InstructionTextToken,",
        "    InstructionTextTokenType,",
        "    BranchType,",
        "    RegisterInfo,",
        "    log_error,",
        ")",
        "",
        "",
    ];

    for line in HEADER_LINES {
        writeln!(out, "{}", line).unwrap();
    }
}
55
56/// Emit helper functions.
57fn emit_helpers(out: &mut String, def: &ValidatedDef) {
58    if codegen_python::needs_sign_extend(def) {
59        codegen_python::emit_sign_extend_helper(out);
60    }
61    if codegen_python::needs_rotate_helpers(def) {
62        codegen_python::emit_rotate_helpers(out);
63    }
64}
65
66/// Emit map lookup functions.
67fn emit_map_functions(out: &mut String, def: &ValidatedDef) {
68    codegen_python::emit_map_functions_python(out, &def.maps);
69}
70
71/// Emit sub-decoder dispatch functions.
72fn emit_subdecoder_functions(out: &mut String, def: &ValidatedDef) {
73    for sd in &def.sub_decoders {
74        codegen_python::emit_subdecoder_python(out, sd);
75    }
76}
77
78/// Emit ITYPE_* constants.
79fn emit_itype_constants(out: &mut String, def: &ValidatedDef) {
80    for (i, instr) in def.instructions.iter().enumerate() {
81        writeln!(
82            out,
83            "{}_{} = {}",
84            ITYPE_PREFIX,
85            instr.name.to_ascii_uppercase(),
86            i
87        )
88        .unwrap();
89    }
90    writeln!(out).unwrap();
91    writeln!(out).unwrap();
92}
93
94/// Emit the Architecture subclass.
95fn emit_architecture_class(out: &mut String, def: &ValidatedDef, opts: &BinjaOptions) {
96    let class_name = format!("{}Architecture", capitalize(&opts.architecture_name));
97    let unit_bytes = def.config.width / 8;
98    let max_insn_bytes = unit_bytes
99        * def
100            .instructions
101            .iter()
102            .map(|i| i.unit_count())
103            .max()
104            .unwrap_or(1);
105
106    // Class definition
107    writeln!(out, "class {}(Architecture):", class_name).unwrap();
108    writeln!(out, "    name = \"{}\"", opts.architecture_name).unwrap();
109    writeln!(out, "    address_size = {}", opts.address_size).unwrap();
110    writeln!(out, "    default_int_size = {}", opts.default_int_size).unwrap();
111    writeln!(out, "    max_instr_length = {}", max_insn_bytes).unwrap();
112    let endian_enum = match opts.endianness.as_str() {
113        "BigEndian" | "big" => "Endianness.BigEndian",
114        _ => "Endianness.LittleEndian",
115    };
116    writeln!(out, "    endianness = {}", endian_enum).unwrap();
117    writeln!(out).unwrap();
118
119    // Registers
120    writeln!(out, "    regs = {{").unwrap();
121    for name in &opts.register_names {
122        writeln!(
123            out,
124            "        \"{}\": RegisterInfo(\"{}\", {}),",
125            name, name, opts.register_size
126        )
127        .unwrap();
128    }
129    writeln!(out, "    }}").unwrap();
130    writeln!(out).unwrap();
131
132    // Stack pointer
133    if let Some(ref sp) = opts.stack_pointer {
134        writeln!(out, "    stack_pointer = \"{}\"", sp).unwrap();
135        writeln!(out).unwrap();
136    }
137
138    // get_instruction_info
139    emit_get_instruction_info(out, def, opts, max_insn_bytes);
140
141    // get_instruction_text
142    emit_get_instruction_text(out, max_insn_bytes);
143
144    // get_instruction_low_level_il
145    emit_get_instruction_low_level_il(out, max_insn_bytes);
146
147    // Register the architecture
148    writeln!(out).unwrap();
149    writeln!(out, "{}.register()", class_name).unwrap();
150}
151
152/// Emit `get_instruction_info`.
153fn emit_get_instruction_info(
154    out: &mut String,
155    def: &ValidatedDef,
156    opts: &BinjaOptions,
157    max_insn_bytes: u32,
158) {
159    writeln!(out, "    def get_instruction_info(self, data, addr):").unwrap();
160    writeln!(out, "        if len(data) < {}:", def.config.width / 8).unwrap();
161    writeln!(out, "            return None").unwrap();
162    writeln!(
163        out,
164        "        result = _decode(bytes(data[:{}]))",
165        max_insn_bytes
166    )
167    .unwrap();
168    writeln!(out, "        if result is None:").unwrap();
169    writeln!(out, "            return None").unwrap();
170    writeln!(out, "        itype, fields, size = result").unwrap();
171    writeln!(out, "        info = InstructionInfo()").unwrap();
172    writeln!(out, "        info.length = size").unwrap();
173
174    // Branch analysis from flow config
175    let has_flow = !opts.flow.stops.is_empty()
176        || !opts.flow.calls.is_empty()
177        || !opts.flow.branches.is_empty()
178        || !opts.flow.unconditional_branches.is_empty()
179        || !opts.flow.returns.is_empty();
180
181    if has_flow {
182        let mut emitted_any = false;
183
184        // Unconditional branches
185        if !opts.flow.unconditional_branches.is_empty() {
186            let itypes = collect_itypes(&opts.flow.unconditional_branches, def);
187            if !itypes.is_empty() {
188                writeln!(
189                    out,
190                    "        {}itype in ({},):",
191                    if emitted_any { "elif " } else { "if " },
192                    itypes.join(", ")
193                )
194                .unwrap();
195                writeln!(
196                    out,
197                    "            info.add_branch(BranchType.UnconditionalBranch)"
198                )
199                .unwrap();
200                emitted_any = true;
201            }
202        }
203
204        // Conditional branches
205        let cond_branches: Vec<&String> = opts
206            .flow
207            .branches
208            .iter()
209            .filter(|b| !opts.flow.unconditional_branches.contains(b))
210            .collect();
211        if !cond_branches.is_empty() {
212            let itypes = collect_itypes_refs(&cond_branches, def);
213            if !itypes.is_empty() {
214                writeln!(
215                    out,
216                    "        {}itype in ({},):",
217                    if emitted_any { "elif " } else { "if " },
218                    itypes.join(", ")
219                )
220                .unwrap();
221                writeln!(out, "            info.add_branch(BranchType.TrueBranch)").unwrap();
222                writeln!(
223                    out,
224                    "            info.add_branch(BranchType.FalseBranch, addr + size)"
225                )
226                .unwrap();
227                emitted_any = true;
228            }
229        }
230
231        // Calls
232        if !opts.flow.calls.is_empty() {
233            let itypes = collect_itypes(&opts.flow.calls, def);
234            if !itypes.is_empty() {
235                writeln!(
236                    out,
237                    "        {}itype in ({},):",
238                    if emitted_any { "elif " } else { "if " },
239                    itypes.join(", ")
240                )
241                .unwrap();
242                writeln!(
243                    out,
244                    "            info.add_branch(BranchType.CallDestination)"
245                )
246                .unwrap();
247                emitted_any = true;
248            }
249        }
250
251        // Returns
252        if !opts.flow.returns.is_empty() {
253            let itypes = collect_itypes(&opts.flow.returns, def);
254            if !itypes.is_empty() {
255                writeln!(
256                    out,
257                    "        {}itype in ({},):",
258                    if emitted_any { "elif " } else { "if " },
259                    itypes.join(", ")
260                )
261                .unwrap();
262                writeln!(
263                    out,
264                    "            info.add_branch(BranchType.FunctionReturn)"
265                )
266                .unwrap();
267                emitted_any = true;
268            }
269        }
270
271        // Stops
272        if !opts.flow.stops.is_empty() {
273            let itypes = collect_itypes(&opts.flow.stops, def);
274            if !itypes.is_empty() {
275                writeln!(
276                    out,
277                    "        {}itype in ({},):",
278                    if emitted_any { "elif " } else { "if " },
279                    itypes.join(", ")
280                )
281                .unwrap();
282                writeln!(
283                    out,
284                    "            info.add_branch(BranchType.FunctionReturn)"
285                )
286                .unwrap();
287                let _ = emitted_any;
288            }
289        }
290    }
291
292    writeln!(out, "        return info").unwrap();
293    writeln!(out).unwrap();
294}
295
/// Emit the `get_instruction_text` method of the architecture class.
///
/// The generated method decodes at most `max_insn_bytes` bytes, returns
/// `None` when decoding fails, and otherwise returns the token list together
/// with the instruction size. The mnemonic is emitted as an instruction
/// token; when there are operands they follow as a single text token after a
/// one-space separator token.
fn emit_get_instruction_text(out: &mut String, max_insn_bytes: u32) {
    // The whole method is one template; `{}` is the decode slice length.
    write!(
        out,
        concat!(
            "    def get_instruction_text(self, data, addr):\n",
            "        result = _decode(bytes(data[:{}]))\n",
            "        if result is None:\n",
            "            return None\n",
            "        itype, fields, size = result\n",
            "        mnemonic, operands = _format_insn(itype, fields)\n",
            "        tokens = []\n",
            "        tokens.append(InstructionTextToken(\n",
            "            InstructionTextTokenType.InstructionToken, mnemonic))\n",
            "        if operands:\n",
            "            tokens.append(InstructionTextToken(\n",
            "                InstructionTextTokenType.TextToken, \" \"))\n",
            "            tokens.append(InstructionTextToken(\n",
            "                InstructionTextTokenType.TextToken, operands))\n",
            "        return tokens, size\n",
            "\n",
        ),
        max_insn_bytes
    )
    .unwrap();
}
336
/// Emit the `get_instruction_low_level_il` method of the architecture class.
///
/// The generated method decodes at most `max_insn_bytes` bytes, returns
/// `None` when decoding fails, and otherwise appends a single `nop` to the
/// IL and returns the instruction size.
fn emit_get_instruction_low_level_il(out: &mut String, max_insn_bytes: u32) {
    // Only the decode call depends on the argument; the rest is fixed text.
    let decode_call = format!("        result = _decode(bytes(data[:{}]))", max_insn_bytes);

    let method_lines: [&str; 8] = [
        "    def get_instruction_low_level_il(self, data, addr, il):",
        decode_call.as_str(),
        "        if result is None:",
        "            return None",
        "        _, _, size = result",
        "        il.append(il.nop())",
        "        return size",
        "",
    ];

    for line in method_lines.iter() {
        writeln!(out, "{}", line).unwrap();
    }
}
357
358/// Collect ITYPE constants for instruction names.
359fn collect_itypes(names: &[String], def: &ValidatedDef) -> Vec<String> {
360    names
361        .iter()
362        .filter(|name| def.instructions.iter().any(|i| i.name == **name))
363        .map(|name| format!("{}_{}", ITYPE_PREFIX, name.to_ascii_uppercase()))
364        .collect()
365}
366
367fn collect_itypes_refs(names: &[&String], def: &ValidatedDef) -> Vec<String> {
368    names
369        .iter()
370        .filter(|name| def.instructions.iter().any(|i| i.name == ***name))
371        .map(|name| format!("{}_{}", ITYPE_PREFIX, name.to_ascii_uppercase()))
372        .collect()
373}
374
/// Return `s` with its first character converted to ASCII upper case.
///
/// Only ASCII letters are affected; a non-ASCII or non-letter first
/// character is kept as is. The rest of the string is copied unchanged and
/// an empty input yields an empty string.
fn capitalize(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s.chars();

    if let Some(first) = rest.next() {
        result.push(first.to_ascii_uppercase());
        // `rest` now points just past the first character.
        result.push_str(rest.as_str());
    }

    result
}