1use std::collections::HashMap;
7use std::fmt::Write;
8
9use crate::backend::ida::IdaOptions;
10use crate::backend::OperandKind;
11use crate::codegen_python::{self, DisplayConfig};
12use crate::tree::DecodeNode;
13use crate::types::*;
14
/// Prefix of every generated instruction-type constant: constants are emitted
/// as `<ITYPE_PREFIX>_<INSTRUCTION NAME IN UPPERCASE>`. The same prefix is
/// handed to the shared Python decode/format emitters.
const ITYPE_PREFIX: &str = "ITYPE";
17
18pub fn generate_ida_code(
20 def: &ValidatedDef,
21 tree: &DecodeNode,
22 opts: &IdaOptions,
23 type_map: &HashMap<String, String>,
24) -> String {
25 let mut out = String::new();
26
27 let display = DisplayConfig {
28 type_prefixes: opts.display_prefixes.clone(),
29 };
30
31 emit_header(&mut out);
32 emit_helpers(&mut out, def);
33 codegen_python::emit_display_format_helpers(&mut out, def);
34 emit_map_functions(&mut out, def);
35 emit_subdecoder_functions(&mut out, def);
36 emit_itype_constants(&mut out, def);
37 emit_field_names_table(&mut out, def);
38 codegen_python::emit_decode_function(&mut out, def, tree, ITYPE_PREFIX);
39 codegen_python::emit_format_function(&mut out, def, ITYPE_PREFIX, &display);
40 emit_processor_class(&mut out, def, opts, type_map);
41
42 out
43}
44
/// Appends the generated-file banner and the IDA module imports to `out`,
/// followed by two blank lines.
fn emit_header(out: &mut String) {
    // One entry per output line; an empty entry is a blank line.
    const HEADER_LINES: &[&str] = &[
        "# Auto-generated by https://github.com/ioncodes/chipi",
        "# Do not edit.",
        "",
        "import ida_idp",
        "import ida_ua",
        "import ida_bytes",
        "import ida_idaapi",
        "import ida_nalt",
        "import ida_problems",
        "import ida_xref",
        "",
        "",
    ];

    for line in HEADER_LINES {
        writeln!(out, "{}", line).unwrap();
    }
}
60
61fn emit_helpers(out: &mut String, def: &ValidatedDef) {
63 if codegen_python::needs_sign_extend(def) {
64 codegen_python::emit_sign_extend_helper(out);
65 }
66 if codegen_python::needs_rotate_helpers(def) {
67 codegen_python::emit_rotate_helpers(out);
68 }
69}
70
71fn emit_map_functions(out: &mut String, def: &ValidatedDef) {
73 codegen_python::emit_map_functions_python(out, &def.maps);
74}
75
76fn emit_subdecoder_functions(out: &mut String, def: &ValidatedDef) {
78 for sd in &def.sub_decoders {
79 codegen_python::emit_subdecoder_python(out, sd);
80 }
81}
82
83fn emit_itype_constants(out: &mut String, def: &ValidatedDef) {
85 for (i, instr) in def.instructions.iter().enumerate() {
86 writeln!(
87 out,
88 "{}_{} = {}",
89 ITYPE_PREFIX,
90 instr.name.to_ascii_uppercase(),
91 i
92 )
93 .unwrap();
94 }
95 writeln!(out).unwrap();
96 writeln!(out).unwrap();
97}
98
99fn emit_field_names_table(out: &mut String, def: &ValidatedDef) {
102 writeln!(out, "_FIELD_NAMES = {{").unwrap();
103 for instr in &def.instructions {
104 let itype_const = format!("{}_{}", ITYPE_PREFIX, instr.name.to_ascii_uppercase());
105 let field_names: Vec<String> = instr
106 .resolved_fields
107 .iter()
108 .map(|f| format!("\"{}\"", f.name))
109 .collect();
110 writeln!(out, " {}: [{}],", itype_const, field_names.join(", ")).unwrap();
111 }
112 writeln!(out, "}}").unwrap();
113 writeln!(out).unwrap();
114 writeln!(out).unwrap();
115}
116
117fn emit_processor_class(
119 out: &mut String,
120 def: &ValidatedDef,
121 opts: &IdaOptions,
122 type_map: &HashMap<String, String>,
123) {
124 let class_name = format!("{}Processor", capitalize(&opts.processor_name));
125 let unit_bytes = def.config.width / 8;
126 let max_insn_bytes = unit_bytes
127 * def
128 .instructions
129 .iter()
130 .map(|i| i.unit_count())
131 .max()
132 .unwrap_or(1);
133
134 writeln!(out, "class {}(ida_idp.processor_t):", class_name).unwrap();
136 writeln!(out).unwrap();
137
138 writeln!(out, " id = {:#x}", opts.processor_id).unwrap();
140
141 if opts.flags.is_empty() {
143 writeln!(out, " flag = ida_idp.PR_DEFSEG32 | ida_idp.PR_USE32").unwrap();
144 } else {
145 let flag_strs: Vec<String> = opts
146 .flags
147 .iter()
148 .map(|f| format!("ida_idp.{}", f))
149 .collect();
150 writeln!(out, " flag = {}", flag_strs.join(" | ")).unwrap();
151 }
152
153 writeln!(out, " cnbits = 8").unwrap();
154 writeln!(out, " dnbits = 8").unwrap();
155 writeln!(out).unwrap();
156
157 writeln!(out, " psnames = [\"{}\"]", opts.processor_name).unwrap();
159 writeln!(
160 out,
161 " plnames = [\"{}\"]",
162 codegen_python::escape_python_str(&opts.processor_long_name)
163 )
164 .unwrap();
165 writeln!(out).unwrap();
166
167 writeln!(out, " reg_names = [").unwrap();
169 for name in &opts.register_names {
170 writeln!(out, " \"{}\",", name).unwrap();
171 }
172 writeln!(out, " ]").unwrap();
173 writeln!(out).unwrap();
174
175 let first_sreg_idx = opts
177 .register_names
178 .iter()
179 .position(|n| opts.segment_registers.first().map_or(false, |s| n == s))
180 .unwrap_or(opts.register_names.len().saturating_sub(2));
181 let last_sreg_idx = opts
182 .register_names
183 .iter()
184 .position(|n| opts.segment_registers.last().map_or(false, |s| n == s))
185 .unwrap_or(opts.register_names.len().saturating_sub(1));
186
187 writeln!(out, " reg_first_sreg = {}", first_sreg_idx).unwrap();
188 writeln!(out, " reg_last_sreg = {}", last_sreg_idx).unwrap();
189 writeln!(out, " segreg_size = 0").unwrap();
190 writeln!(out, " reg_code_sreg = {}", first_sreg_idx).unwrap();
191 writeln!(out, " reg_data_sreg = {}", last_sreg_idx).unwrap();
192 writeln!(out).unwrap();
193
194 emit_instruc_table(out, def, opts, type_map);
196
197 emit_assembler_info(out, opts);
199
200 writeln!(out, " def __init__(self):").unwrap();
202 writeln!(out, " ida_idp.processor_t.__init__(self)").unwrap();
203 writeln!(out).unwrap();
204
205 emit_ana_method(out, def, opts, type_map, max_insn_bytes);
207
208 emit_out_insn_method(out, def, opts, type_map, max_insn_bytes);
210
211 emit_out_operand_method(out);
213
214 emit_emu_method(out, def, opts);
216
217 writeln!(out).unwrap();
219 writeln!(out, "def PROCESSOR_ENTRY():").unwrap();
220 writeln!(out, " return {}()", class_name).unwrap();
221}
222
223fn emit_instruc_table(
225 out: &mut String,
226 def: &ValidatedDef,
227 opts: &IdaOptions,
228 type_map: &HashMap<String, String>,
229) {
230 writeln!(out, " instruc = [").unwrap();
231
232 for instr in &def.instructions {
233 let features = compute_instruction_features(instr, opts, type_map);
234 if features.is_empty() {
235 writeln!(
236 out,
237 " {{\"name\": \"{}\", \"feature\": 0}},",
238 instr.name
239 )
240 .unwrap();
241 } else {
242 writeln!(
243 out,
244 " {{\"name\": \"{}\", \"feature\": {}}},",
245 instr.name,
246 features.join(" | ")
247 )
248 .unwrap();
249 }
250 }
251
252 writeln!(out, " ]").unwrap();
253 writeln!(out, " instruc_start = 0").unwrap();
254 writeln!(out, " instruc_end = len(instruc)").unwrap();
255 writeln!(out).unwrap();
256}
257
258fn compute_instruction_features(
260 instr: &ValidatedInstruction,
261 opts: &IdaOptions,
262 type_map: &HashMap<String, String>,
263) -> Vec<String> {
264 let mut features = Vec::new();
265
266 if opts.flow.stops.contains(&instr.name) {
268 features.push("ida_idp.CF_STOP".to_string());
269 }
270 if opts.flow.calls.contains(&instr.name) {
271 features.push("ida_idp.CF_CALL".to_string());
272 }
273 if opts.flow.branches.contains(&instr.name)
274 || opts.flow.unconditional_branches.contains(&instr.name)
275 {
276 features.push("ida_idp.CF_JUMP".to_string());
277 }
278
279 for (i, field) in instr.resolved_fields.iter().enumerate() {
281 if i >= 6 {
282 break; }
284 let flag_num = i + 1;
285 let kind = classify_operand(&field.name, &field.resolved_type, opts, type_map);
286 match kind {
287 OperandKind::Register => {
288 if i == 0 && is_likely_destination(&field.name) {
291 features.push(format!("ida_idp.CF_CHG{}", flag_num));
292 } else {
293 features.push(format!("ida_idp.CF_USE{}", flag_num));
294 }
295 }
296 _ => {
297 features.push(format!("ida_idp.CF_USE{}", flag_num));
298 }
299 }
300 }
301
302 features
303}
304
305fn classify_operand(
307 field_name: &str,
308 resolved: &ResolvedFieldType,
309 opts: &IdaOptions,
310 type_map: &HashMap<String, String>,
311) -> OperandKind {
312 if let Some(kind) = opts.operand_types.get(field_name) {
314 return *kind;
315 }
316
317 if let Some(alias) = &resolved.alias_name {
319 if let Some(mapped) = type_map.get(alias) {
320 let lower = mapped.to_ascii_lowercase();
321 if lower.contains("reg") {
322 return OperandKind::Register;
323 }
324 }
325 }
326
327 let lower = field_name.to_ascii_lowercase();
329 if matches!(
330 lower.as_str(),
331 "rd" | "rs"
332 | "rt"
333 | "ra"
334 | "rb"
335 | "rc"
336 | "rn"
337 | "rm"
338 | "rz"
339 | "reg"
340 | "dreg"
341 | "sreg"
342 ) || lower.starts_with("ar")
343 && lower.len() <= 3
344 {
345 return OperandKind::Register;
346 }
347
348 if matches!(
349 lower.as_str(),
350 "addr" | "target" | "dest" | "address" | "ea"
351 ) {
352 return OperandKind::Address;
353 }
354
355 if matches!(
356 lower.as_str(),
357 "mem" | "memory" | "disp" | "displacement"
358 ) {
359 return OperandKind::Memory;
360 }
361
362 OperandKind::Immediate
364}
365
/// Returns `true` when `name` is, ignoring ASCII case, one of the
/// conventional destination-operand names: `rd`, `rt`, `d`, `dreg`, `dst`.
fn is_likely_destination(name: &str) -> bool {
    const DESTINATION_NAMES: [&str; 5] = ["rd", "rt", "d", "dreg", "dst"];

    DESTINATION_NAMES
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
371
372fn emit_assembler_info(out: &mut String, opts: &IdaOptions) {
374 writeln!(out, " assembler = {{").unwrap();
375 writeln!(out, " \"flag\": ida_idp.AS_COLON | ida_idp.ASH_HEXF3 | ida_idp.ASB_BINF0 | ida_idp.ASO_OCTF1 | ida_idp.AS_NCMAS,").unwrap();
376 writeln!(out, " \"uflag\": 0,").unwrap();
377 writeln!(
378 out,
379 " \"name\": \"{} assembler\",",
380 codegen_python::escape_python_str(&opts.processor_long_name)
381 )
382 .unwrap();
383 writeln!(out, " \"origin\": \".org\",").unwrap();
384 writeln!(out, " \"end\": \".end\",").unwrap();
385 writeln!(out, " \"cmnt\": \";\",").unwrap();
386 writeln!(out, " \"ascsep\": '\"',").unwrap();
387 writeln!(out, " \"accsep\": \"'\",").unwrap();
388 writeln!(out, " \"esccodes\": \"\\\"'\",").unwrap();
389 writeln!(out, " \"a_ascii\": \".ascii\",").unwrap();
390 writeln!(out, " \"a_byte\": \".byte\",").unwrap();
391 writeln!(out, " \"a_word\": \".word\",").unwrap();
392 writeln!(out, " \"a_dword\": \".dword\",").unwrap();
393 writeln!(out, " \"a_qword\": \".quad\",").unwrap();
394 writeln!(out, " \"a_float\": \".float\",").unwrap();
395 writeln!(out, " \"a_double\": \".double\",").unwrap();
396 writeln!(out, " \"a_bss\": \"dfs %s\",").unwrap();
397 writeln!(out, " \"a_seg\": \"seg\",").unwrap();
398 writeln!(out, " \"a_curip\": \".\",").unwrap();
399 writeln!(out, " \"a_public\": \".global\",").unwrap();
400 writeln!(out, " \"a_weak\": \"weak\",").unwrap();
401 writeln!(out, " \"a_extrn\": \".extern\",").unwrap();
402 writeln!(out, " \"a_comdef\": \"\",").unwrap();
403 writeln!(out, " \"a_align\": \".align\",").unwrap();
404 writeln!(out, " \"lbrace\": \"(\",").unwrap();
405 writeln!(out, " \"rbrace\": \")\",").unwrap();
406 writeln!(out, " \"a_mod\": \"%\",").unwrap();
407 writeln!(out, " \"a_band\": \"&\",").unwrap();
408 writeln!(out, " \"a_bor\": \"|\",").unwrap();
409 writeln!(out, " \"a_xor\": \"^\",").unwrap();
410 writeln!(out, " \"a_bnot\": \"~\",").unwrap();
411 writeln!(out, " \"a_shl\": \"<<\",").unwrap();
412 writeln!(out, " \"a_shr\": \">>\",").unwrap();
413 writeln!(out, " \"a_sizeof_fmt\": \"size %s\",").unwrap();
414 writeln!(out, " }}").unwrap();
415 writeln!(out).unwrap();
416}
417
418fn emit_ana_method(
420 out: &mut String,
421 def: &ValidatedDef,
422 opts: &IdaOptions,
423 type_map: &HashMap<String, String>,
424 max_insn_bytes: u32,
425) {
426 writeln!(out, " def ev_ana_insn(self, insn):").unwrap();
427 writeln!(
428 out,
429 " data = ida_bytes.get_bytes(insn.ea, {})",
430 max_insn_bytes
431 )
432 .unwrap();
433 writeln!(out, " if data is None:").unwrap();
434 writeln!(out, " return 0").unwrap();
435 writeln!(out, " result = _decode(data)").unwrap();
436 writeln!(out, " if result is None:").unwrap();
437 writeln!(out, " return 0").unwrap();
438 writeln!(out, " itype, fields, size = result").unwrap();
439 writeln!(out, " insn.itype = itype").unwrap();
440 writeln!(out, " insn.size = size").unwrap();
441
442 writeln!(out, " # Populate operands from decoded fields").unwrap();
445 writeln!(out, " op_idx = 0").unwrap();
446 writeln!(out, " for name, value in fields.items():").unwrap();
447 writeln!(out, " if op_idx >= 6:").unwrap();
448 writeln!(out, " break").unwrap();
449 writeln!(out, " # Skip sub-decoder dicts (extension opcodes) - not operands").unwrap();
450 writeln!(out, " if isinstance(value, dict):").unwrap();
451 writeln!(out, " continue").unwrap();
452 writeln!(out, " # Ensure value is unsigned for IDA's ea_t fields").unwrap();
453 writeln!(out, " if isinstance(value, int) and value < 0:").unwrap();
454 writeln!(out, " value = value & 0xFFFFFFFF").unwrap();
455 writeln!(out, " op = insn.ops[op_idx]").unwrap();
456
457 emit_operand_classification(out, def, opts, type_map);
459
460 writeln!(out, " op_idx += 1").unwrap();
461 writeln!(out, " return insn.size").unwrap();
462 writeln!(out).unwrap();
463}
464
465fn emit_operand_classification(
467 out: &mut String,
468 def: &ValidatedDef,
469 opts: &IdaOptions,
470 type_map: &HashMap<String, String>,
471) {
472 let mut field_kinds: HashMap<String, OperandKind> = HashMap::new();
474 for instr in &def.instructions {
475 for field in &instr.resolved_fields {
476 field_kinds
477 .entry(field.name.clone())
478 .or_insert_with(|| classify_operand(&field.name, &field.resolved_type, opts, type_map));
479 }
480 }
481
482 let reg_fields: Vec<&String> = field_kinds
484 .iter()
485 .filter(|(_, k)| **k == OperandKind::Register)
486 .map(|(n, _)| n)
487 .collect();
488
489 let addr_fields: Vec<&String> = field_kinds
490 .iter()
491 .filter(|(_, k)| **k == OperandKind::Address)
492 .map(|(n, _)| n)
493 .collect();
494
495 let mem_fields: Vec<&String> = field_kinds
496 .iter()
497 .filter(|(_, k)| **k == OperandKind::Memory)
498 .map(|(n, _)| n)
499 .collect();
500
501 let mut first = true;
503
504 if !reg_fields.is_empty() {
505 let names: Vec<String> = reg_fields.iter().map(|n| format!("\"{}\"", n)).collect();
506 writeln!(
507 out,
508 " {}name in [{}]:{}",
509 if first { "if " } else { "elif " },
510 names.join(", "),
511 ""
512 )
513 .unwrap();
514 writeln!(out, " op.type = ida_ua.o_reg").unwrap();
515 writeln!(out, " op.reg = value").unwrap();
516 first = false;
517 }
518
519 if !addr_fields.is_empty() {
520 let names: Vec<String> = addr_fields.iter().map(|n| format!("\"{}\"", n)).collect();
521 writeln!(
522 out,
523 " {}name in [{}]:",
524 if first { "if " } else { "elif " },
525 names.join(", ")
526 )
527 .unwrap();
528 writeln!(out, " op.type = ida_ua.o_near").unwrap();
529 if opts.bytes_per_unit > 1 {
530 writeln!(
531 out,
532 " op.addr = value * {}",
533 opts.bytes_per_unit
534 )
535 .unwrap();
536 } else {
537 writeln!(out, " op.addr = value").unwrap();
538 }
539 first = false;
540 }
541
542 if !mem_fields.is_empty() {
543 let names: Vec<String> = mem_fields.iter().map(|n| format!("\"{}\"", n)).collect();
544 writeln!(
545 out,
546 " {}name in [{}]:",
547 if first { "if " } else { "elif " },
548 names.join(", ")
549 )
550 .unwrap();
551 writeln!(out, " op.type = ida_ua.o_mem").unwrap();
552 if opts.bytes_per_unit > 1 {
553 writeln!(
554 out,
555 " op.addr = value * {}",
556 opts.bytes_per_unit
557 )
558 .unwrap();
559 } else {
560 writeln!(out, " op.addr = value").unwrap();
561 }
562 first = false;
563 }
564
565 if first {
567 writeln!(out, " op.type = ida_ua.o_imm").unwrap();
569 writeln!(out, " op.value = value").unwrap();
570 } else {
571 writeln!(out, " else:").unwrap();
572 writeln!(out, " op.type = ida_ua.o_imm").unwrap();
573 writeln!(out, " op.value = value").unwrap();
574 }
575}
576
577fn emit_out_insn_method(
579 out: &mut String,
580 def: &ValidatedDef,
581 opts: &IdaOptions,
582 type_map: &HashMap<String, String>,
583 max_insn_bytes: u32,
584) {
585 writeln!(out, " def ev_out_insn(self, outctx):").unwrap();
586 writeln!(out, " insn = outctx.insn").unwrap();
587 writeln!(out, " # Re-decode to get full fields (including sub-decoder dicts)").unwrap();
588 writeln!(
589 out,
590 " data = ida_bytes.get_bytes(insn.ea, {})",
591 max_insn_bytes
592 )
593 .unwrap();
594 writeln!(out, " if data is None:").unwrap();
595 writeln!(out, " return False").unwrap();
596 writeln!(out, " result = _decode(data)").unwrap();
597 writeln!(out, " if result is None:").unwrap();
598 writeln!(out, " return False").unwrap();
599 writeln!(out, " _, fields, _ = result").unwrap();
600
601 if opts.bytes_per_unit > 1 {
603 let mut addr_field_names: Vec<String> = Vec::new();
605 for instr in &def.instructions {
606 for field in &instr.resolved_fields {
607 let kind = classify_operand(&field.name, &field.resolved_type, opts, type_map);
608 if kind == OperandKind::Address && !addr_field_names.contains(&field.name) {
609 addr_field_names.push(field.name.clone());
610 }
611 }
612 }
613 if !addr_field_names.is_empty() {
614 let names: Vec<String> = addr_field_names
615 .iter()
616 .map(|n| format!("\"{}\"", n))
617 .collect();
618 writeln!(
619 out,
620 " _ADDR_FIELDS = {{{}}}",
621 names.join(", ")
622 )
623 .unwrap();
624 writeln!(
625 out,
626 " fields = {{k: (v * {} if k in _ADDR_FIELDS else v) for k, v in fields.items()}}",
627 opts.bytes_per_unit
628 )
629 .unwrap();
630 }
631 }
632
633 writeln!(out, " mnemonic, operands = _format_insn(insn.itype, fields)").unwrap();
634 writeln!(out, " outctx.out_custom_mnem(mnemonic)").unwrap();
635 writeln!(out, " if operands:").unwrap();
636 writeln!(out, " outctx.out_line(\" \" + operands)").unwrap();
637 writeln!(out, " outctx.flush_outbuf()").unwrap();
638 writeln!(out, " return True").unwrap();
639 writeln!(out).unwrap();
640}
641
/// Emits a stub `ev_out_operand` handler that just returns `True`, followed
/// by a blank line.
fn emit_out_operand_method(out: &mut String) {
    let stub_lines = [" def ev_out_operand(self, outctx, op):", " return True", ""];

    for line in stub_lines.iter() {
        out.push_str(line);
        out.push('\n');
    }
}
648
649fn emit_emu_method(
652 out: &mut String,
653 _def: &ValidatedDef,
654 _opts: &IdaOptions,
655) {
656 writeln!(out, " def ev_emu_insn(self, insn):").unwrap();
657 writeln!(out, " feature = insn.get_canon_feature()").unwrap();
658 writeln!(out, " if feature & ida_idp.CF_JUMP:").unwrap();
659 writeln!(out, " ida_problems.remember_problem(ida_problems.PR_JUMP, insn.ea)").unwrap();
660 writeln!(out, " if feature & ida_idp.CF_STOP == 0:").unwrap();
661 writeln!(out, " ida_xref.add_cref(insn.ea, insn.ea + insn.size, ida_xref.fl_F)").unwrap();
662 writeln!(out, " # Add xrefs for address operands").unwrap();
663 writeln!(out, " for i in range(6):").unwrap();
664 writeln!(out, " op = insn.ops[i]").unwrap();
665 writeln!(out, " if op.type == ida_ua.o_void:").unwrap();
666 writeln!(out, " break").unwrap();
667 writeln!(out, " if op.type == ida_ua.o_near:").unwrap();
668 writeln!(out, " if feature & ida_idp.CF_CALL:").unwrap();
669 writeln!(out, " insn.add_cref(op.addr, op.offb, ida_xref.fl_CN)").unwrap();
670 writeln!(out, " else:").unwrap();
671 writeln!(out, " insn.add_cref(op.addr, op.offb, ida_xref.fl_JN)").unwrap();
672 writeln!(out, " elif op.type == ida_ua.o_mem:").unwrap();
673 writeln!(out, " insn.add_dref(op.addr, op.offb, ida_xref.dr_R)").unwrap();
674 writeln!(out, " return True").unwrap();
675 writeln!(out).unwrap();
676}
677
/// Returns `s` with its first character converted to ASCII uppercase and the
/// remainder unchanged. An empty input yields an empty string; a non-ASCII
/// first character is kept as is.
fn capitalize(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s.chars();

    if let Some(first) = rest.next() {
        result.push(first.to_ascii_uppercase());
        result.push_str(rest.as_str());
    }

    result
}