1use std::collections::HashMap;
7use std::fmt::Write;
8
9use crate::backend::OperandKind;
10use crate::backend::ida::IdaOptions;
11use crate::codegen_python::{self, DisplayConfig};
12use crate::tree::DecodeNode;
13use crate::types::*;
14
/// Prefix of the generated instruction-type constants (`ITYPE_<NAME>`).
///
/// Used when emitting the constants and the `_FIELD_NAMES` table, and passed
/// to the shared decode/format function emitters.
const ITYPE_PREFIX: &str = "ITYPE";
17
18pub fn generate_ida_code(
20 def: &ValidatedDef,
21 tree: &DecodeNode,
22 opts: &IdaOptions,
23 type_map: &HashMap<String, String>,
24) -> String {
25 let mut out = String::new();
26
27 let display = DisplayConfig {
28 type_prefixes: opts.display_prefixes.clone(),
29 };
30
31 emit_header(&mut out);
32 emit_helpers(&mut out, def);
33 codegen_python::emit_display_format_helpers(&mut out, def);
34 emit_map_functions(&mut out, def);
35 emit_subdecoder_functions(&mut out, def);
36 emit_itype_constants(&mut out, def);
37 emit_field_names_table(&mut out, def);
38 codegen_python::emit_decode_function(&mut out, def, tree, ITYPE_PREFIX);
39 codegen_python::emit_format_function(&mut out, def, ITYPE_PREFIX, &display);
40 emit_processor_class(&mut out, def, opts, type_map);
41
42 out
43}
44
/// Writes the generated-file banner and the IDA Python module imports,
/// followed by two blank lines.
fn emit_header(out: &mut String) {
    const HEADER_LINES: &[&str] = &[
        "# Auto-generated by https://github.com/ioncodes/chipi",
        "# Do not edit.",
        "",
        "import ida_idp",
        "import ida_ua",
        "import ida_bytes",
        "import ida_idaapi",
        "import ida_nalt",
        "import ida_problems",
        "import ida_xref",
    ];

    for line in HEADER_LINES {
        writeln!(out, "{}", line).unwrap();
    }
    // Two blank lines separate the imports from whatever is emitted next.
    out.push_str("\n\n");
}
60
61fn emit_helpers(out: &mut String, def: &ValidatedDef) {
63 if codegen_python::needs_sign_extend(def) {
64 codegen_python::emit_sign_extend_helper(out);
65 }
66 if codegen_python::needs_rotate_helpers(def) {
67 codegen_python::emit_rotate_helpers(out);
68 }
69}
70
71fn emit_map_functions(out: &mut String, def: &ValidatedDef) {
73 codegen_python::emit_map_functions_python(out, &def.maps);
74}
75
76fn emit_subdecoder_functions(out: &mut String, def: &ValidatedDef) {
78 for sd in &def.sub_decoders {
79 codegen_python::emit_subdecoder_python(out, sd);
80 }
81}
82
83fn emit_itype_constants(out: &mut String, def: &ValidatedDef) {
85 for (i, instr) in def.instructions.iter().enumerate() {
86 writeln!(
87 out,
88 "{}_{} = {}",
89 ITYPE_PREFIX,
90 instr.name.to_ascii_uppercase(),
91 i
92 )
93 .unwrap();
94 }
95 writeln!(out).unwrap();
96 writeln!(out).unwrap();
97}
98
99fn emit_field_names_table(out: &mut String, def: &ValidatedDef) {
102 writeln!(out, "_FIELD_NAMES = {{").unwrap();
103 for instr in &def.instructions {
104 let itype_const = format!("{}_{}", ITYPE_PREFIX, instr.name.to_ascii_uppercase());
105 let field_names: Vec<String> = instr
106 .resolved_fields
107 .iter()
108 .map(|f| format!("\"{}\"", f.name))
109 .collect();
110 writeln!(out, " {}: [{}],", itype_const, field_names.join(", ")).unwrap();
111 }
112 writeln!(out, "}}").unwrap();
113 writeln!(out).unwrap();
114 writeln!(out).unwrap();
115}
116
117fn emit_processor_class(
119 out: &mut String,
120 def: &ValidatedDef,
121 opts: &IdaOptions,
122 type_map: &HashMap<String, String>,
123) {
124 let class_name = format!("{}Processor", capitalize(&opts.processor_name));
125 let unit_bytes = def.config.width / 8;
126 let max_insn_bytes = unit_bytes
127 * def
128 .instructions
129 .iter()
130 .map(|i| i.unit_count())
131 .max()
132 .unwrap_or(1);
133
134 writeln!(out, "class {}(ida_idp.processor_t):", class_name).unwrap();
136 writeln!(out).unwrap();
137
138 writeln!(out, " id = {:#x}", opts.processor_id).unwrap();
140
141 if opts.flags.is_empty() {
143 writeln!(out, " flag = ida_idp.PR_DEFSEG32 | ida_idp.PR_USE32").unwrap();
144 } else {
145 let flag_strs: Vec<String> = opts
146 .flags
147 .iter()
148 .map(|f| format!("ida_idp.{}", f))
149 .collect();
150 writeln!(out, " flag = {}", flag_strs.join(" | ")).unwrap();
151 }
152
153 writeln!(out, " cnbits = 8").unwrap();
154 writeln!(out, " dnbits = 8").unwrap();
155 writeln!(out).unwrap();
156
157 writeln!(out, " psnames = [\"{}\"]", opts.processor_name).unwrap();
159 writeln!(
160 out,
161 " plnames = [\"{}\"]",
162 codegen_python::escape_python_str(&opts.processor_long_name)
163 )
164 .unwrap();
165 writeln!(out).unwrap();
166
167 writeln!(out, " reg_names = [").unwrap();
169 for name in &opts.register_names {
170 writeln!(out, " \"{}\",", name).unwrap();
171 }
172 writeln!(out, " ]").unwrap();
173 writeln!(out).unwrap();
174
175 let first_sreg_idx = opts
177 .register_names
178 .iter()
179 .position(|n| opts.segment_registers.first().map_or(false, |s| n == s))
180 .unwrap_or(opts.register_names.len().saturating_sub(2));
181 let last_sreg_idx = opts
182 .register_names
183 .iter()
184 .position(|n| opts.segment_registers.last().map_or(false, |s| n == s))
185 .unwrap_or(opts.register_names.len().saturating_sub(1));
186
187 writeln!(out, " reg_first_sreg = {}", first_sreg_idx).unwrap();
188 writeln!(out, " reg_last_sreg = {}", last_sreg_idx).unwrap();
189 writeln!(out, " segreg_size = 0").unwrap();
190 writeln!(out, " reg_code_sreg = {}", first_sreg_idx).unwrap();
191 writeln!(out, " reg_data_sreg = {}", last_sreg_idx).unwrap();
192 writeln!(out).unwrap();
193
194 emit_instruc_table(out, def, opts, type_map);
196
197 emit_assembler_info(out, opts);
199
200 writeln!(out, " def __init__(self):").unwrap();
202 writeln!(out, " ida_idp.processor_t.__init__(self)").unwrap();
203 writeln!(out).unwrap();
204
205 emit_ana_method(out, def, opts, type_map, max_insn_bytes);
207
208 emit_out_insn_method(out, def, opts, type_map, max_insn_bytes);
210
211 emit_out_operand_method(out);
213
214 emit_emu_method(out, def, opts);
216
217 writeln!(out).unwrap();
219 writeln!(out, "def PROCESSOR_ENTRY():").unwrap();
220 writeln!(out, " return {}()", class_name).unwrap();
221}
222
223fn emit_instruc_table(
225 out: &mut String,
226 def: &ValidatedDef,
227 opts: &IdaOptions,
228 type_map: &HashMap<String, String>,
229) {
230 writeln!(out, " instruc = [").unwrap();
231
232 for instr in &def.instructions {
233 let features = compute_instruction_features(instr, opts, type_map);
234 if features.is_empty() {
235 writeln!(
236 out,
237 " {{\"name\": \"{}\", \"feature\": 0}},",
238 instr.name
239 )
240 .unwrap();
241 } else {
242 writeln!(
243 out,
244 " {{\"name\": \"{}\", \"feature\": {}}},",
245 instr.name,
246 features.join(" | ")
247 )
248 .unwrap();
249 }
250 }
251
252 writeln!(out, " ]").unwrap();
253 writeln!(out, " instruc_start = 0").unwrap();
254 writeln!(out, " instruc_end = len(instruc)").unwrap();
255 writeln!(out).unwrap();
256}
257
258fn compute_instruction_features(
260 instr: &ValidatedInstruction,
261 opts: &IdaOptions,
262 type_map: &HashMap<String, String>,
263) -> Vec<String> {
264 let mut features = Vec::new();
265
266 if opts.flow.stops.contains(&instr.name) {
268 features.push("ida_idp.CF_STOP".to_string());
269 }
270 if opts.flow.calls.contains(&instr.name) {
271 features.push("ida_idp.CF_CALL".to_string());
272 }
273 if opts.flow.branches.contains(&instr.name)
274 || opts.flow.unconditional_branches.contains(&instr.name)
275 {
276 features.push("ida_idp.CF_JUMP".to_string());
277 }
278
279 for (i, field) in instr.resolved_fields.iter().enumerate() {
281 if i >= 6 {
282 break; }
284 let flag_num = i + 1;
285 let kind = classify_operand(&field.name, &field.resolved_type, opts, type_map);
286 match kind {
287 OperandKind::Register => {
288 if i == 0 && is_likely_destination(&field.name) {
291 features.push(format!("ida_idp.CF_CHG{}", flag_num));
292 } else {
293 features.push(format!("ida_idp.CF_USE{}", flag_num));
294 }
295 }
296 _ => {
297 features.push(format!("ida_idp.CF_USE{}", flag_num));
298 }
299 }
300 }
301
302 features
303}
304
305fn classify_operand(
307 field_name: &str,
308 resolved: &ResolvedFieldType,
309 opts: &IdaOptions,
310 type_map: &HashMap<String, String>,
311) -> OperandKind {
312 if let Some(kind) = opts.operand_types.get(field_name) {
314 return *kind;
315 }
316
317 if let Some(alias) = &resolved.alias_name {
319 if let Some(mapped) = type_map.get(alias) {
320 let lower = mapped.to_ascii_lowercase();
321 if lower.contains("reg") {
322 return OperandKind::Register;
323 }
324 }
325 }
326
327 let lower = field_name.to_ascii_lowercase();
329 if matches!(
330 lower.as_str(),
331 "rd" | "rs" | "rt" | "ra" | "rb" | "rc" | "rn" | "rm" | "rz" | "reg" | "dreg" | "sreg"
332 ) || lower.starts_with("ar") && lower.len() <= 3
333 {
334 return OperandKind::Register;
335 }
336
337 if matches!(
338 lower.as_str(),
339 "addr" | "target" | "dest" | "address" | "ea"
340 ) {
341 return OperandKind::Address;
342 }
343
344 if matches!(lower.as_str(), "mem" | "memory" | "disp" | "displacement") {
345 return OperandKind::Memory;
346 }
347
348 OperandKind::Immediate
350}
351
/// Returns `true` when `name`, compared ASCII-case-insensitively, is one of
/// the names treated as a destination operand: `rd`, `rt`, `d`, `dreg`, `dst`.
fn is_likely_destination(name: &str) -> bool {
    const DESTINATION_NAMES: [&str; 5] = ["rd", "rt", "d", "dreg", "dst"];
    DESTINATION_NAMES
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
357
358fn emit_assembler_info(out: &mut String, opts: &IdaOptions) {
360 writeln!(out, " assembler = {{").unwrap();
361 writeln!(out, " \"flag\": ida_idp.AS_COLON | ida_idp.ASH_HEXF3 | ida_idp.ASB_BINF0 | ida_idp.ASO_OCTF1 | ida_idp.AS_NCMAS,").unwrap();
362 writeln!(out, " \"uflag\": 0,").unwrap();
363 writeln!(
364 out,
365 " \"name\": \"{} assembler\",",
366 codegen_python::escape_python_str(&opts.processor_long_name)
367 )
368 .unwrap();
369 writeln!(out, " \"origin\": \".org\",").unwrap();
370 writeln!(out, " \"end\": \".end\",").unwrap();
371 writeln!(out, " \"cmnt\": \";\",").unwrap();
372 writeln!(out, " \"ascsep\": '\"',").unwrap();
373 writeln!(out, " \"accsep\": \"'\",").unwrap();
374 writeln!(out, " \"esccodes\": \"\\\"'\",").unwrap();
375 writeln!(out, " \"a_ascii\": \".ascii\",").unwrap();
376 writeln!(out, " \"a_byte\": \".byte\",").unwrap();
377 writeln!(out, " \"a_word\": \".word\",").unwrap();
378 writeln!(out, " \"a_dword\": \".dword\",").unwrap();
379 writeln!(out, " \"a_qword\": \".quad\",").unwrap();
380 writeln!(out, " \"a_float\": \".float\",").unwrap();
381 writeln!(out, " \"a_double\": \".double\",").unwrap();
382 writeln!(out, " \"a_bss\": \"dfs %s\",").unwrap();
383 writeln!(out, " \"a_seg\": \"seg\",").unwrap();
384 writeln!(out, " \"a_curip\": \".\",").unwrap();
385 writeln!(out, " \"a_public\": \".global\",").unwrap();
386 writeln!(out, " \"a_weak\": \"weak\",").unwrap();
387 writeln!(out, " \"a_extrn\": \".extern\",").unwrap();
388 writeln!(out, " \"a_comdef\": \"\",").unwrap();
389 writeln!(out, " \"a_align\": \".align\",").unwrap();
390 writeln!(out, " \"lbrace\": \"(\",").unwrap();
391 writeln!(out, " \"rbrace\": \")\",").unwrap();
392 writeln!(out, " \"a_mod\": \"%\",").unwrap();
393 writeln!(out, " \"a_band\": \"&\",").unwrap();
394 writeln!(out, " \"a_bor\": \"|\",").unwrap();
395 writeln!(out, " \"a_xor\": \"^\",").unwrap();
396 writeln!(out, " \"a_bnot\": \"~\",").unwrap();
397 writeln!(out, " \"a_shl\": \"<<\",").unwrap();
398 writeln!(out, " \"a_shr\": \">>\",").unwrap();
399 writeln!(out, " \"a_sizeof_fmt\": \"size %s\",").unwrap();
400 writeln!(out, " }}").unwrap();
401 writeln!(out).unwrap();
402}
403
404fn emit_ana_method(
406 out: &mut String,
407 def: &ValidatedDef,
408 opts: &IdaOptions,
409 type_map: &HashMap<String, String>,
410 max_insn_bytes: u32,
411) {
412 writeln!(out, " def ev_ana_insn(self, insn):").unwrap();
413 writeln!(
414 out,
415 " data = ida_bytes.get_bytes(insn.ea, {})",
416 max_insn_bytes
417 )
418 .unwrap();
419 writeln!(out, " if data is None:").unwrap();
420 writeln!(out, " return 0").unwrap();
421 writeln!(out, " result = _decode(data)").unwrap();
422 writeln!(out, " if result is None:").unwrap();
423 writeln!(out, " return 0").unwrap();
424 writeln!(out, " itype, fields, size = result").unwrap();
425 writeln!(out, " insn.itype = itype").unwrap();
426 writeln!(out, " insn.size = size").unwrap();
427
428 writeln!(out, " # Populate operands from decoded fields").unwrap();
431 writeln!(out, " op_idx = 0").unwrap();
432 writeln!(out, " for name, value in fields.items():").unwrap();
433 writeln!(out, " if op_idx >= 6:").unwrap();
434 writeln!(out, " break").unwrap();
435 writeln!(
436 out,
437 " # Skip sub-decoder dicts (extension opcodes) - not operands"
438 )
439 .unwrap();
440 writeln!(out, " if isinstance(value, dict):").unwrap();
441 writeln!(out, " continue").unwrap();
442 writeln!(
443 out,
444 " # Ensure value is unsigned for IDA's ea_t fields"
445 )
446 .unwrap();
447 writeln!(out, " if isinstance(value, int) and value < 0:").unwrap();
448 writeln!(out, " value = value & 0xFFFFFFFF").unwrap();
449 writeln!(out, " op = insn.ops[op_idx]").unwrap();
450
451 emit_operand_classification(out, def, opts, type_map);
453
454 writeln!(out, " op_idx += 1").unwrap();
455 writeln!(out, " return insn.size").unwrap();
456 writeln!(out).unwrap();
457}
458
459fn emit_operand_classification(
461 out: &mut String,
462 def: &ValidatedDef,
463 opts: &IdaOptions,
464 type_map: &HashMap<String, String>,
465) {
466 let mut field_kinds: HashMap<String, OperandKind> = HashMap::new();
468 for instr in &def.instructions {
469 for field in &instr.resolved_fields {
470 field_kinds.entry(field.name.clone()).or_insert_with(|| {
471 classify_operand(&field.name, &field.resolved_type, opts, type_map)
472 });
473 }
474 }
475
476 let reg_fields: Vec<&String> = field_kinds
478 .iter()
479 .filter(|(_, k)| **k == OperandKind::Register)
480 .map(|(n, _)| n)
481 .collect();
482
483 let addr_fields: Vec<&String> = field_kinds
484 .iter()
485 .filter(|(_, k)| **k == OperandKind::Address)
486 .map(|(n, _)| n)
487 .collect();
488
489 let mem_fields: Vec<&String> = field_kinds
490 .iter()
491 .filter(|(_, k)| **k == OperandKind::Memory)
492 .map(|(n, _)| n)
493 .collect();
494
495 let mut first = true;
497
498 if !reg_fields.is_empty() {
499 let names: Vec<String> = reg_fields.iter().map(|n| format!("\"{}\"", n)).collect();
500 writeln!(
501 out,
502 " {}name in [{}]:{}",
503 if first { "if " } else { "elif " },
504 names.join(", "),
505 ""
506 )
507 .unwrap();
508 writeln!(out, " op.type = ida_ua.o_reg").unwrap();
509 writeln!(out, " op.reg = value").unwrap();
510 first = false;
511 }
512
513 if !addr_fields.is_empty() {
514 let names: Vec<String> = addr_fields.iter().map(|n| format!("\"{}\"", n)).collect();
515 writeln!(
516 out,
517 " {}name in [{}]:",
518 if first { "if " } else { "elif " },
519 names.join(", ")
520 )
521 .unwrap();
522 writeln!(out, " op.type = ida_ua.o_near").unwrap();
523 if opts.bytes_per_unit > 1 {
524 writeln!(
525 out,
526 " op.addr = value * {}",
527 opts.bytes_per_unit
528 )
529 .unwrap();
530 } else {
531 writeln!(out, " op.addr = value").unwrap();
532 }
533 first = false;
534 }
535
536 if !mem_fields.is_empty() {
537 let names: Vec<String> = mem_fields.iter().map(|n| format!("\"{}\"", n)).collect();
538 writeln!(
539 out,
540 " {}name in [{}]:",
541 if first { "if " } else { "elif " },
542 names.join(", ")
543 )
544 .unwrap();
545 writeln!(out, " op.type = ida_ua.o_mem").unwrap();
546 if opts.bytes_per_unit > 1 {
547 writeln!(
548 out,
549 " op.addr = value * {}",
550 opts.bytes_per_unit
551 )
552 .unwrap();
553 } else {
554 writeln!(out, " op.addr = value").unwrap();
555 }
556 first = false;
557 }
558
559 if first {
561 writeln!(out, " op.type = ida_ua.o_imm").unwrap();
563 writeln!(out, " op.value = value").unwrap();
564 } else {
565 writeln!(out, " else:").unwrap();
566 writeln!(out, " op.type = ida_ua.o_imm").unwrap();
567 writeln!(out, " op.value = value").unwrap();
568 }
569}
570
571fn emit_out_insn_method(
573 out: &mut String,
574 def: &ValidatedDef,
575 opts: &IdaOptions,
576 type_map: &HashMap<String, String>,
577 max_insn_bytes: u32,
578) {
579 writeln!(out, " def ev_out_insn(self, outctx):").unwrap();
580 writeln!(out, " insn = outctx.insn").unwrap();
581 writeln!(
582 out,
583 " # Re-decode to get full fields (including sub-decoder dicts)"
584 )
585 .unwrap();
586 writeln!(
587 out,
588 " data = ida_bytes.get_bytes(insn.ea, {})",
589 max_insn_bytes
590 )
591 .unwrap();
592 writeln!(out, " if data is None:").unwrap();
593 writeln!(out, " return False").unwrap();
594 writeln!(out, " result = _decode(data)").unwrap();
595 writeln!(out, " if result is None:").unwrap();
596 writeln!(out, " return False").unwrap();
597 writeln!(out, " _, fields, _ = result").unwrap();
598
599 if opts.bytes_per_unit > 1 {
601 let mut addr_field_names: Vec<String> = Vec::new();
603 for instr in &def.instructions {
604 for field in &instr.resolved_fields {
605 let kind = classify_operand(&field.name, &field.resolved_type, opts, type_map);
606 if kind == OperandKind::Address && !addr_field_names.contains(&field.name) {
607 addr_field_names.push(field.name.clone());
608 }
609 }
610 }
611 if !addr_field_names.is_empty() {
612 let names: Vec<String> = addr_field_names
613 .iter()
614 .map(|n| format!("\"{}\"", n))
615 .collect();
616 writeln!(out, " _ADDR_FIELDS = {{{}}}", names.join(", ")).unwrap();
617 writeln!(
618 out,
619 " fields = {{k: (v * {} if k in _ADDR_FIELDS else v) for k, v in fields.items()}}",
620 opts.bytes_per_unit
621 )
622 .unwrap();
623 }
624 }
625
626 writeln!(
627 out,
628 " mnemonic, operands = _format_insn(insn.itype, fields)"
629 )
630 .unwrap();
631 writeln!(out, " outctx.out_custom_mnem(mnemonic)").unwrap();
632 writeln!(out, " if operands:").unwrap();
633 writeln!(out, " outctx.out_line(\" \" + operands)").unwrap();
634 writeln!(out, " outctx.flush_outbuf()").unwrap();
635 writeln!(out, " return True").unwrap();
636 writeln!(out).unwrap();
637}
638
/// Emits the `ev_out_operand` method of the generated processor class; the
/// generated method simply returns `True`.
///
/// The header is written at four spaces and the body at eight. A body at the
/// same column as its `def` is rejected by Python.
fn emit_out_operand_method(out: &mut String) {
    writeln!(out, "    def ev_out_operand(self, outctx, op):").unwrap();
    writeln!(out, "        return True").unwrap();
    writeln!(out).unwrap();
}
645
646fn emit_emu_method(out: &mut String, _def: &ValidatedDef, _opts: &IdaOptions) {
649 writeln!(out, " def ev_emu_insn(self, insn):").unwrap();
650 writeln!(out, " feature = insn.get_canon_feature()").unwrap();
651 writeln!(out, " if feature & ida_idp.CF_JUMP:").unwrap();
652 writeln!(
653 out,
654 " ida_problems.remember_problem(ida_problems.PR_JUMP, insn.ea)"
655 )
656 .unwrap();
657 writeln!(out, " if feature & ida_idp.CF_STOP == 0:").unwrap();
658 writeln!(
659 out,
660 " ida_xref.add_cref(insn.ea, insn.ea + insn.size, ida_xref.fl_F)"
661 )
662 .unwrap();
663 writeln!(out, " # Add xrefs for address operands").unwrap();
664 writeln!(out, " for i in range(6):").unwrap();
665 writeln!(out, " op = insn.ops[i]").unwrap();
666 writeln!(out, " if op.type == ida_ua.o_void:").unwrap();
667 writeln!(out, " break").unwrap();
668 writeln!(out, " if op.type == ida_ua.o_near:").unwrap();
669 writeln!(out, " if feature & ida_idp.CF_CALL:").unwrap();
670 writeln!(
671 out,
672 " insn.add_cref(op.addr, op.offb, ida_xref.fl_CN)"
673 )
674 .unwrap();
675 writeln!(out, " else:").unwrap();
676 writeln!(
677 out,
678 " insn.add_cref(op.addr, op.offb, ida_xref.fl_JN)"
679 )
680 .unwrap();
681 writeln!(out, " elif op.type == ida_ua.o_mem:").unwrap();
682 writeln!(
683 out,
684 " insn.add_dref(op.addr, op.offb, ida_xref.dr_R)"
685 )
686 .unwrap();
687 writeln!(out, " return True").unwrap();
688 writeln!(out).unwrap();
689}
690
/// Returns `s` with its first character ASCII-uppercased and the rest left
/// untouched. An empty input yields an empty string; a non-ASCII first
/// character is kept as is.
fn capitalize(s: &str) -> String {
    let mut capitalized = String::with_capacity(s.len());
    let mut remaining = s.chars();
    if let Some(first) = remaining.next() {
        capitalized.push(first.to_ascii_uppercase());
        capitalized.push_str(remaining.as_str());
    }
    capitalized
}