Skip to main content

chipi_core/
codegen_python.rs

1//! Shared Python code generation helpers.
2//!
3//! Provides reusable functions for emitting Python code from the chipi IR.
4//! Used by the IDA backend (and future Binary Ninja backend).
5
6use std::collections::HashMap;
7use std::fmt::Write;
8
9use crate::tree::DecodeNode;
10use crate::types::*;
11
/// Display-formatting options consulted while generating Python code.
#[derive(Debug, Clone, Default)]
pub struct DisplayConfig {
    /// Display prefix for each type alias, keyed by alias name
    /// (e.g., "gpr" -> "r", "fpr" -> "f").
    pub type_prefixes: HashMap<String, String>,
}
18
19/// Emit `_fmt_signed_hex` and `_fmt_hex` helper functions if needed.
20pub fn emit_display_format_helpers(out: &mut String, def: &ValidatedDef) {
21    if needs_display_format(def) {
22        writeln!(out, "def _fmt_signed_hex(v):").unwrap();
23        writeln!(out, "    if v < 0:").unwrap();
24        writeln!(out, "        return f\"-0x{{-v:x}}\"").unwrap();
25        writeln!(out, "    return f\"0x{{v:x}}\"").unwrap();
26        writeln!(out).unwrap();
27        writeln!(out).unwrap();
28        writeln!(out, "def _fmt_hex(v):").unwrap();
29        writeln!(out, "    return f\"0x{{v:x}}\"").unwrap();
30        writeln!(out).unwrap();
31        writeln!(out).unwrap();
32    }
33}
34
35/// Check if any field uses display(hex) or display(signed_hex).
36fn needs_display_format(def: &ValidatedDef) -> bool {
37    for instr in &def.instructions {
38        for field in &instr.resolved_fields {
39            if field.resolved_type.display_format.is_some() {
40                return true;
41            }
42        }
43    }
44    for sd in &def.sub_decoders {
45        for instr in &sd.instructions {
46            for field in &instr.resolved_fields {
47                if field.resolved_type.display_format.is_some() {
48                    return true;
49                }
50            }
51        }
52    }
53    false
54}
55
/// Append the `_sign_extend(val, bits)` Python helper to `out`.
///
/// The generated function XORs `val` with the sign-bit mask `1 << (bits - 1)`
/// and then subtracts that mask. The definition is followed by two blank lines.
pub fn emit_sign_extend_helper(out: &mut String) {
    out.push_str(concat!(
        "def _sign_extend(val, bits):\n",
        "    mask = 1 << (bits - 1)\n",
        "    return (val ^ mask) - mask\n",
        "\n",
        "\n",
    ));
}
64
/// Append the `_rotate_right` and `_rotate_left` Python helpers to `out`.
///
/// Both generated functions take `(val, amt, width=32)`, reduce `amt` modulo
/// `width`, mask `val` to `width` bits and return the rotated value masked to
/// `width` bits. Each definition is followed by two blank lines.
pub fn emit_rotate_helpers(out: &mut String) {
    // (function name, shift applied by `amt`, shift applied by `width - amt`)
    for &(name, primary, wrap) in &[("_rotate_right", ">>", "<<"), ("_rotate_left", "<<", ">>")] {
        writeln!(out, "def {}(val, amt, width=32):", name).unwrap();
        out.push_str("    amt = amt % width\n");
        out.push_str("    mask = (1 << width) - 1\n");
        out.push_str("    val = val & mask\n");
        writeln!(
            out,
            "    return ((val {} amt) | (val {} (width - amt))) & mask",
            primary, wrap
        )
        .unwrap();
        out.push_str("\n\n");
    }
}
82
83/// Generate Python dict-based map (lookup) functions.
84pub fn emit_map_functions_python(out: &mut String, maps: &[MapDef]) {
85    for map_def in maps {
86        let params: Vec<&str> = map_def.params.iter().map(|s| s.as_str()).collect();
87        writeln!(out, "def {}({}):", map_def.name, params.join(", ")).unwrap();
88
89        // Find the wildcard/default entry
90        let default_entry = map_def.entries.iter().find(|e| {
91            e.keys.len() == 1 && e.keys[0] == MapKey::Wildcard
92        });
93
94        // Build the lookup dict
95        writeln!(out, "    _MAP = {{").unwrap();
96        for entry in &map_def.entries {
97            if entry.keys.iter().any(|k| *k == MapKey::Wildcard) {
98                continue; // skip wildcard, it's the default
99            }
100            let key = if entry.keys.len() == 1 {
101                format_map_key_python(&entry.keys[0])
102            } else {
103                let keys: Vec<String> = entry.keys.iter().map(format_map_key_python).collect();
104                format!("({})", keys.join(", "))
105            };
106            let value = format_pieces_to_python_str_simple(&entry.output);
107            writeln!(out, "        {}: {},", key, value).unwrap();
108        }
109        writeln!(out, "    }}").unwrap();
110
111        let lookup_key = if params.len() == 1 {
112            params[0].to_string()
113        } else {
114            format!("({})", params.join(", "))
115        };
116
117        let default_val = if let Some(entry) = default_entry {
118            format_pieces_to_python_str_simple(&entry.output)
119        } else {
120            "\"???\"".to_string()
121        };
122
123        writeln!(out, "    return _MAP.get({}, {})", lookup_key, default_val).unwrap();
124        writeln!(out).unwrap();
125        writeln!(out).unwrap();
126    }
127}
128
129fn format_map_key_python(key: &MapKey) -> String {
130    match key {
131        MapKey::Value(v) => format!("{}", v),
132        MapKey::Wildcard => "None".to_string(),
133    }
134}
135
136/// Convert format pieces to a simple Python string expression (no IDA-specific formatting).
137/// Returns a Python expression that evaluates to a string.
138fn format_pieces_to_python_str_simple(pieces: &[FormatPiece]) -> String {
139    if pieces.is_empty() {
140        return "\"\"".to_string();
141    }
142
143    // Check if all pieces are literals
144    let all_literal = pieces.iter().all(|p| matches!(p, FormatPiece::Literal(_)));
145    if all_literal {
146        let mut s = String::new();
147        for piece in pieces {
148            if let FormatPiece::Literal(lit) = piece {
149                s.push_str(lit);
150            }
151        }
152        return format!("\"{}\"", escape_python_str(&s));
153    }
154
155    // Build f-string
156    let mut parts = String::new();
157    parts.push_str("f\"");
158    for piece in pieces {
159        match piece {
160            FormatPiece::Literal(lit) => {
161                parts.push_str(&escape_python_fstr(lit));
162            }
163            FormatPiece::FieldRef { expr, spec } => {
164                parts.push('{');
165                parts.push_str(&expr_to_python_simple(expr));
166                if let Some(spec) = spec {
167                    parts.push(':');
168                    parts.push_str(spec);
169                }
170                parts.push('}');
171            }
172        }
173    }
174    parts.push('"');
175    parts
176}
177
178fn expr_to_python_simple(expr: &FormatExpr) -> String {
179    match expr {
180        FormatExpr::Field(name) => name.clone(),
181        FormatExpr::IntLiteral(val) => format!("{}", val),
182        FormatExpr::Arithmetic { left, op, right } => {
183            let l = expr_to_python_simple(left);
184            let r = expr_to_python_simple(right);
185            let op_str = arith_op_str(op);
186            format!("({} {} {})", l, op_str, r)
187        }
188        FormatExpr::MapCall { map_name, args } => {
189            let arg_strs: Vec<String> = args.iter().map(|a| expr_to_python_simple(a)).collect();
190            format!("{}({})", map_name, arg_strs.join(", "))
191        }
192        FormatExpr::Ternary {
193            field,
194            if_nonzero,
195            if_zero,
196        } => {
197            let else_val = if_zero.as_deref().unwrap_or("");
198            format!(
199                "(\"{}\" if {} else \"{}\")",
200                escape_python_str(if_nonzero),
201                field,
202                escape_python_str(else_val)
203            )
204        }
205        FormatExpr::BuiltinCall { func, args } => {
206            let arg_strs: Vec<String> = args.iter().map(|a| expr_to_python_simple(a)).collect();
207            match func {
208                BuiltinFunc::RotateRight => {
209                    format!(
210                        "_rotate_right({}, {})",
211                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
212                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0")
213                    )
214                }
215                BuiltinFunc::RotateLeft => {
216                    format!(
217                        "_rotate_left({}, {})",
218                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
219                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0")
220                    )
221                }
222            }
223        }
224        FormatExpr::SubDecoderAccess { field, fragment } => {
225            format!("{}[\"{}\"]", field, fragment)
226        }
227    }
228}
229
230fn arith_op_str(op: &ArithOp) -> &'static str {
231    match op {
232        ArithOp::Add => "+",
233        ArithOp::Sub => "-",
234        ArithOp::Mul => "*",
235        ArithOp::Div => "//",
236        ArithOp::Mod => "%",
237    }
238}
239
/// Escape a string for use inside a double-quoted Python string literal.
///
/// Backslashes and double quotes are backslash-escaped. Newline, carriage
/// return and tab are written as `\n`, `\r` and `\t`, because a raw line
/// break would terminate a single-line Python literal. All other characters
/// are copied unchanged.
pub fn escape_python_str(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }
    escaped
}
244
/// Escape a string for use in the literal part of a double-quoted Python
/// f-string.
///
/// Backslashes and double quotes are backslash-escaped, and `{` / `}` are
/// doubled so they are not read as replacement fields. Newline, carriage
/// return and tab are written as `\n`, `\r` and `\t`, because a raw line
/// break would terminate a single-line literal. All other characters are
/// copied unchanged.
pub fn escape_python_fstr(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '{' => escaped.push_str("{{"),
            '}' => escaped.push_str("}}"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }
    escaped
}
252
253/// Generate the Python `_decode(data)` function body using the decision tree.
254///
255/// Returns the complete function as a string. The function returns
256/// `(itype_const, fields_dict, byte_size)` or `None`.
257pub fn emit_decode_function(
258    out: &mut String,
259    def: &ValidatedDef,
260    tree: &DecodeNode,
261    itype_prefix: &str,
262) {
263    let unit_bytes = def.config.width / 8;
264    let endian = match def.config.endian {
265        ByteEndian::Big => "big",
266        ByteEndian::Little => "little",
267    };
268    let variable_length = def.instructions.iter().any(|i| i.unit_count() > 1);
269
270    writeln!(out, "def _decode(data):").unwrap();
271    writeln!(
272        out,
273        "    if len(data) < {}:",
274        unit_bytes
275    )
276    .unwrap();
277    writeln!(out, "        return None").unwrap();
278    writeln!(
279        out,
280        "    opcode = int.from_bytes(data[0:{}], byteorder=\"{}\")",
281        unit_bytes, endian
282    )
283    .unwrap();
284
285    emit_tree_python(out, tree, def, 1, itype_prefix, variable_length, unit_bytes, endian);
286
287    writeln!(out).unwrap();
288    writeln!(out).unwrap();
289}
290
291/// Recursively emit Python decision tree code.
292fn emit_tree_python(
293    out: &mut String,
294    node: &DecodeNode,
295    def: &ValidatedDef,
296    indent: usize,
297    itype_prefix: &str,
298    variable_length: bool,
299    unit_bytes: u32,
300    endian: &str,
301) {
302    let pad = "    ".repeat(indent);
303    match node {
304        DecodeNode::Leaf { instruction_index } => {
305            let instr = &def.instructions[*instruction_index];
306            if let Some(guard) = leaf_guard_python(instr, unit_bytes, endian) {
307                writeln!(out, "{}if {}:", pad, guard).unwrap();
308                emit_return_decoded(
309                    out,
310                    instr,
311                    itype_prefix,
312                    indent + 1,
313                    variable_length,
314                    unit_bytes,
315                    endian,
316                );
317                writeln!(out, "{}else:", pad).unwrap();
318                writeln!(out, "{}    return None", pad).unwrap();
319            } else {
320                emit_return_decoded(
321                    out,
322                    instr,
323                    itype_prefix,
324                    indent,
325                    variable_length,
326                    unit_bytes,
327                    endian,
328                );
329            }
330        }
331        DecodeNode::PriorityLeaves { candidates } => {
332            for (i, &idx) in candidates.iter().enumerate() {
333                let instr = &def.instructions[idx];
334                let guard = leaf_guard_python(instr, unit_bytes, endian);
335
336                if i == 0 {
337                    if let Some(guard_expr) = guard {
338                        writeln!(out, "{}if {}:", pad, guard_expr).unwrap();
339                        emit_return_decoded(
340                            out,
341                            instr,
342                            itype_prefix,
343                            indent + 1,
344                            variable_length,
345                            unit_bytes,
346                            endian,
347                        );
348                    } else {
349                        emit_return_decoded(
350                            out,
351                            instr,
352                            itype_prefix,
353                            indent,
354                            variable_length,
355                            unit_bytes,
356                            endian,
357                        );
358                        break;
359                    }
360                } else if i == candidates.len() - 1 {
361                    // Last candidate
362                    if let Some(guard_expr) = guard {
363                        writeln!(out, "{}elif {}:", pad, guard_expr).unwrap();
364                        emit_return_decoded(
365                            out,
366                            instr,
367                            itype_prefix,
368                            indent + 1,
369                            variable_length,
370                            unit_bytes,
371                            endian,
372                        );
373                        writeln!(out, "{}else:", pad).unwrap();
374                        writeln!(out, "{}    return None", pad).unwrap();
375                    } else {
376                        writeln!(out, "{}else:", pad).unwrap();
377                        emit_return_decoded(
378                            out,
379                            instr,
380                            itype_prefix,
381                            indent + 1,
382                            variable_length,
383                            unit_bytes,
384                            endian,
385                        );
386                    }
387                } else {
388                    // Middle
389                    let guard_expr = guard.unwrap_or_else(|| "True".to_string());
390                    writeln!(out, "{}elif {}:", pad, guard_expr).unwrap();
391                    emit_return_decoded(
392                        out,
393                        instr,
394                        itype_prefix,
395                        indent + 1,
396                        variable_length,
397                        unit_bytes,
398                        endian,
399                    );
400                }
401            }
402        }
403        DecodeNode::Fail => {
404            writeln!(out, "{}return None", pad).unwrap();
405        }
406        DecodeNode::Branch {
407            range,
408            arms,
409            default,
410        } => {
411            let extract = extract_expr_python("opcode", &[*range], unit_bytes, endian);
412            let var_name = format!("_v{}", indent);
413            writeln!(out, "{}{} = {}", pad, var_name, extract).unwrap();
414
415            let mut first = true;
416            for (value, child) in arms {
417                if first {
418                    writeln!(out, "{}if {} == {:#x}:", pad, var_name, value).unwrap();
419                    first = false;
420                } else {
421                    writeln!(out, "{}elif {} == {:#x}:", pad, var_name, value).unwrap();
422                }
423                emit_tree_python(
424                    out,
425                    child,
426                    def,
427                    indent + 1,
428                    itype_prefix,
429                    variable_length,
430                    unit_bytes,
431                    endian,
432                );
433            }
434
435            // Default arm
436            if !arms.is_empty() {
437                writeln!(out, "{}else:", pad).unwrap();
438                emit_tree_python(
439                    out,
440                    default,
441                    def,
442                    indent + 1,
443                    itype_prefix,
444                    variable_length,
445                    unit_bytes,
446                    endian,
447                );
448            } else {
449                emit_tree_python(
450                    out,
451                    default,
452                    def,
453                    indent,
454                    itype_prefix,
455                    variable_length,
456                    unit_bytes,
457                    endian,
458                );
459            }
460        }
461    }
462}
463
464/// Emit Python code to return a decoded instruction tuple.
465fn emit_return_decoded(
466    out: &mut String,
467    instr: &ValidatedInstruction,
468    itype_prefix: &str,
469    indent: usize,
470    variable_length: bool,
471    unit_bytes: u32,
472    endian: &str,
473) {
474    let pad = "    ".repeat(indent);
475    let unit_count = instr.unit_count();
476    let bytes_consumed = unit_count * unit_bytes;
477    let itype_const = format!("{}_{}", itype_prefix, instr.name.to_ascii_uppercase());
478
479    if variable_length && unit_count > 1 {
480        writeln!(out, "{}if len(data) < {}:", pad, bytes_consumed).unwrap();
481        writeln!(out, "{}    return None", pad).unwrap();
482    }
483
484    if instr.resolved_fields.is_empty() {
485        writeln!(
486            out,
487            "{}return ({}, {{}}, {})",
488            pad, itype_const, bytes_consumed
489        )
490        .unwrap();
491    } else {
492        // Extract fields
493        for field in &instr.resolved_fields {
494            let extract = extract_expr_python("opcode", &field.ranges, unit_bytes, endian);
495            let expr = apply_transforms_python(&extract, &field.resolved_type);
496            writeln!(out, "{}{} = {}", pad, field.name, expr).unwrap();
497        }
498
499        // Build fields dict
500        let field_names: Vec<&str> = instr
501            .resolved_fields
502            .iter()
503            .map(|f| f.name.as_str())
504            .collect();
505        let dict_entries: Vec<String> = field_names
506            .iter()
507            .map(|n| format!("\"{}\": {}", n, n))
508            .collect();
509        writeln!(
510            out,
511            "{}return ({}, {{{}}}, {})",
512            pad,
513            itype_const,
514            dict_entries.join(", "),
515            bytes_consumed
516        )
517        .unwrap();
518    }
519}
520
521/// Generate a Python expression to extract bits from ranges.
522pub fn extract_expr_python(
523    var: &str,
524    ranges: &[BitRange],
525    unit_bytes: u32,
526    endian: &str,
527) -> String {
528    if ranges.is_empty() {
529        return "0".to_string();
530    }
531
532    if ranges.len() == 1 {
533        let range = ranges[0];
534        let source = unit_read_python(range.unit, unit_bytes, endian);
535        let source = if range.unit == 0 {
536            var.to_string()
537        } else {
538            source
539        };
540
541        let width = range.width();
542        let shift = range.end;
543        let mask = (1u64 << width) - 1;
544
545        if shift == 0 {
546            format!("({} & {:#x})", source, mask)
547        } else {
548            format!("(({} >> {}) & {:#x})", source, shift, mask)
549        }
550    } else {
551        let mut parts = Vec::new();
552        let mut accumulated_width = 0u32;
553
554        for range in ranges {
555            let source = if range.unit == 0 {
556                var.to_string()
557            } else {
558                unit_read_python(range.unit, unit_bytes, endian)
559            };
560
561            let width = range.width();
562            let shift = range.end;
563            let mask = (1u64 << width) - 1;
564
565            let extracted = if shift == 0 {
566                format!("({} & {:#x})", source, mask)
567            } else {
568                format!("(({} >> {}) & {:#x})", source, shift, mask)
569            };
570
571            if accumulated_width > 0 {
572                parts.push(format!("({} << {})", extracted, accumulated_width));
573            } else {
574                parts.push(extracted);
575            }
576
577            accumulated_width += width;
578        }
579
580        parts.join(" | ")
581    }
582}
583
/// Python expression that yields the value of unit number `unit`.
///
/// Unit 0 is the already-loaded `opcode` variable. Any other unit is read
/// from the `data` buffer with `int.from_bytes` over the byte slice
/// `[unit * unit_bytes, unit * unit_bytes + unit_bytes)` in the given byte order.
fn unit_read_python(unit: u32, unit_bytes: u32, endian: &str) -> String {
    match unit {
        0 => String::from("opcode"),
        _ => {
            let first_byte = unit * unit_bytes;
            let past_last_byte = first_byte + unit_bytes;
            format!(
                "int.from_bytes(data[{}:{}], byteorder=\"{}\")",
                first_byte, past_last_byte, endian
            )
        }
    }
}
597
598/// Apply chipi transforms to a Python expression.
599pub fn apply_transforms_python(extract_expr: &str, resolved: &ResolvedFieldType) -> String {
600    let mut expr = extract_expr.to_string();
601
602    for transform in &resolved.transforms {
603        match transform {
604            Transform::SignExtend(n) => {
605                expr = format!("_sign_extend({}, {})", expr, n);
606            }
607            Transform::ZeroExtend(_) => {
608                // No-op in Python (arbitrary precision)
609            }
610            Transform::ShiftLeft(n) => {
611                expr = format!("(({}) << {})", expr, n);
612            }
613        }
614    }
615
616    // Handle sub-decoder fields
617    if let Some(ref sd_name) = resolved.sub_decoder {
618        let decode_fn = format!("_decode_{}", to_snake_case(sd_name));
619        return format!("{}({})", decode_fn, expr);
620    }
621
622    expr
623}
624
625/// Compute a guard condition for a leaf node in Python.
626/// Returns `None` if no guard is needed.
627pub fn leaf_guard_python(
628    instr: &ValidatedInstruction,
629    unit_bytes: u32,
630    endian: &str,
631) -> Option<String> {
632    let fixed_bits = instr.fixed_bits();
633    if fixed_bits.is_empty() {
634        return None;
635    }
636
637    let mut units_map: HashMap<u32, Vec<(u32, Bit)>> = HashMap::new();
638    for (unit, hw_bit, bit) in fixed_bits {
639        units_map.entry(unit).or_default().push((hw_bit, bit));
640    }
641
642    let mut conditions = Vec::new();
643
644    for (unit, bits) in &units_map {
645        let (mask, value) = compute_mask_value(bits);
646        if mask != 0 {
647            let source = if *unit == 0 {
648                "opcode".to_string()
649            } else {
650                unit_read_python(*unit, unit_bytes, endian)
651            };
652            conditions.push(format!("{} & {:#x} == {:#x}", source, mask, value));
653        }
654    }
655
656    if conditions.is_empty() {
657        None
658    } else {
659        Some(conditions.join(" and "))
660    }
661}
662
663/// Compute a bitmask and expected value from fixed bits.
664fn compute_mask_value(fixed_bits: &[(u32, Bit)]) -> (u64, u64) {
665    let mut mask: u64 = 0;
666    let mut value: u64 = 0;
667    for &(bit_pos, bit_val) in fixed_bits {
668        if bit_val == Bit::Wildcard {
669            continue;
670        }
671        mask |= 1u64 << bit_pos;
672        if bit_val == Bit::One {
673            value |= 1u64 << bit_pos;
674        }
675    }
676    (mask, value)
677}
678
679/// Convert format pieces to a Python expression (concatenated string).
680/// `fields_var` is the variable name for the fields dict (e.g., "fields").
681pub fn format_pieces_to_python_expr(
682    pieces: &[FormatPiece],
683    fields: &[ResolvedField],
684    fields_var: &str,
685    display: &DisplayConfig,
686) -> String {
687    if pieces.is_empty() {
688        return "\"\"".to_string();
689    }
690
691    let all_literal = pieces.iter().all(|p| matches!(p, FormatPiece::Literal(_)));
692    if all_literal {
693        let mut s = String::new();
694        for piece in pieces {
695            if let FormatPiece::Literal(lit) = piece {
696                s.push_str(lit);
697            }
698        }
699        return format!("\"{}\"", escape_python_str(&s));
700    }
701
702    // Build an f-string-style expression
703    let mut result = String::from("f\"");
704    for piece in pieces {
705        match piece {
706            FormatPiece::Literal(lit) => {
707                result.push_str(&escape_python_fstr(lit));
708            }
709            FormatPiece::FieldRef { expr, spec } => {
710                if spec.is_some() {
711                    // Explicit format spec overrides display hints
712                    result.push('{');
713                    result.push_str(&expr_to_python(expr, fields, fields_var, display));
714                    if let Some(spec) = spec {
715                        result.push(':');
716                        result.push_str(spec);
717                    }
718                    result.push('}');
719                } else if let Some(wrapper) = resolve_display_wrapper(expr, fields, display) {
720                    // Apply display format (hex, signed_hex, or type prefix)
721                    result.push('{');
722                    result.push_str(&wrapper);
723                    result.push('}');
724                } else {
725                    result.push('{');
726                    result.push_str(&expr_to_python(expr, fields, fields_var, display));
727                    result.push('}');
728                }
729            }
730        }
731    }
732    result.push('"');
733    result
734}
735
736/// Resolve display formatting for a format expression.
737/// Returns a Python expression string with the display wrapper applied, or None.
738fn resolve_display_wrapper(
739    expr: &FormatExpr,
740    fields: &[ResolvedField],
741    display: &DisplayConfig,
742) -> Option<String> {
743    // Find the display format from the expression's primary field
744    let (display_fmt, alias_name) = resolve_display_info(expr, fields)?;
745
746    let raw_expr = expr_to_python(expr, fields, "fields", display);
747
748    // Check type prefix first (e.g., gpr -> "r")
749    if let Some(alias) = &alias_name {
750        if let Some(prefix) = display.type_prefixes.get(alias) {
751            return Some(format!("\"{}\" + str({})", escape_python_str(prefix), raw_expr));
752        }
753    }
754
755    // Check display format hint
756    match display_fmt? {
757        DisplayFormat::SignedHex => Some(format!("_fmt_signed_hex({})", raw_expr)),
758        DisplayFormat::Hex => Some(format!("_fmt_hex({})", raw_expr)),
759    }
760}
761
762/// Extract display format and alias name from a format expression.
763/// Looks at the primary field in the expression.
764fn resolve_display_info(
765    expr: &FormatExpr,
766    fields: &[ResolvedField],
767) -> Option<(Option<DisplayFormat>, Option<String>)> {
768    match expr {
769        FormatExpr::Field(name) => {
770            let field = fields.iter().find(|f| f.name == *name)?;
771            Some((
772                field.resolved_type.display_format,
773                field.resolved_type.alias_name.clone(),
774            ))
775        }
776        FormatExpr::Arithmetic { left, right, .. } => {
777            // Try left first, then right
778            resolve_display_info(left, fields).or_else(|| resolve_display_info(right, fields))
779        }
780        _ => None,
781    }
782}
783
784/// Convert a FormatExpr to a Python expression string.
785pub fn expr_to_python(expr: &FormatExpr, fields: &[ResolvedField], fields_var: &str, display: &DisplayConfig) -> String {
786    match expr {
787        FormatExpr::Field(name) => {
788            format!("{}[\"{}\"]", fields_var, name)
789        }
790        FormatExpr::Ternary {
791            field,
792            if_nonzero,
793            if_zero,
794        } => {
795            let else_val = if_zero.as_deref().unwrap_or("");
796            format!(
797                "(\"{}\" if {}[\"{}\"] else \"{}\")",
798                escape_python_str(if_nonzero),
799                fields_var,
800                field,
801                escape_python_str(else_val)
802            )
803        }
804        FormatExpr::Arithmetic { left, op, right } => {
805            let l = expr_to_python(left, fields, fields_var, display);
806            let r = expr_to_python(right, fields, fields_var, display);
807            let op_str = arith_op_str(op);
808            format!("({} {} {})", l, op_str, r)
809        }
810        FormatExpr::IntLiteral(val) => format!("{}", val),
811        FormatExpr::MapCall { map_name, args } => {
812            let arg_strs: Vec<String> = args
813                .iter()
814                .map(|a| expr_to_python(a, fields, fields_var, display))
815                .collect();
816            format!("{}({})", map_name, arg_strs.join(", "))
817        }
818        FormatExpr::BuiltinCall { func, args } => {
819            let arg_strs: Vec<String> = args
820                .iter()
821                .map(|a| expr_to_python(a, fields, fields_var, display))
822                .collect();
823            match func {
824                BuiltinFunc::RotateRight => {
825                    format!(
826                        "_rotate_right({}, {})",
827                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
828                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0")
829                    )
830                }
831                BuiltinFunc::RotateLeft => {
832                    format!(
833                        "_rotate_left({}, {})",
834                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
835                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0")
836                    )
837                }
838            }
839        }
840        FormatExpr::SubDecoderAccess { field, fragment } => {
841            format!("{}[\"{}\"][\"{}\"]", fields_var, field, fragment)
842        }
843    }
844}
845
846/// Generate a Python guard condition from a Guard.
847pub fn emit_guard_python(
848    guard: &Guard,
849    _fields: &[ResolvedField],
850    fields_var: &str,
851    _display: &DisplayConfig,
852) -> String {
853    let conditions: Vec<String> = guard
854        .conditions
855        .iter()
856        .map(|cond| {
857            let left = guard_operand_to_python(&cond.left, fields_var);
858            let right = guard_operand_to_python(&cond.right, fields_var);
859            let op = match cond.op {
860                CompareOp::Eq => "==",
861                CompareOp::Ne => "!=",
862                CompareOp::Lt => "<",
863                CompareOp::Le => "<=",
864                CompareOp::Gt => ">",
865                CompareOp::Ge => ">=",
866            };
867            format!("{} {} {}", left, op, right)
868        })
869        .collect();
870
871    conditions.join(" and ")
872}
873
874fn guard_operand_to_python(operand: &GuardOperand, fields_var: &str) -> String {
875    match operand {
876        GuardOperand::Field(name) => format!("{}[\"{}\"]", fields_var, name),
877        GuardOperand::Literal(val) => format!("{}", val),
878        GuardOperand::Expr { left, op, right } => {
879            let l = guard_operand_to_python(left, fields_var);
880            let r = guard_operand_to_python(right, fields_var);
881            let op_str = arith_op_str(op);
882            format!("({} {} {})", l, op_str, r)
883        }
884    }
885}
886
887/// Generate a Python sub-decoder dispatch function.
888pub fn emit_subdecoder_python(
889    out: &mut String,
890    sd: &ValidatedSubDecoder,
891) {
892    let fn_name = format!("_decode_{}", to_snake_case(&sd.name));
893    let width = sd.width;
894    let _unit_bytes = width / 8;
895
896    // Generate map functions for sub-decoder-local maps
897    emit_map_functions_python(out, &sd.maps);
898
899    writeln!(out, "def {}(val):", fn_name).unwrap();
900
901    // Build a simple dispatch table using if/elif on masked values
902    for (i, instr) in sd.instructions.iter().enumerate() {
903        let (mask, value) = compute_instruction_mask_value(instr);
904
905        let keyword = if i == 0 { "if" } else { "elif" };
906        writeln!(out, "    {} val & {:#x} == {:#x}:", keyword, mask, value).unwrap();
907
908        // Extract fields
909        for field in &instr.resolved_fields {
910            let extract = extract_field_from_val(&field.ranges, width);
911            let expr = apply_transforms_python(&extract, &field.resolved_type);
912            writeln!(out, "        {} = {}", field.name, expr).unwrap();
913        }
914
915        // Build fragment dict
916        let mut frag_entries = Vec::new();
917        for frag in &instr.fragments {
918            let frag_expr = format_pieces_to_python_subdecoder_str(&frag.pieces, &instr.resolved_fields);
919            frag_entries.push(format!("\"{}\": {}", frag.name, frag_expr));
920        }
921        writeln!(out, "        return {{{}}}", frag_entries.join(", ")).unwrap();
922    }
923
924    writeln!(out, "    return None").unwrap();
925    writeln!(out).unwrap();
926    writeln!(out).unwrap();
927}
928
929/// Compute mask/value for a sub-decoder instruction.
930fn compute_instruction_mask_value(instr: &ValidatedSubInstruction) -> (u64, u64) {
931    let mut mask: u64 = 0;
932    let mut value: u64 = 0;
933
934    for seg in &instr.segments {
935        if let Segment::Fixed {
936            ranges, pattern, ..
937        } = seg
938        {
939            let mut bit_idx = 0;
940            for range in ranges {
941                for i in 0..range.width() {
942                    if bit_idx < pattern.len() {
943                        let bit = pattern[bit_idx];
944                        if bit != Bit::Wildcard {
945                            let hw_bit = range.start - i;
946                            mask |= 1u64 << hw_bit;
947                            if bit == Bit::One {
948                                value |= 1u64 << hw_bit;
949                            }
950                        }
951                        bit_idx += 1;
952                    }
953                }
954            }
955        }
956    }
957
958    (mask, value)
959}
960
961/// Extract a field value from `val` for sub-decoder (single-unit, no data[] reads).
962fn extract_field_from_val(ranges: &[BitRange], _width: u32) -> String {
963    if ranges.is_empty() {
964        return "0".to_string();
965    }
966
967    if ranges.len() == 1 {
968        let range = ranges[0];
969        let width = range.width();
970        let shift = range.end;
971        let mask = (1u64 << width) - 1;
972
973        if shift == 0 {
974            format!("(val & {:#x})", mask)
975        } else {
976            format!("((val >> {}) & {:#x})", shift, mask)
977        }
978    } else {
979        let mut parts = Vec::new();
980        let mut accumulated_width = 0u32;
981
982        for range in ranges {
983            let width = range.width();
984            let shift = range.end;
985            let mask = (1u64 << width) - 1;
986
987            let extracted = if shift == 0 {
988                format!("(val & {:#x})", mask)
989            } else {
990                format!("((val >> {}) & {:#x})", shift, mask)
991            };
992
993            if accumulated_width > 0 {
994                parts.push(format!("({} << {})", extracted, accumulated_width));
995            } else {
996                parts.push(extracted);
997            }
998
999            accumulated_width += width;
1000        }
1001
1002        parts.join(" | ")
1003    }
1004}
1005
1006/// Convert format pieces for a sub-decoder fragment to a Python string expression.
1007/// Fields are referenced directly by name (not via a dict).
1008fn format_pieces_to_python_subdecoder_str(
1009    pieces: &[FormatPiece],
1010    fields: &[ResolvedField],
1011) -> String {
1012    if pieces.is_empty() {
1013        return "\"\"".to_string();
1014    }
1015
1016    let all_literal = pieces.iter().all(|p| matches!(p, FormatPiece::Literal(_)));
1017    if all_literal {
1018        let mut s = String::new();
1019        for piece in pieces {
1020            if let FormatPiece::Literal(lit) = piece {
1021                s.push_str(lit);
1022            }
1023        }
1024        return format!("\"{}\"", escape_python_str(&s));
1025    }
1026
1027    let mut result = String::from("f\"");
1028    for piece in pieces {
1029        match piece {
1030            FormatPiece::Literal(lit) => {
1031                result.push_str(&escape_python_fstr(lit));
1032            }
1033            FormatPiece::FieldRef { expr, spec } => {
1034                result.push('{');
1035                result.push_str(&expr_to_python_direct(expr, fields));
1036                if let Some(spec) = spec {
1037                    result.push(':');
1038                    result.push_str(spec);
1039                }
1040                result.push('}');
1041            }
1042        }
1043    }
1044    result.push('"');
1045    result
1046}
1047
1048/// Convert a FormatExpr to a Python expression where fields are local variables.
1049fn expr_to_python_direct(expr: &FormatExpr, _fields: &[ResolvedField]) -> String {
1050    match expr {
1051        FormatExpr::Field(name) => name.clone(),
1052        FormatExpr::Ternary {
1053            field,
1054            if_nonzero,
1055            if_zero,
1056        } => {
1057            let else_val = if_zero.as_deref().unwrap_or("");
1058            format!(
1059                "(\"{}\" if {} else \"{}\")",
1060                escape_python_str(if_nonzero),
1061                field,
1062                escape_python_str(else_val)
1063            )
1064        }
1065        FormatExpr::Arithmetic { left, op, right } => {
1066            let l = expr_to_python_direct(left, _fields);
1067            let r = expr_to_python_direct(right, _fields);
1068            let op_str = arith_op_str(op);
1069            format!("({} {} {})", l, op_str, r)
1070        }
1071        FormatExpr::IntLiteral(val) => format!("{}", val),
1072        FormatExpr::MapCall { map_name, args } => {
1073            let arg_strs: Vec<String> = args
1074                .iter()
1075                .map(|a| expr_to_python_direct(a, _fields))
1076                .collect();
1077            format!("{}({})", map_name, arg_strs.join(", "))
1078        }
1079        FormatExpr::BuiltinCall { func, args } => {
1080            let arg_strs: Vec<String> = args
1081                .iter()
1082                .map(|a| expr_to_python_direct(a, _fields))
1083                .collect();
1084            match func {
1085                BuiltinFunc::RotateRight => {
1086                    format!(
1087                        "_rotate_right({}, {})",
1088                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
1089                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0")
1090                    )
1091                }
1092                BuiltinFunc::RotateLeft => {
1093                    format!(
1094                        "_rotate_left({}, {})",
1095                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
1096                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0")
1097                    )
1098                }
1099            }
1100        }
1101        FormatExpr::SubDecoderAccess { field, fragment } => {
1102            format!("{}[\"{}\"]", field, fragment)
1103        }
1104    }
1105}
1106
/// Convert a name to snake_case.
///
/// Every ASCII uppercase letter is lowercased; an underscore is inserted in
/// front of it unless it is the very first character. All other characters
/// are copied through unchanged.
pub fn to_snake_case(name: &str) -> String {
    let mut snake = String::new();
    let mut at_start = true;

    for ch in name.chars() {
        if !at_start && ch.is_ascii_uppercase() {
            snake.push('_');
        }
        snake.push(ch.to_ascii_lowercase());
        at_start = false;
    }

    snake
}
1118
1119/// Check if any instruction uses rotate builtins.
1120pub fn needs_rotate_helpers(def: &ValidatedDef) -> bool {
1121    for instr in &def.instructions {
1122        for fl in &instr.format_lines {
1123            for piece in &fl.pieces {
1124                if let FormatPiece::FieldRef { expr, .. } = piece {
1125                    if expr_uses_rotate(expr) {
1126                        return true;
1127                    }
1128                }
1129            }
1130        }
1131    }
1132    false
1133}
1134
1135fn expr_uses_rotate(expr: &FormatExpr) -> bool {
1136    match expr {
1137        FormatExpr::BuiltinCall { func, .. } => matches!(
1138            func,
1139            BuiltinFunc::RotateRight | BuiltinFunc::RotateLeft
1140        ),
1141        FormatExpr::Arithmetic { left, right, .. } => {
1142            expr_uses_rotate(left) || expr_uses_rotate(right)
1143        }
1144        _ => false,
1145    }
1146}
1147
1148/// Check if any instruction uses sign_extend transforms.
1149pub fn needs_sign_extend(def: &ValidatedDef) -> bool {
1150    for instr in &def.instructions {
1151        for field in &instr.resolved_fields {
1152            for transform in &field.resolved_type.transforms {
1153                if matches!(transform, Transform::SignExtend(_)) {
1154                    return true;
1155                }
1156            }
1157        }
1158    }
1159    // Also check sub-decoders
1160    for sd in &def.sub_decoders {
1161        for instr in &sd.instructions {
1162            for field in &instr.resolved_fields {
1163                for transform in &field.resolved_type.transforms {
1164                    if matches!(transform, Transform::SignExtend(_)) {
1165                        return true;
1166                    }
1167                }
1168            }
1169        }
1170    }
1171    false
1172}
1173
1174/// Generate the `_format_insn(itype, fields)` Python function.
1175/// Returns (mnemonic_str, operands_str).
1176pub fn emit_format_function(
1177    out: &mut String,
1178    def: &ValidatedDef,
1179    itype_prefix: &str,
1180    display: &DisplayConfig,
1181) {
1182    writeln!(out, "def _format_insn(itype, fields):").unwrap();
1183    writeln!(out, "    \"\"\"Format an instruction. Returns (mnemonic, operands) strings.\"\"\"").unwrap();
1184
1185    for (i, instr) in def.instructions.iter().enumerate() {
1186        let itype_const = format!("{}_{}", itype_prefix, instr.name.to_ascii_uppercase());
1187        let keyword = if i == 0 { "if" } else { "elif" };
1188        writeln!(out, "    {} itype == {}:", keyword, itype_const).unwrap();
1189
1190        if instr.format_lines.is_empty() {
1191            // Fallback: mnemonic is instruction name, operands are field values
1192            if instr.resolved_fields.is_empty() {
1193                writeln!(out, "        return \"{}\", \"\"", instr.name).unwrap();
1194            } else {
1195                let field_strs: Vec<String> = instr
1196                    .resolved_fields
1197                    .iter()
1198                    .map(|f| format!("str(fields[\"{}\"])", f.name))
1199                    .collect();
1200                writeln!(
1201                    out,
1202                    "        return \"{}\", \", \".join([{}])",
1203                    instr.name,
1204                    field_strs.join(", ")
1205                )
1206                .unwrap();
1207            }
1208        } else {
1209            emit_format_lines_python(out, instr, 2, display);
1210        }
1211    }
1212
1213    writeln!(out, "    return \"???\", \"\"").unwrap();
1214    writeln!(out).unwrap();
1215    writeln!(out).unwrap();
1216}
1217
1218/// Emit format lines for a single instruction as Python code.
1219fn emit_format_lines_python(
1220    out: &mut String,
1221    instr: &ValidatedInstruction,
1222    indent: usize,
1223    display: &DisplayConfig,
1224) {
1225    let pad = "    ".repeat(indent);
1226
1227    if instr.format_lines.len() == 1 && instr.format_lines[0].guard.is_none() {
1228        let fl = &instr.format_lines[0];
1229        let (mnemonic, operands) = split_format_pieces(&fl.pieces);
1230        let mnemonic_expr =
1231            format_pieces_to_python_expr(&mnemonic, &instr.resolved_fields, "fields", display);
1232        let operands_expr =
1233            format_pieces_to_python_expr(&operands, &instr.resolved_fields, "fields", display);
1234        writeln!(out, "{}return {}, {}", pad, mnemonic_expr, operands_expr).unwrap();
1235        return;
1236    }
1237
1238    // Multiple format lines with guards
1239    for (i, fl) in instr.format_lines.iter().enumerate() {
1240        let (mnemonic, operands) = split_format_pieces(&fl.pieces);
1241        let mnemonic_expr =
1242            format_pieces_to_python_expr(&mnemonic, &instr.resolved_fields, "fields", display);
1243        let operands_expr =
1244            format_pieces_to_python_expr(&operands, &instr.resolved_fields, "fields", display);
1245
1246        if let Some(guard) = &fl.guard {
1247            let guard_code = emit_guard_python(guard, &instr.resolved_fields, "fields", display);
1248            if i == 0 {
1249                writeln!(out, "{}if {}:", pad, guard_code).unwrap();
1250            } else {
1251                writeln!(out, "{}elif {}:", pad, guard_code).unwrap();
1252            }
1253            writeln!(out, "{}    return {}, {}", pad, mnemonic_expr, operands_expr).unwrap();
1254        } else {
1255            if i > 0 {
1256                writeln!(out, "{}else:", pad).unwrap();
1257                writeln!(out, "{}    return {}, {}", pad, mnemonic_expr, operands_expr).unwrap();
1258            } else {
1259                writeln!(out, "{}return {}, {}", pad, mnemonic_expr, operands_expr).unwrap();
1260            }
1261        }
1262    }
1263}
1264
1265/// Split format pieces into (mnemonic_pieces, operand_pieces).
1266/// The mnemonic is everything before the first space; operands are the rest.
1267fn split_format_pieces(pieces: &[FormatPiece]) -> (Vec<FormatPiece>, Vec<FormatPiece>) {
1268    let mut mnemonic = Vec::new();
1269    let mut operands = Vec::new();
1270    let mut found_space = false;
1271
1272    for piece in pieces {
1273        if found_space {
1274            operands.push(piece.clone());
1275        } else {
1276            match piece {
1277                FormatPiece::Literal(lit) => {
1278                    if let Some(pos) = lit.find(' ') {
1279                        // Split this literal at the first space
1280                        let before = &lit[..pos];
1281                        let after = &lit[pos + 1..];
1282                        if !before.is_empty() {
1283                            mnemonic.push(FormatPiece::Literal(before.to_string()));
1284                        }
1285                        if !after.is_empty() {
1286                            operands.push(FormatPiece::Literal(after.to_string()));
1287                        }
1288                        found_space = true;
1289                    } else {
1290                        mnemonic.push(piece.clone());
1291                    }
1292                }
1293                _ => {
1294                    // Field ref before any space - part of mnemonic (unusual but possible)
1295                    mnemonic.push(piece.clone());
1296                }
1297            }
1298        }
1299    }
1300
1301    (mnemonic, operands)
1302}