Skip to main content

chipi_core/
codegen_cpp.rs

1//! C++ code generation from validated definitions and decision trees.
2//!
3//! Generates a single-header C++ file with instruction enum, decode function,
4//! and disassembly formatting.
5
6use std::collections::HashMap;
7use std::fmt::Write;
8
9use crate::backend::cpp::CppOptions;
10use crate::backend::cpp::GuardStyle;
11use crate::tree::DecodeNode;
12use crate::types::*;
13
14/// Generate a complete C++ header file.
15pub fn generate_cpp_code(
16    def: &ValidatedDef,
17    tree: &DecodeNode,
18    opts: &CppOptions,
19    type_maps: &HashMap<String, String>,
20) -> String {
21    let mut out = String::new();
22
23    let default_ns = to_snake_case(&def.config.name);
24    let ns = opts.namespace.as_deref().unwrap_or(&default_ns);
25    let guard_name = format!("CHIPI_{}_HPP", ns.to_ascii_uppercase());
26    let unit_bytes = def.config.width / 8;
27    let word_type = cpp_word_type(def.config.width);
28    let endian = &def.config.endian;
29    let variable_length = def.instructions.iter().any(|i| i.unit_count() > 1);
30
31    // Header guard
32    match opts.guard_style {
33        GuardStyle::Pragma => writeln!(out, "#pragma once").unwrap(),
34        GuardStyle::Ifndef => {
35            writeln!(out, "#ifndef {}", guard_name).unwrap();
36            writeln!(out, "#define {}", guard_name).unwrap();
37        }
38    }
39    writeln!(out).unwrap();
40    writeln!(
41        out,
42        "// Auto-generated by https://github.com/ioncodes/chipi"
43    )
44    .unwrap();
45    writeln!(out, "// Do not edit.").unwrap();
46    writeln!(out).unwrap();
47
48    // Includes
49    writeln!(out, "#include <cstdint>").unwrap();
50    writeln!(out, "#include <cstddef>").unwrap();
51    writeln!(out, "#include <cstring>").unwrap();
52    writeln!(out, "#include <string>").unwrap();
53    writeln!(out, "#include <optional>").unwrap();
54    writeln!(out, "#include <format>").unwrap();
55    for inc in &opts.includes {
56        writeln!(out, "#include \"{}\"", inc).unwrap();
57    }
58    writeln!(out).unwrap();
59
60    writeln!(out, "namespace {} {{", ns).unwrap();
61    writeln!(out).unwrap();
62
63    // Built-in display wrapper types for type aliases with display hints
64    emit_display_types(&mut out, def, type_maps);
65
66    // Opcode enum
67    emit_opcode_enum(&mut out, def);
68
69    // Instruction struct
70    emit_instruction_struct(&mut out, def, type_maps);
71
72    // Sub-decoder types and functions
73    for sd in &def.sub_decoders {
74        emit_subdecoder(&mut out, sd, def.config.width);
75    }
76
77    // Map functions
78    emit_map_functions(&mut out, def);
79
80    // Decode function
81    emit_decode_function(
82        &mut out,
83        def,
84        tree,
85        word_type,
86        unit_bytes,
87        endian,
88        variable_length,
89        type_maps,
90    );
91
92    // Format function
93    emit_format_function(&mut out, def, type_maps, opts);
94
95    writeln!(out, "}} // namespace {}", ns).unwrap();
96
97    if opts.guard_style == GuardStyle::Ifndef {
98        writeln!(out).unwrap();
99        writeln!(out, "#endif // {}", guard_name).unwrap();
100    }
101
102    out
103}
104
/// Map an instruction-unit width in bits to the C++ unsigned integer type
/// that holds one unit. Any width other than 8 or 16 yields `uint32_t`.
fn cpp_word_type(width: u32) -> &'static str {
    if width == 8 {
        "uint8_t"
    } else if width == 16 {
        "uint16_t"
    } else {
        "uint32_t"
    }
}
113
/// Return the signed C++ integer type with the same width as `base`.
///
/// `u8`/`i8` give `int8_t`, `u16`/`i16` give `int16_t`; every other name
/// (including `u32`/`i32`) gives `int32_t`.
fn cpp_signed_type(base: &str) -> &'static str {
    // Drop the single signedness letter and look only at the width digits.
    let width_digits = base.strip_prefix(|c: char| c == 'u' || c == 'i');
    match width_digits {
        Some("8") => "int8_t",
        Some("16") => "int16_t",
        _ => "int32_t",
    }
}
122
/// Translate a DSL base type name into the C++ type used to store it.
///
/// Sub-byte unsigned types (`u1`..`u7`) share `uint8_t` with `u8`. Names that
/// are not in the table fall back to `uint32_t`.
fn cpp_type_for(base: &str) -> &'static str {
    const KNOWN_TYPES: &[(&str, &str)] = &[
        ("bool", "bool"),
        ("u1", "uint8_t"),
        ("u2", "uint8_t"),
        ("u3", "uint8_t"),
        ("u4", "uint8_t"),
        ("u5", "uint8_t"),
        ("u6", "uint8_t"),
        ("u7", "uint8_t"),
        ("u8", "uint8_t"),
        ("i8", "int8_t"),
        ("u16", "uint16_t"),
        ("i16", "int16_t"),
        ("u32", "uint32_t"),
        ("i32", "int32_t"),
    ];

    KNOWN_TYPES
        .iter()
        .find(|(dsl_name, _)| *dsl_name == base)
        .map_or("uint32_t", |(_, cpp_name)| *cpp_name)
}
135
/// Bit width associated with a DSL base type name.
///
/// Only the 8- and 16-bit integer names are special-cased; every other name
/// is treated as 32 bits wide.
fn type_bits(base: &str) -> u32 {
    if matches!(base, "u8" | "i8") {
        8
    } else if matches!(base, "u16" | "i16") {
        16
    } else {
        32
    }
}
144
/// Convert a name to snake_case: every ASCII uppercase letter is lowercased
/// and, unless it is the first character, preceded by an underscore.
fn to_snake_case(name: &str) -> String {
    name.chars().fold(String::new(), |mut snake, ch| {
        // Each step appends at least one character, so a non-empty
        // accumulator means we are past the first character.
        if ch.is_ascii_uppercase() && !snake.is_empty() {
            snake.push('_');
        }
        snake.push(ch.to_ascii_lowercase());
        snake
    })
}
155
/// Convert an underscore-separated name to PascalCase.
///
/// Underscores are dropped; the first character of each underscore-delimited
/// word is ASCII-uppercased and the remaining characters are ASCII-lowercased.
fn to_pascal_case(name: &str) -> String {
    let mut pascal = String::new();
    for word in name.split('_') {
        let mut letters = word.chars();
        // Empty words (leading, trailing or doubled underscores) add nothing.
        if let Some(initial) = letters.next() {
            pascal.push(initial.to_ascii_uppercase());
            pascal.extend(letters.map(|c| c.to_ascii_lowercase()));
        }
    }
    pascal
}
171
172/// Emit built-in wrapper types for type aliases with `display(hex)` or `display(signed_hex)`.
173/// Only emits types that aren't already overridden via `type_map`.
174fn emit_display_types(out: &mut String, def: &ValidatedDef, type_maps: &HashMap<String, String>) {
175    let mut need_signed_hex = false;
176    let mut need_hex = false;
177
178    for alias in &def.type_aliases {
179        // Skip if the user provided a type_map override
180        if type_maps.contains_key(&alias.name) {
181            continue;
182        }
183        match alias.display_format {
184            Some(DisplayFormat::SignedHex) => need_signed_hex = true,
185            Some(DisplayFormat::Hex) => need_hex = true,
186            None => {}
187        }
188    }
189
190    if need_signed_hex {
191        writeln!(out, "struct SignedHex {{").unwrap();
192        writeln!(out, "    int32_t value;").unwrap();
193        writeln!(out, "    SignedHex() = default;").unwrap();
194        writeln!(out, "    constexpr SignedHex(int32_t v) : value(v) {{}}").unwrap();
195        writeln!(
196            out,
197            "    bool operator==(const SignedHex&) const = default;"
198        )
199        .unwrap();
200        writeln!(
201            out,
202            "    bool operator==(int other) const {{ return value == other; }}"
203        )
204        .unwrap();
205        writeln!(
206            out,
207            "    bool operator!=(int other) const {{ return value != other; }}"
208        )
209        .unwrap();
210        writeln!(
211            out,
212            "    bool operator<(int other) const {{ return value < other; }}"
213        )
214        .unwrap();
215        writeln!(
216            out,
217            "    bool operator<=(int other) const {{ return value <= other; }}"
218        )
219        .unwrap();
220        writeln!(
221            out,
222            "    bool operator>(int other) const {{ return value > other; }}"
223        )
224        .unwrap();
225        writeln!(
226            out,
227            "    bool operator>=(int other) const {{ return value >= other; }}"
228        )
229        .unwrap();
230        writeln!(
231            out,
232            "    SignedHex operator-() const {{ return SignedHex(-value); }}"
233        )
234        .unwrap();
235        writeln!(out, "    friend SignedHex operator-(int lhs, SignedHex rhs) {{ return SignedHex(lhs - rhs.value); }}").unwrap();
236        writeln!(out, "    friend SignedHex operator+(int lhs, SignedHex rhs) {{ return SignedHex(lhs + rhs.value); }}").unwrap();
237        writeln!(out, "    friend SignedHex operator+(SignedHex lhs, int rhs) {{ return SignedHex(lhs.value + rhs); }}").unwrap();
238        writeln!(out, "    friend SignedHex operator-(SignedHex lhs, int rhs) {{ return SignedHex(lhs.value - rhs); }}").unwrap();
239        writeln!(out, "    friend SignedHex operator*(SignedHex lhs, int rhs) {{ return SignedHex(lhs.value * rhs); }}").unwrap();
240        writeln!(out, "}};").unwrap();
241        writeln!(out).unwrap();
242    }
243
244    if need_hex {
245        writeln!(out, "struct Hex {{").unwrap();
246        writeln!(out, "    uint32_t value;").unwrap();
247        writeln!(out, "    Hex() = default;").unwrap();
248        writeln!(out, "    constexpr Hex(uint32_t v) : value(v) {{}}").unwrap();
249        writeln!(out, "    bool operator==(const Hex&) const = default;").unwrap();
250        writeln!(
251            out,
252            "    bool operator==(unsigned other) const {{ return value == other; }}"
253        )
254        .unwrap();
255        writeln!(out, "}};").unwrap();
256        writeln!(out).unwrap();
257    }
258
259    writeln!(out, "}} // namespace {}", to_snake_case(&def.config.name)).unwrap();
260    writeln!(out).unwrap();
261
262    // std::formatter specializations must be outside the namespace
263    if need_signed_hex {
264        let ns = to_snake_case(&def.config.name);
265        writeln!(
266            out,
267            "template <> struct std::formatter<{}::SignedHex> : std::formatter<std::string> {{",
268            ns
269        )
270        .unwrap();
271        writeln!(
272            out,
273            "    auto format({}::SignedHex v, auto& ctx) const {{",
274            ns
275        )
276        .unwrap();
277        writeln!(out, "        if (v.value < 0)").unwrap();
278        writeln!(out, "            return std::formatter<std::string>::format(std::format(\"-0x{{:x}}\", static_cast<unsigned>(-v.value)), ctx);").unwrap();
279        writeln!(out, "        return std::formatter<std::string>::format(std::format(\"0x{{:x}}\", static_cast<unsigned>(v.value)), ctx);").unwrap();
280        writeln!(out, "    }}").unwrap();
281        writeln!(out, "}};").unwrap();
282        writeln!(out).unwrap();
283    }
284
285    if need_hex {
286        let ns = to_snake_case(&def.config.name);
287        writeln!(
288            out,
289            "template <> struct std::formatter<{}::Hex> : std::formatter<std::string> {{",
290            ns
291        )
292        .unwrap();
293        writeln!(out, "    auto format({}::Hex v, auto& ctx) const {{", ns).unwrap();
294        writeln!(out, "        return std::formatter<std::string>::format(std::format(\"0x{{:x}}\", v.value), ctx);").unwrap();
295        writeln!(out, "    }}").unwrap();
296        writeln!(out, "}};").unwrap();
297        writeln!(out).unwrap();
298    }
299
300    // Re-open the namespace
301    if need_signed_hex || need_hex {
302        writeln!(out, "namespace {} {{", to_snake_case(&def.config.name)).unwrap();
303        writeln!(out).unwrap();
304    }
305}
306
307/// Emit the Opcode enum.
308fn emit_opcode_enum(out: &mut String, def: &ValidatedDef) {
309    writeln!(out, "enum class Opcode : uint32_t {{").unwrap();
310    for (i, instr) in def.instructions.iter().enumerate() {
311        writeln!(out, "    {} = {},", to_pascal_case(&instr.name), i).unwrap();
312    }
313    writeln!(out, "}};").unwrap();
314    writeln!(out).unwrap();
315}
316
317/// Emit the Instruction struct with a tagged union for fields.
318fn emit_instruction_struct(
319    out: &mut String,
320    def: &ValidatedDef,
321    type_maps: &HashMap<String, String>,
322) {
323    writeln!(out, "struct Instruction {{").unwrap();
324    writeln!(out, "    Opcode opcode;").unwrap();
325    writeln!(out, "    uint32_t size; // bytes consumed").unwrap();
326    writeln!(out).unwrap();
327
328    // Generate a union with per-instruction field structs
329    let has_fields = def
330        .instructions
331        .iter()
332        .any(|i| !i.resolved_fields.is_empty());
333    if has_fields {
334        writeln!(out, "    union {{").unwrap();
335        for instr in &def.instructions {
336            if instr.resolved_fields.is_empty() {
337                continue;
338            }
339            writeln!(out, "        struct {{").unwrap();
340            for field in &instr.resolved_fields {
341                let cpp_type = field_cpp_type(field, type_maps);
342                writeln!(out, "            {} {};", cpp_type, field.name).unwrap();
343            }
344            writeln!(out, "        }} {};", instr.name).unwrap();
345        }
346        writeln!(out, "    }};").unwrap();
347    }
348
349    writeln!(out, "}};").unwrap();
350    writeln!(out).unwrap();
351}
352
353/// Get the C++ type for a field.
354fn field_cpp_type(field: &ResolvedField, type_maps: &HashMap<String, String>) -> String {
355    // Check type map first (user override)
356    if let Some(alias) = &field.resolved_type.alias_name {
357        if let Some(mapped) = type_maps.get(alias) {
358            return mapped.clone();
359        }
360    }
361
362    // Check sub-decoder
363    if let Some(ref sd_name) = field.resolved_type.sub_decoder {
364        return format!("{}Insn", to_pascal_case(sd_name));
365    }
366
367    // Check display format -> built-in wrapper types
368    match field.resolved_type.display_format {
369        Some(DisplayFormat::SignedHex) => return "SignedHex".to_string(),
370        Some(DisplayFormat::Hex) => return "Hex".to_string(),
371        None => {}
372    }
373
374    cpp_type_for(&field.resolved_type.base_type).to_string()
375}
376
377/// Emit sub-decoder struct and dispatch function.
378fn emit_subdecoder(out: &mut String, sd: &ValidatedSubDecoder, _parent_width: u32) {
379    let type_name = format!("{}Insn", to_pascal_case(&sd.name));
380    let word_type = cpp_word_type(sd.width);
381
382    // Fragment struct
383    writeln!(out, "struct {} {{", type_name).unwrap();
384    for frag_name in &sd.fragment_names {
385        writeln!(out, "    const char* {};", frag_name).unwrap();
386    }
387    writeln!(out, "}};").unwrap();
388    writeln!(out).unwrap();
389
390    // Pre-baked fragment strings for each instruction
391    // Then dispatch function
392    let fn_name = format!("decode_{}", to_snake_case(&sd.name));
393    writeln!(
394        out,
395        "inline std::optional<{}> {}({} val) {{",
396        type_name, fn_name, word_type
397    )
398    .unwrap();
399
400    for (i, instr) in sd.instructions.iter().enumerate() {
401        let (mask, value) = compute_instruction_mask_value_sub(instr);
402        let keyword = if i == 0 { "if" } else { "} else if" };
403        writeln!(
404            out,
405            "    {} ((val & {:#x}) == {:#x}) {{",
406            keyword, mask, value
407        )
408        .unwrap();
409
410        // Build fragment values
411        // For simplicity, use string literals where possible; for field-dependent
412        // fragments, generate inline formatting
413        for frag in &instr.fragments {
414            let all_literal = frag
415                .pieces
416                .iter()
417                .all(|p| matches!(p, FormatPiece::Literal(_)));
418            if all_literal {
419                let s: String = frag
420                    .pieces
421                    .iter()
422                    .map(|p| {
423                        if let FormatPiece::Literal(lit) = p {
424                            lit.as_str()
425                        } else {
426                            ""
427                        }
428                    })
429                    .collect();
430                writeln!(out, "        // {}.{} = \"{}\"", instr.name, frag.name, s).unwrap();
431            }
432        }
433
434        // Return struct with fragment values
435        let frag_values: Vec<String> = instr
436            .fragments
437            .iter()
438            .map(|frag| {
439                let all_literal = frag
440                    .pieces
441                    .iter()
442                    .all(|p| matches!(p, FormatPiece::Literal(_)));
443                if all_literal {
444                    let s: String = frag
445                        .pieces
446                        .iter()
447                        .map(|p| {
448                            if let FormatPiece::Literal(lit) = p {
449                                lit.as_str()
450                            } else {
451                                ""
452                            }
453                        })
454                        .collect();
455                    format!("\"{}\"", s)
456                } else {
457                    // For dynamic fragments, we'd need snprintf; use empty for now
458                    "\"\"".to_string()
459                }
460            })
461            .collect();
462
463        writeln!(
464            out,
465            "        return {} {{ {} }};",
466            type_name,
467            frag_values.join(", ")
468        )
469        .unwrap();
470    }
471
472    if !sd.instructions.is_empty() {
473        writeln!(out, "    }}").unwrap();
474    }
475    writeln!(out, "    return std::nullopt;").unwrap();
476    writeln!(out, "}}").unwrap();
477    writeln!(out).unwrap();
478}
479
480fn compute_instruction_mask_value_sub(instr: &ValidatedSubInstruction) -> (u64, u64) {
481    let mut mask: u64 = 0;
482    let mut value: u64 = 0;
483    for seg in &instr.segments {
484        if let Segment::Fixed {
485            ranges, pattern, ..
486        } = seg
487        {
488            let mut bit_idx = 0;
489            for range in ranges {
490                for i in 0..range.width() {
491                    if bit_idx < pattern.len() {
492                        let bit = pattern[bit_idx];
493                        if bit != Bit::Wildcard {
494                            let hw_bit = range.start - i;
495                            mask |= 1u64 << hw_bit;
496                            if bit == Bit::One {
497                                value |= 1u64 << hw_bit;
498                            }
499                        }
500                        bit_idx += 1;
501                    }
502                }
503            }
504        }
505    }
506    (mask, value)
507}
508
509/// Emit map (lookup) functions.
510fn emit_map_functions(out: &mut String, def: &ValidatedDef) {
511    for map_def in &def.maps {
512        let params: Vec<String> = map_def
513            .params
514            .iter()
515            .map(|p| format!("int {}", p))
516            .collect();
517        writeln!(
518            out,
519            "inline const char* {}({}) {{",
520            map_def.name,
521            params.join(", ")
522        )
523        .unwrap();
524
525        let key_var = if map_def.params.len() == 1 {
526            map_def.params[0].clone()
527        } else {
528            // Multi-param: won't use switch
529            String::new()
530        };
531
532        if map_def.params.len() == 1 {
533            writeln!(out, "    switch ({}) {{", key_var).unwrap();
534            for entry in &map_def.entries {
535                if entry.keys.len() == 1 && entry.keys[0] == MapKey::Wildcard {
536                    continue;
537                }
538                if let Some(MapKey::Value(v)) = entry.keys.first() {
539                    let s = pieces_to_str(&entry.output);
540                    writeln!(out, "    case {}: return \"{}\";", v, s).unwrap();
541                }
542            }
543
544            let default_str = map_def
545                .entries
546                .iter()
547                .find(|e| e.keys.len() == 1 && e.keys[0] == MapKey::Wildcard)
548                .map(|e| pieces_to_str(&e.output))
549                .unwrap_or_else(|| "???".to_string());
550            writeln!(out, "    default: return \"{}\";", default_str).unwrap();
551            writeln!(out, "    }}").unwrap();
552        } else {
553            // Fallback for multi-param maps: if/else chain
554            for entry in &map_def.entries {
555                if entry.keys.iter().any(|k| *k == MapKey::Wildcard) {
556                    continue;
557                }
558                let conds: Vec<String> = entry
559                    .keys
560                    .iter()
561                    .zip(map_def.params.iter())
562                    .map(|(k, p)| {
563                        if let MapKey::Value(v) = k {
564                            format!("{} == {}", p, v)
565                        } else {
566                            "true".to_string()
567                        }
568                    })
569                    .collect();
570                let s = pieces_to_str(&entry.output);
571                writeln!(out, "    if ({}) return \"{}\";", conds.join(" && "), s).unwrap();
572            }
573            writeln!(out, "    return \"???\";").unwrap();
574        }
575
576        writeln!(out, "}}").unwrap();
577        writeln!(out).unwrap();
578    }
579
580    // Also emit sub-decoder maps
581    for sd in &def.sub_decoders {
582        for map_def in &sd.maps {
583            let params: Vec<String> = map_def
584                .params
585                .iter()
586                .map(|p| format!("int {}", p))
587                .collect();
588            writeln!(
589                out,
590                "inline const char* {}({}) {{",
591                map_def.name,
592                params.join(", ")
593            )
594            .unwrap();
595            writeln!(out, "    switch ({}) {{", map_def.params[0]).unwrap();
596            for entry in &map_def.entries {
597                if entry.keys.len() == 1 && entry.keys[0] == MapKey::Wildcard {
598                    continue;
599                }
600                if let Some(MapKey::Value(v)) = entry.keys.first() {
601                    let s = pieces_to_str(&entry.output);
602                    writeln!(out, "    case {}: return \"{}\";", v, s).unwrap();
603                }
604            }
605            let default_str = map_def
606                .entries
607                .iter()
608                .find(|e| e.keys.len() == 1 && e.keys[0] == MapKey::Wildcard)
609                .map(|e| pieces_to_str(&e.output))
610                .unwrap_or_else(|| "???".to_string());
611            writeln!(out, "    default: return \"{}\";", default_str).unwrap();
612            writeln!(out, "    }}").unwrap();
613            writeln!(out, "}}").unwrap();
614            writeln!(out).unwrap();
615        }
616    }
617}
618
619fn pieces_to_str(pieces: &[FormatPiece]) -> String {
620    let mut s = String::new();
621    for piece in pieces {
622        if let FormatPiece::Literal(lit) = piece {
623            s.push_str(lit);
624        }
625    }
626    s
627}
628
629/// Emit the decode function.
630fn emit_decode_function(
631    out: &mut String,
632    def: &ValidatedDef,
633    tree: &DecodeNode,
634    word_type: &str,
635    unit_bytes: u32,
636    endian: &ByteEndian,
637    variable_length: bool,
638    type_maps: &HashMap<String, String>,
639) {
640    writeln!(
641        out,
642        "inline std::optional<Instruction> decode(const uint8_t* data, size_t len) {{"
643    )
644    .unwrap();
645    writeln!(out, "    if (len < {}) return std::nullopt;", unit_bytes).unwrap();
646
647    // Read first unit
648    emit_word_read(out, "opcode", word_type, 0, unit_bytes, endian, 1);
649
650    emit_tree_cpp(
651        out,
652        tree,
653        def,
654        1,
655        word_type,
656        unit_bytes,
657        endian,
658        variable_length,
659        type_maps,
660    );
661
662    writeln!(out, "}}").unwrap();
663    writeln!(out).unwrap();
664}
665
666/// Emit a word read from the data buffer.
667fn emit_word_read(
668    out: &mut String,
669    var_name: &str,
670    word_type: &str,
671    offset: u32,
672    unit_bytes: u32,
673    endian: &ByteEndian,
674    indent: usize,
675) {
676    let pad = "    ".repeat(indent);
677    match (unit_bytes, endian) {
678        (1, _) => {
679            writeln!(out, "{}{} {} = data[{}];", pad, word_type, var_name, offset).unwrap();
680        }
681        (2, ByteEndian::Big) => {
682            writeln!(
683                out,
684                "{}{} {} = (static_cast<uint16_t>(data[{}]) << 8) | data[{}];",
685                pad,
686                word_type,
687                var_name,
688                offset,
689                offset + 1
690            )
691            .unwrap();
692        }
693        (2, ByteEndian::Little) => {
694            writeln!(
695                out,
696                "{}{} {} = data[{}] | (static_cast<uint16_t>(data[{}]) << 8);",
697                pad,
698                word_type,
699                var_name,
700                offset,
701                offset + 1
702            )
703            .unwrap();
704        }
705        (4, ByteEndian::Big) => {
706            writeln!(
707                out,
708                "{}{} {} = (static_cast<uint32_t>(data[{}]) << 24) | (static_cast<uint32_t>(data[{}]) << 16) | (static_cast<uint32_t>(data[{}]) << 8) | data[{}];",
709                pad, word_type, var_name, offset, offset + 1, offset + 2, offset + 3
710            ).unwrap();
711        }
712        (4, ByteEndian::Little) => {
713            writeln!(
714                out,
715                "{}{} {} = data[{}] | (static_cast<uint32_t>(data[{}]) << 8) | (static_cast<uint32_t>(data[{}]) << 16) | (static_cast<uint32_t>(data[{}]) << 24);",
716                pad, word_type, var_name, offset, offset + 1, offset + 2, offset + 3
717            ).unwrap();
718        }
719        _ => {
720            writeln!(out, "{}// unsupported width", pad).unwrap();
721        }
722    }
723}
724
725/// Unit read expression (inline).
726fn unit_read_expr(unit: u32, _word_type: &str, unit_bytes: u32, endian: &ByteEndian) -> String {
727    if unit == 0 {
728        return "opcode".to_string();
729    }
730    let offset = unit * unit_bytes;
731    match (unit_bytes, endian) {
732        (1, _) => format!("data[{}]", offset),
733        (2, ByteEndian::Big) => format!(
734            "(static_cast<uint16_t>(data[{}]) << 8 | data[{}])",
735            offset,
736            offset + 1
737        ),
738        (2, ByteEndian::Little) => format!(
739            "(data[{}] | static_cast<uint16_t>(data[{}]) << 8)",
740            offset,
741            offset + 1
742        ),
743        (4, ByteEndian::Big) => format!(
744            "(static_cast<uint32_t>(data[{}]) << 24 | static_cast<uint32_t>(data[{}]) << 16 | static_cast<uint32_t>(data[{}]) << 8 | data[{}])",
745            offset,
746            offset + 1,
747            offset + 2,
748            offset + 3
749        ),
750        (4, ByteEndian::Little) => format!(
751            "(data[{}] | static_cast<uint32_t>(data[{}]) << 8 | static_cast<uint32_t>(data[{}]) << 16 | static_cast<uint32_t>(data[{}]) << 24)",
752            offset,
753            offset + 1,
754            offset + 2,
755            offset + 3
756        ),
757        _ => "0".to_string(),
758    }
759}
760
761/// Extract bits expression.
762fn extract_expr(
763    var: &str,
764    ranges: &[BitRange],
765    word_type: &str,
766    unit_bytes: u32,
767    endian: &ByteEndian,
768) -> String {
769    if ranges.is_empty() {
770        return "0".to_string();
771    }
772
773    if ranges.len() == 1 {
774        let range = ranges[0];
775        let source = if range.unit == 0 {
776            var.to_string()
777        } else {
778            unit_read_expr(range.unit, word_type, unit_bytes, endian)
779        };
780        let width = range.width();
781        let shift = range.end;
782        let mask = (1u64 << width) - 1;
783        if shift == 0 {
784            format!("({} & {:#x})", source, mask)
785        } else {
786            format!("(({} >> {}) & {:#x})", source, shift, mask)
787        }
788    } else {
789        let mut parts = Vec::new();
790        let mut accumulated = 0u32;
791        for range in ranges {
792            let source = if range.unit == 0 {
793                var.to_string()
794            } else {
795                unit_read_expr(range.unit, word_type, unit_bytes, endian)
796            };
797            let width = range.width();
798            let shift = range.end;
799            let mask = (1u64 << width) - 1;
800            let extracted = if shift == 0 {
801                format!("({} & {:#x})", source, mask)
802            } else {
803                format!("(({} >> {}) & {:#x})", source, shift, mask)
804            };
805            if accumulated > 0 {
806                parts.push(format!("({} << {})", extracted, accumulated));
807            } else {
808                parts.push(extracted);
809            }
810            accumulated += width;
811        }
812        parts.join(" | ")
813    }
814}
815
816/// Compute mask/value for leaf guard.
817fn leaf_guard(
818    instr: &ValidatedInstruction,
819    word_type: &str,
820    unit_bytes: u32,
821    endian: &ByteEndian,
822) -> Option<String> {
823    let fixed_bits = instr.fixed_bits();
824    if fixed_bits.is_empty() {
825        return None;
826    }
827
828    let mut units_map: HashMap<u32, Vec<(u32, Bit)>> = HashMap::new();
829    for (unit, hw_bit, bit) in fixed_bits {
830        units_map.entry(unit).or_default().push((hw_bit, bit));
831    }
832
833    let mut conditions = Vec::new();
834    for (unit, bits) in &units_map {
835        let (mask, value) = compute_mask_value(bits);
836        if mask != 0 {
837            let source = if *unit == 0 {
838                "opcode".to_string()
839            } else {
840                unit_read_expr(*unit, word_type, unit_bytes, endian)
841            };
842            conditions.push(format!("({} & {:#x}) == {:#x}", source, mask, value));
843        }
844    }
845
846    if conditions.is_empty() {
847        None
848    } else {
849        Some(conditions.join(" && "))
850    }
851}
852
853fn compute_mask_value(fixed_bits: &[(u32, Bit)]) -> (u64, u64) {
854    let mut mask: u64 = 0;
855    let mut value: u64 = 0;
856    for &(bit_pos, bit_val) in fixed_bits {
857        if bit_val == Bit::Wildcard {
858            continue;
859        }
860        mask |= 1u64 << bit_pos;
861        if bit_val == Bit::One {
862            value |= 1u64 << bit_pos;
863        }
864    }
865    (mask, value)
866}
867
868/// Apply transforms to an extraction expression.
869fn apply_transforms(
870    extract: &str,
871    resolved: &ResolvedFieldType,
872    type_maps: &HashMap<String, String>,
873) -> String {
874    let mut expr = extract.to_string();
875
876    for transform in &resolved.transforms {
877        match transform {
878            Transform::SignExtend(n) => {
879                let signed = cpp_signed_type(&resolved.base_type);
880                let bits = type_bits(&resolved.base_type);
881                expr = format!(
882                    "static_cast<{}>(static_cast<{}>(({}) << ({} - {})) >> ({} - {}))",
883                    cpp_type_for(&resolved.base_type),
884                    signed,
885                    expr,
886                    bits,
887                    n,
888                    bits,
889                    n
890                );
891            }
892            Transform::ZeroExtend(_) => {}
893            Transform::ShiftLeft(n) => {
894                expr = format!("(({}) << {})", expr, n);
895            }
896        }
897    }
898
899    // Sub-decoder dispatch
900    if let Some(ref sd_name) = resolved.sub_decoder {
901        let decode_fn = format!("decode_{}", to_snake_case(sd_name));
902        return format!(
903            "{}(static_cast<{}>({})).value()",
904            decode_fn,
905            cpp_word_type(type_bits(&resolved.base_type).min(32)),
906            expr
907        );
908    }
909
910    // Type map wrapper (user override)
911    if let Some(alias) = &resolved.alias_name {
912        if let Some(mapped) = type_maps.get(alias) {
913            return format!("static_cast<{}>({})", mapped, expr);
914        }
915    }
916
917    // Display format -> built-in wrapper types
918    match resolved.display_format {
919        Some(DisplayFormat::SignedHex) => {
920            return format!("SignedHex(static_cast<int32_t>({}))", expr);
921        }
922        Some(DisplayFormat::Hex) => {
923            return format!("Hex(static_cast<uint32_t>({}))", expr);
924        }
925        None => {}
926    }
927
928    if resolved.base_type == "bool" {
929        format!("({}) != 0", expr)
930    } else {
931        format!(
932            "static_cast<{}>({})",
933            cpp_type_for(&resolved.base_type),
934            expr
935        )
936    }
937}
938
939/// Emit the decision tree as C++ switch/if-else.
940fn emit_tree_cpp(
941    out: &mut String,
942    node: &DecodeNode,
943    def: &ValidatedDef,
944    indent: usize,
945    word_type: &str,
946    unit_bytes: u32,
947    endian: &ByteEndian,
948    variable_length: bool,
949    type_maps: &HashMap<String, String>,
950) {
951    let pad = "    ".repeat(indent);
952    match node {
953        DecodeNode::Leaf { instruction_index } => {
954            let instr = &def.instructions[*instruction_index];
955            if let Some(guard) = leaf_guard(instr, word_type, unit_bytes, endian) {
956                writeln!(out, "{}if ({}) {{", pad, guard).unwrap();
957                emit_return_instruction(
958                    out,
959                    instr,
960                    indent + 1,
961                    word_type,
962                    unit_bytes,
963                    endian,
964                    variable_length,
965                    type_maps,
966                );
967                writeln!(out, "{}}} else {{", pad).unwrap();
968                writeln!(out, "{}    return std::nullopt;", pad).unwrap();
969                writeln!(out, "{}}}", pad).unwrap();
970            } else {
971                emit_return_instruction(
972                    out,
973                    instr,
974                    indent,
975                    word_type,
976                    unit_bytes,
977                    endian,
978                    variable_length,
979                    type_maps,
980                );
981            }
982        }
983        DecodeNode::PriorityLeaves { candidates } => {
984            for (i, &idx) in candidates.iter().enumerate() {
985                let instr = &def.instructions[idx];
986                let guard = leaf_guard(instr, word_type, unit_bytes, endian);
987                if i == 0 {
988                    if let Some(g) = guard {
989                        writeln!(out, "{}if ({}) {{", pad, g).unwrap();
990                        emit_return_instruction(
991                            out,
992                            instr,
993                            indent + 1,
994                            word_type,
995                            unit_bytes,
996                            endian,
997                            variable_length,
998                            type_maps,
999                        );
1000                    } else {
1001                        emit_return_instruction(
1002                            out,
1003                            instr,
1004                            indent,
1005                            word_type,
1006                            unit_bytes,
1007                            endian,
1008                            variable_length,
1009                            type_maps,
1010                        );
1011                        break;
1012                    }
1013                } else if i == candidates.len() - 1 {
1014                    if let Some(g) = guard {
1015                        writeln!(out, "{}}} else if ({}) {{", pad, g).unwrap();
1016                        emit_return_instruction(
1017                            out,
1018                            instr,
1019                            indent + 1,
1020                            word_type,
1021                            unit_bytes,
1022                            endian,
1023                            variable_length,
1024                            type_maps,
1025                        );
1026                        writeln!(out, "{}}} else {{", pad).unwrap();
1027                        writeln!(out, "{}    return std::nullopt;", pad).unwrap();
1028                        writeln!(out, "{}}}", pad).unwrap();
1029                    } else {
1030                        writeln!(out, "{}}} else {{", pad).unwrap();
1031                        emit_return_instruction(
1032                            out,
1033                            instr,
1034                            indent + 1,
1035                            word_type,
1036                            unit_bytes,
1037                            endian,
1038                            variable_length,
1039                            type_maps,
1040                        );
1041                        writeln!(out, "{}}}", pad).unwrap();
1042                    }
1043                } else {
1044                    let g = guard.unwrap_or_else(|| "true".to_string());
1045                    writeln!(out, "{}}} else if ({}) {{", pad, g).unwrap();
1046                    emit_return_instruction(
1047                        out,
1048                        instr,
1049                        indent + 1,
1050                        word_type,
1051                        unit_bytes,
1052                        endian,
1053                        variable_length,
1054                        type_maps,
1055                    );
1056                }
1057            }
1058        }
1059        DecodeNode::Fail => {
1060            writeln!(out, "{}return std::nullopt;", pad).unwrap();
1061        }
1062        DecodeNode::Branch {
1063            range,
1064            arms,
1065            default,
1066        } => {
1067            let ext = extract_expr("opcode", &[*range], word_type, unit_bytes, endian);
1068            writeln!(out, "{}switch ({}) {{", pad, ext).unwrap();
1069            for (value, child) in arms {
1070                writeln!(out, "{}case {:#x}: {{", pad, value).unwrap();
1071                emit_tree_cpp(
1072                    out,
1073                    child,
1074                    def,
1075                    indent + 1,
1076                    word_type,
1077                    unit_bytes,
1078                    endian,
1079                    variable_length,
1080                    type_maps,
1081                );
1082                writeln!(out, "{}    break;", pad).unwrap();
1083                writeln!(out, "{}}}", pad).unwrap();
1084            }
1085            writeln!(out, "{}default: {{", pad).unwrap();
1086            emit_tree_cpp(
1087                out,
1088                default,
1089                def,
1090                indent + 1,
1091                word_type,
1092                unit_bytes,
1093                endian,
1094                variable_length,
1095                type_maps,
1096            );
1097            writeln!(out, "{}    break;", pad).unwrap();
1098            writeln!(out, "{}}}", pad).unwrap();
1099            writeln!(out, "{}}}", pad).unwrap();
1100        }
1101    }
1102}
1103
1104/// Emit code to return a decoded Instruction.
1105fn emit_return_instruction(
1106    out: &mut String,
1107    instr: &ValidatedInstruction,
1108    indent: usize,
1109    word_type: &str,
1110    unit_bytes: u32,
1111    endian: &ByteEndian,
1112    variable_length: bool,
1113    type_maps: &HashMap<String, String>,
1114) {
1115    let pad = "    ".repeat(indent);
1116    let unit_count = instr.unit_count();
1117    let bytes_consumed = unit_count * unit_bytes;
1118    let variant = to_pascal_case(&instr.name);
1119
1120    if variable_length && unit_count > 1 {
1121        writeln!(
1122            out,
1123            "{}if (len < {}) return std::nullopt;",
1124            pad, bytes_consumed
1125        )
1126        .unwrap();
1127    }
1128
1129    if instr.resolved_fields.is_empty() {
1130        writeln!(
1131            out,
1132            "{}return Instruction {{ Opcode::{}, {} }};",
1133            pad, variant, bytes_consumed
1134        )
1135        .unwrap();
1136    } else {
1137        writeln!(out, "{}{{", pad).unwrap();
1138        writeln!(out, "{}    Instruction insn{{}};", pad).unwrap();
1139        writeln!(out, "{}    insn.opcode = Opcode::{};", pad, variant).unwrap();
1140        writeln!(out, "{}    insn.size = {};", pad, bytes_consumed).unwrap();
1141        for field in &instr.resolved_fields {
1142            let ext = extract_expr("opcode", &field.ranges, word_type, unit_bytes, endian);
1143            let expr = apply_transforms(&ext, &field.resolved_type, type_maps);
1144            writeln!(
1145                out,
1146                "{}    insn.{}.{} = {};",
1147                pad, instr.name, field.name, expr
1148            )
1149            .unwrap();
1150        }
1151        writeln!(out, "{}    return insn;", pad).unwrap();
1152        writeln!(out, "{}}}", pad).unwrap();
1153    }
1154}
1155
1156/// Emit the format/disassemble function using std::format.
1157fn emit_format_function(
1158    out: &mut String,
1159    def: &ValidatedDef,
1160    _type_maps: &HashMap<String, String>,
1161    _opts: &CppOptions,
1162) {
1163    writeln!(out, "inline std::string format(const Instruction& insn) {{").unwrap();
1164    writeln!(out, "    switch (insn.opcode) {{").unwrap();
1165
1166    for instr in &def.instructions {
1167        let variant = to_pascal_case(&instr.name);
1168        writeln!(out, "    case Opcode::{}: {{", variant).unwrap();
1169
1170        if instr.format_lines.is_empty() {
1171            writeln!(out, "        return \"{}\";", instr.name).unwrap();
1172        } else {
1173            emit_format_lines_cpp(out, instr, 2);
1174        }
1175
1176        writeln!(out, "    }}").unwrap();
1177    }
1178
1179    writeln!(out, "    default: return \"???\";").unwrap();
1180    writeln!(out, "    }}").unwrap();
1181    writeln!(out, "}}").unwrap();
1182    writeln!(out).unwrap();
1183}
1184
1185/// Emit format lines for a single instruction using std::format.
1186fn emit_format_lines_cpp(out: &mut String, instr: &ValidatedInstruction, indent: usize) {
1187    let pad = "    ".repeat(indent);
1188
1189    if instr.format_lines.len() == 1 && instr.format_lines[0].guard.is_none() {
1190        let fl = &instr.format_lines[0];
1191        emit_std_format_call(out, &fl.pieces, instr, &pad);
1192        return;
1193    }
1194
1195    for (i, fl) in instr.format_lines.iter().enumerate() {
1196        if let Some(guard) = &fl.guard {
1197            let guard_code = guard_to_cpp(guard, instr);
1198            if i == 0 {
1199                writeln!(out, "{}if ({}) {{", pad, guard_code).unwrap();
1200            } else {
1201                writeln!(out, "{}}} else if ({}) {{", pad, guard_code).unwrap();
1202            }
1203            emit_std_format_call(out, &fl.pieces, instr, &format!("{}    ", pad));
1204        } else {
1205            if i > 0 {
1206                writeln!(out, "{}}} else {{", pad).unwrap();
1207            }
1208            emit_std_format_call(out, &fl.pieces, instr, &format!("{}    ", pad));
1209        }
1210    }
1211
1212    if instr.format_lines.len() > 1
1213        || instr
1214            .format_lines
1215            .first()
1216            .map_or(false, |fl| fl.guard.is_some())
1217    {
1218        writeln!(out, "{}}}", pad).unwrap();
1219    }
1220}
1221
1222/// Emit a return std::format(...) call for format pieces.
1223/// Fields are passed as arguments; user types participate via std::formatter specializations.
1224fn emit_std_format_call(
1225    out: &mut String,
1226    pieces: &[FormatPiece],
1227    instr: &ValidatedInstruction,
1228    pad: &str,
1229) {
1230    let mut fmt_str = String::new();
1231    let mut args: Vec<String> = Vec::new();
1232
1233    for piece in pieces {
1234        match piece {
1235            FormatPiece::Literal(lit) => {
1236                // Escape { and } for std::format
1237                for ch in lit.chars() {
1238                    match ch {
1239                        '{' => fmt_str.push_str("{{"),
1240                        '}' => fmt_str.push_str("}}"),
1241                        _ => fmt_str.push(ch),
1242                    }
1243                }
1244            }
1245            FormatPiece::FieldRef { expr, spec } => {
1246                let cpp_expr = expr_to_cpp(expr, instr);
1247                // Build std::format placeholder
1248                if let Some(spec) = spec {
1249                    fmt_str.push_str(&format!("{{:{}}}", translate_std_format_spec(spec)));
1250                } else {
1251                    fmt_str.push_str("{}");
1252                }
1253                args.push(cpp_expr);
1254            }
1255        }
1256    }
1257
1258    if args.is_empty() {
1259        writeln!(out, "{}return \"{}\";", pad, fmt_str).unwrap();
1260    } else {
1261        writeln!(
1262            out,
1263            "{}return std::format(\"{}\", {});",
1264            pad,
1265            fmt_str,
1266            args.join(", ")
1267        )
1268        .unwrap();
1269    }
1270}
1271
/// Map a chipi/Rust-style format spec onto the spec text used by `std::format`.
///
/// Currently a pass-through: specs such as `04x`, `#06x` and `#x` are returned
/// unchanged, as they are already accepted by `std::format`.
fn translate_std_format_spec(spec: &str) -> String {
    String::from(spec)
}
1278
1279/// Convert a FormatExpr to a C++ expression string.
1280fn expr_to_cpp(expr: &FormatExpr, instr: &ValidatedInstruction) -> String {
1281    match expr {
1282        FormatExpr::Field(name) => {
1283            format!("insn.{}.{}", instr.name, name)
1284        }
1285        FormatExpr::Ternary {
1286            field,
1287            if_nonzero,
1288            if_zero,
1289        } => {
1290            let else_val = if_zero.as_deref().unwrap_or("");
1291            format!(
1292                "(insn.{}.{} ? \"{}\" : \"{}\")",
1293                instr.name, field, if_nonzero, else_val
1294            )
1295        }
1296        FormatExpr::Arithmetic { left, op, right } => {
1297            let l = expr_to_cpp(left, instr);
1298            let r = expr_to_cpp(right, instr);
1299            let op_str = match op {
1300                ArithOp::Add => "+",
1301                ArithOp::Sub => "-",
1302                ArithOp::Mul => "*",
1303                ArithOp::Div => "/",
1304                ArithOp::Mod => "%",
1305            };
1306            format!("({} {} {})", l, op_str, r)
1307        }
1308        FormatExpr::IntLiteral(val) => format!("{}", val),
1309        FormatExpr::MapCall { map_name, args } => {
1310            let arg_strs: Vec<String> = args.iter().map(|a| expr_to_cpp(a, instr)).collect();
1311            format!("{}({})", map_name, arg_strs.join(", "))
1312        }
1313        FormatExpr::BuiltinCall { func, args } => {
1314            let arg_strs: Vec<String> = args.iter().map(|a| expr_to_cpp(a, instr)).collect();
1315            match func {
1316                BuiltinFunc::RotateRight => {
1317                    format!(
1318                        "((static_cast<uint32_t>({}) >> {}) | (static_cast<uint32_t>({}) << (32 - {})))",
1319                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
1320                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0"),
1321                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
1322                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0"),
1323                    )
1324                }
1325                BuiltinFunc::RotateLeft => {
1326                    format!(
1327                        "((static_cast<uint32_t>({}) << {}) | (static_cast<uint32_t>({}) >> (32 - {})))",
1328                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
1329                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0"),
1330                        arg_strs.first().map(|s| s.as_str()).unwrap_or("0"),
1331                        arg_strs.get(1).map(|s| s.as_str()).unwrap_or("0"),
1332                    )
1333                }
1334            }
1335        }
1336        FormatExpr::SubDecoderAccess { field, fragment } => {
1337            format!("insn.{}.{}.{}", instr.name, field, fragment)
1338        }
1339    }
1340}
1341
1342/// Convert a guard condition to C++ code.
1343fn guard_to_cpp(guard: &Guard, instr: &ValidatedInstruction) -> String {
1344    let conditions: Vec<String> = guard
1345        .conditions
1346        .iter()
1347        .map(|cond| {
1348            let left = guard_operand_cpp(&cond.left, instr);
1349            let right = guard_operand_cpp(&cond.right, instr);
1350            let op = match cond.op {
1351                CompareOp::Eq => "==",
1352                CompareOp::Ne => "!=",
1353                CompareOp::Lt => "<",
1354                CompareOp::Le => "<=",
1355                CompareOp::Gt => ">",
1356                CompareOp::Ge => ">=",
1357            };
1358            format!("{} {} {}", left, op, right)
1359        })
1360        .collect();
1361    conditions.join(" && ")
1362}
1363
1364fn guard_operand_cpp(operand: &GuardOperand, instr: &ValidatedInstruction) -> String {
1365    match operand {
1366        GuardOperand::Field(name) => format!("insn.{}.{}", instr.name, name),
1367        GuardOperand::Literal(val) => format!("{}", val),
1368        GuardOperand::Expr { left, op, right } => {
1369            let l = guard_operand_cpp(left, instr);
1370            let r = guard_operand_cpp(right, instr);
1371            let op_str = match op {
1372                ArithOp::Add => "+",
1373                ArithOp::Sub => "-",
1374                ArithOp::Mul => "*",
1375                ArithOp::Div => "/",
1376                ArithOp::Mod => "%",
1377            };
1378            format!("({} {} {})", l, op_str, r)
1379        }
1380    }
1381}