Skip to main content

chipi_core/
lut_gen.rs

1//! Function-pointer LUT generation. Consumes a validated definition and a
2//! dispatch tree.
3//!
4//! The output is one or more static `[Handler; N]` arrays. There is one
5//! per tree `Branch`. Each table has a small inline dispatch function next
6//! to it. The result is an emulator-style lookup-table dispatch.
7//!
8//! ## Basic usage (one handler per instruction)
9//!
10//! Each instruction maps directly to `{handler_mod}::{instruction_name}`.
11//!
12//! ## Grouped handlers with const generics
13//!
14//! Multiple instructions can share a single handler via const generics.
15//! Example:
16//!
17//! ```text
18//! // Generated LUT entry for addi (in the "alu" group):
19//! crate::cpu::interpreter::alu::<{ OP_ADDI }>
20//!
21//! // User implementation:
22//! pub fn alu<const OP: u32>(ctx: &mut Cpu, instr: Instruction) {
23//!     match OP {
24//!         OP_ADDI  => { /* ... */ }
25//!         OP_ADDIS => { /* ... */ }
26//!         _ => unreachable!(),
27//!     }
28//! }
29//! ```
30//!
31//! The compiler monomorphizes each instantiation. Runtime dispatch is one
32//! indirect call into the table. There is no extra branching overhead.
33
34use std::collections::HashMap;
35use std::fmt::Write;
36
37use crate::error::{Error, ErrorKind, Span};
38use crate::tree::DecodeNode;
39use crate::types::*;
40
41struct Ctx<'a> {
42    def: &'a ValidatedDef,
43    handler_mod: &'a str,
44    ctx_type: &'a str,
45    /// Type of the second parameter passed to every handler.
46    instr_type: &'a str,
47    /// Expression that yields a `u32` from the local `instr` or `opcode`
48    /// parameter inside a generated dispatch function.
49    raw_expr: &'a str,
50    uid: usize,
51    /// Accumulated auxiliary items in emission order. Holds tables and
52    /// dispatch functions.
53    buf: String,
54    /// Map from instruction name to its group handler function name. Empty
55    /// means no grouping.
56    groups: &'a HashMap<String, String>,
57    /// Optional handler for unmatched opcodes. Replaces the default
58    /// `todo!()` panic body of `_unimpl`.
59    invalid_handler: Option<&'a str>,
60}
61
62impl<'a> Ctx<'a> {
63    fn uid(&mut self) -> usize {
64        let id = self.uid;
65        self.uid += 1;
66        id
67    }
68
69    /// Parameter name used in generated function signatures.
70    /// Returns `"opcode"` for primitive integer types. Returns `"instr"`
71    /// for wrapper types.
72    fn param_name(&self) -> &'static str {
73        if is_primitive(self.instr_type) {
74            "opcode"
75        } else {
76            "instr"
77        }
78    }
79
80    /// Return the handler expression for `instr_name` as it should appear in a
81    /// static table or a direct call.
82    ///
83    /// - Ungrouped: `handler_mod::instr_name`
84    /// - Grouped:   `handler_mod::group_name::<{ OP_INSTR_NAME }>`
85    fn handler_for(&self, instr_name: &str) -> String {
86        if let Some(group) = self.groups.get(instr_name) {
87            format!(
88                "{}::{group}::<{{ {} }}>",
89                self.handler_mod,
90                op_const_name(instr_name)
91            )
92        } else {
93            format!("{}::{}", self.handler_mod, instr_name)
94        }
95    }
96}
97
98/// Generate the Rust source for a function-pointer LUT.
99///
100/// `handler_mod`: module path where handlers live. Example:
101/// `"crate::cpu::interpreter"`.
102///
103/// `ctx_type`: context type passed to every handler. Example:
104/// `"crate::gekko::Gekko"`.
105///
106/// `groups`: map from instruction name to group handler name. An empty map
107/// gives one handler per instruction.
108///
109/// `instr_type`: type of the second handler parameter. Pass `None` to
110/// derive from the spec's `width`. The auto type is `u8`, `u16`, or `u32`.
111/// Pass `Some("crate::cpu::Instruction")` to use a wrapper type.
112///
113/// `raw_expr`: expression that yields the underlying integer from the
114/// `instr` local. Ignored when `instr_type` is `None`. Defaults to
115/// `"instr.0"` for wrapper types.
116///
117/// `invalid_handler`: handler called for unmatched opcodes. `None` falls
118/// back to a `todo!()` panic.
119///
120/// The generated file contains:
121///
122/// - `pub const OP_*: u32`. One constant per instruction. Usable as a
123///   const-generic argument.
124/// - `pub type Handler = fn(&mut Ctx, InstrType);`
125/// - Static dispatch tables `_T0`, `_T1`, etc. One per bit-range level.
126/// - `pub fn dispatch(ctx: &mut Ctx, instr: InstrType)`.
127pub fn generate_lut_code(
128    def: &ValidatedDef,
129    tree: &DecodeNode,
130    handler_mod: &str,
131    ctx_type: &str,
132    groups: &HashMap<String, String>,
133    instr_type: Option<&str>,
134    raw_expr: Option<&str>,
135    dispatch: crate::Dispatch,
136    invalid_handler: Option<&str>,
137) -> Result<String, Vec<Error>> {
138    let instr_type = instr_type.unwrap_or_else(|| width_to_type(def.config.width));
139    let raw_expr = raw_expr.unwrap_or_else(|| {
140        if is_primitive(instr_type) {
141            "opcode"
142        } else {
143            "instr.0"
144        }
145    });
146
147    let mut ctx = Ctx {
148        def,
149        handler_mod,
150        ctx_type,
151        instr_type,
152        raw_expr,
153        uid: 0,
154        buf: String::new(),
155        groups,
156        invalid_handler,
157    };
158
159    let ct = ctx_type;
160    let it = instr_type;
161    let pn = ctx.param_name();
162    let re = raw_expr;
163
164    let mut out = String::new();
165    writeln!(
166        out,
167        "// Auto-generated by https://github.com/ioncodes/chipi"
168    )
169    .unwrap();
170    writeln!(out, "// Do not edit.").unwrap();
171    writeln!(out).unwrap();
172
173    // OP_* constants
174    writeln!(
175        out,
176        "// Per-instruction constants. Use as const-generic arguments:"
177    )
178    .unwrap();
179    writeln!(
180        out,
181        "// `fn alu<const OP: u32>(ctx, instr) {{ match OP {{ OP_ADDI => ... }} }}`"
182    )
183    .unwrap();
184    for (i, instr) in def.instructions.iter().enumerate() {
185        writeln!(out, "pub const {}: u32 = {i};", op_const_name(&instr.name)).unwrap();
186    }
187    writeln!(out).unwrap();
188
189    match dispatch {
190        crate::Dispatch::FnPtrLut => {
191            let root = emit_node(tree, &mut ctx);
192
193            writeln!(out, "pub type Handler = fn(&mut {ct}, {it});").unwrap();
194            writeln!(out).unwrap();
195            writeln!(out, "#[cold]").unwrap();
196            writeln!(out, "#[inline(never)]").unwrap();
197            writeln!(out, "fn _unimpl(_ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
198            if let Some(handler) = invalid_handler {
199                writeln!(out, "    {handler}(_ctx, {pn})").unwrap();
200            } else {
201                writeln!(out, "    todo!(\"unimplemented opcode {{:#010x}}\", {re})").unwrap();
202            }
203            writeln!(out, "}}").unwrap();
204            writeln!(out).unwrap();
205            out.push_str(&ctx.buf);
206            writeln!(out, "/// Dispatch an instruction word to its handler.").unwrap();
207            writeln!(out, "#[inline(always)]").unwrap();
208            writeln!(out, "pub fn dispatch(ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
209            writeln!(out, "    {root}(ctx, {pn});").unwrap();
210            writeln!(out, "}}").unwrap();
211        }
212        crate::Dispatch::JumpTable => {
213            writeln!(out, "/// Dispatch an instruction word to its handler.").unwrap();
214            writeln!(out, "#[inline(always)]").unwrap();
215            writeln!(out, "pub fn dispatch(ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
216            emit_jump_table_node(&mut out, tree, &mut ctx, 1);
217            writeln!(out, "}}").unwrap();
218        }
219        crate::Dispatch::FlatLut => {
220            emit_flat_lut(
221                &mut out,
222                FlatTargetSource::TopLevel(def),
223                &mut ctx,
224                "dispatch",
225                ct,
226                it,
227                pn,
228                re,
229            )?;
230        }
231        crate::Dispatch::FlatMatch => {
232            emit_flat_match(
233                &mut out,
234                FlatTargetSource::TopLevel(def),
235                &mut ctx,
236                "dispatch",
237                ct,
238                it,
239                pn,
240                re,
241            )?;
242        }
243    }
244
245    // Generate instr_size() for variable-length decoders
246    if needs_variable_length(def) {
247        writeln!(out).unwrap();
248        writeln!(
249            out,
250            "/// Returns the size of the instruction in units (words)."
251        )
252        .unwrap();
253        writeln!(out, "#[inline(always)]").unwrap();
254        writeln!(out, "pub fn instr_size({pn}: {it}) -> u32 {{").unwrap();
255        emit_size_node(&mut out, tree, def, &re, 1);
256        writeln!(out, "}}").unwrap();
257    }
258
259    Ok(out)
260}
261
262/// Generate handler stub functions for every instruction.
263///
264/// `group_to_instrs` maps a group handler name to the instructions it
265/// covers. An empty map gives one stub per instruction.
266///
267/// `lut_mod` is the Rust module path where the generated `OP_*` constants
268/// live. Example: `"crate::cpu::lut"`. Required when groups are non-empty.
269/// The const-generic stubs need this to reference the constants.
270///
271/// `instr_type` is the type of the second parameter. Pass `None` to derive
272/// from the spec's `width`.
273///
274/// Run this once to bootstrap an interpreter module.
275pub fn generate_stubs_code(
276    def: &ValidatedDef,
277    ctx_type: &str,
278    group_to_instrs: &HashMap<String, Vec<String>>,
279    lut_mod: Option<&str>,
280    instr_type: Option<&str>,
281) -> String {
282    let instr_type = instr_type.unwrap_or_else(|| width_to_type(def.config.width));
283
284    // Reverse map: instr_name -> group fn name
285    let instr_to_group: HashMap<&str, &str> = group_to_instrs
286        .iter()
287        .flat_map(|(g, v)| v.iter().map(move |i| (i.as_str(), g.as_str())))
288        .collect();
289
290    let it = instr_type;
291    let pn = if is_primitive(instr_type) {
292        "_opcode"
293    } else {
294        "_instr"
295    };
296
297    let mut out = String::new();
298    writeln!(
299        out,
300        "// Handler stubs. Implement each function and remove the todo!()"
301    )
302    .unwrap();
303    writeln!(out, "#![allow(unused_variables)]").unwrap();
304    writeln!(out).unwrap();
305
306    // If there are groups and we know where the OP constants live, import them.
307    if !group_to_instrs.is_empty() {
308        if let Some(lut) = lut_mod {
309            writeln!(out, "use {lut}::*;").unwrap();
310            writeln!(out).unwrap();
311        }
312    }
313
314    // Emit one const-generic stub per group, with a match arm per instruction.
315    let mut emitted_groups: Vec<&str> = group_to_instrs.keys().map(|s| s.as_str()).collect();
316    emitted_groups.sort();
317    for group in emitted_groups {
318        let instrs = &group_to_instrs[group];
319        writeln!(
320            out,
321            "pub fn {group}<const OP: u32>(_ctx: &mut {ctx_type}, {pn}: {it}) {{"
322        )
323        .unwrap();
324        writeln!(out, "    match OP {{").unwrap();
325        for instr in instrs {
326            writeln!(
327                out,
328                "        {} => todo!(\"{instr}\"),",
329                op_const_name(instr)
330            )
331            .unwrap();
332        }
333        writeln!(out, "        _ => unreachable!(),").unwrap();
334        writeln!(out, "    }}").unwrap();
335        writeln!(out, "}}").unwrap();
336        writeln!(out).unwrap();
337    }
338
339    // Emit individual stubs for ungrouped instructions.
340    for instr in &def.instructions {
341        if instr_to_group.contains_key(instr.name.as_str()) {
342            continue;
343        }
344        writeln!(
345            out,
346            "pub fn {}(_ctx: &mut {ctx_type}, {pn}: {it}) {{ todo!(\"{}\") }}",
347            instr.name, instr.name,
348        )
349        .unwrap();
350    }
351
352    out
353}
354
355fn emit_node(node: &DecodeNode, ctx: &mut Ctx) -> String {
356    match node {
357        DecodeNode::Fail => "_unimpl".to_string(),
358
359        DecodeNode::Leaf { instruction_index } => {
360            ctx.handler_for(&ctx.def.instructions[*instruction_index].name)
361        }
362
363        DecodeNode::PriorityLeaves { candidates } => {
364            let id = ctx.uid();
365            let fn_name = format!("_priority_{id}");
366            // Clone these strings before the loop so we can mutably borrow ctx.buf below.
367            let ct = ctx.ctx_type.to_string();
368            let it = ctx.instr_type.to_string();
369            let pn = ctx.param_name();
370            let re = ctx.raw_expr.to_string();
371
372            let mut body = String::new();
373            writeln!(body, "#[inline(always)]").unwrap();
374            writeln!(body, "fn {fn_name}(ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
375
376            let mut has_open_branch = false;
377            for (i, &idx) in candidates.iter().enumerate() {
378                let handler = ctx.handler_for(&ctx.def.instructions[idx].name);
379                let guard = full_guard_expr(&ctx.def.instructions[idx], &re);
380
381                match (i, guard) {
382                    (0, Some(g)) => {
383                        writeln!(body, "    if {g} {{").unwrap();
384                        writeln!(body, "        {handler}(ctx, {pn});").unwrap();
385                        has_open_branch = true;
386                    }
387                    (_, Some(g)) => {
388                        writeln!(body, "    }} else if {g} {{").unwrap();
389                        writeln!(body, "        {handler}(ctx, {pn});").unwrap();
390                    }
391                    (0, None) => {
392                        writeln!(body, "    {handler}(ctx, {pn});").unwrap();
393                        has_open_branch = false;
394                        break;
395                    }
396                    (_, None) => {
397                        writeln!(body, "    }} else {{").unwrap();
398                        writeln!(body, "        {handler}(ctx, {pn});").unwrap();
399                        writeln!(body, "    }}").unwrap();
400                        has_open_branch = false;
401                        break;
402                    }
403                }
404            }
405            if has_open_branch {
406                writeln!(body, "    }}").unwrap();
407            }
408            writeln!(body, "}}\n").unwrap();
409
410            ctx.buf.push_str(&body);
411            fn_name
412        }
413
414        DecodeNode::Branch {
415            range,
416            arms,
417            default,
418        } => {
419            let id = ctx.uid();
420            let table = format!("_T{id}");
421            let dispatch = format!("_d{id}");
422            let size = 1usize << range.width();
423            // Clone these strings before emitting so we can mutably borrow ctx.buf below.
424            let ct = ctx.ctx_type.to_string();
425            let it = ctx.instr_type.to_string();
426            let pn = ctx.param_name();
427            let re = ctx.raw_expr.to_string();
428
429            let default_handler = emit_node(default, ctx);
430            let mut entries: Vec<String> = vec![default_handler; size];
431            for (value, child) in arms {
432                let handler = emit_node(child, ctx);
433                let idx = *value as usize;
434                if idx < size {
435                    entries[idx] = handler;
436                }
437            }
438
439            writeln!(ctx.buf, "static {table}: [Handler; {size}] = [").unwrap();
440            for (i, entry) in entries.iter().enumerate() {
441                writeln!(ctx.buf, "    {entry}, // {i:#x}").unwrap();
442            }
443            writeln!(ctx.buf, "];\n").unwrap();
444
445            let extract = range_extract_expr(range, &re);
446            writeln!(ctx.buf, "#[inline(always)]").unwrap();
447            writeln!(ctx.buf, "fn {dispatch}(ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
448            writeln!(ctx.buf, "    {table}[({extract}) as usize](ctx, {pn});").unwrap();
449            writeln!(ctx.buf, "}}\n").unwrap();
450
451            dispatch
452        }
453    }
454}
455
456/// Emit a decode tree node as nested match statements for the JumpTable strategy.
457fn emit_jump_table_node(out: &mut String, node: &DecodeNode, ctx: &mut Ctx, indent: usize) {
458    let pad = "    ".repeat(indent);
459    let pn = ctx.param_name();
460    // Clone raw_expr so we can pass `ctx` mutably to recursive calls below.
461    let re = ctx.raw_expr.to_string();
462
463    match node {
464        DecodeNode::Fail => {
465            if let Some(handler) = ctx.invalid_handler {
466                writeln!(out, "{pad}{handler}(ctx, {pn});").unwrap();
467            } else {
468                writeln!(
469                    out,
470                    "{pad}todo!(\"unimplemented opcode {{:#010x}}\", {re});"
471                )
472                .unwrap();
473            }
474        }
475        DecodeNode::Leaf { instruction_index } => {
476            let handler = ctx.handler_for(&ctx.def.instructions[*instruction_index].name);
477            writeln!(out, "{pad}{handler}(ctx, {pn});").unwrap();
478        }
479        DecodeNode::PriorityLeaves { candidates } => {
480            for (i, &idx) in candidates.iter().enumerate() {
481                let handler = ctx.handler_for(&ctx.def.instructions[idx].name);
482                let guard = full_guard_expr(&ctx.def.instructions[idx], &re);
483
484                match (i, guard) {
485                    (0, Some(g)) => {
486                        writeln!(out, "{pad}if {g} {{").unwrap();
487                        writeln!(out, "{pad}    {handler}(ctx, {pn});").unwrap();
488                    }
489                    (_, Some(g)) => {
490                        writeln!(out, "{pad}}} else if {g} {{").unwrap();
491                        writeln!(out, "{pad}    {handler}(ctx, {pn});").unwrap();
492                    }
493                    (0, None) => {
494                        writeln!(out, "{pad}{handler}(ctx, {pn});").unwrap();
495                        return;
496                    }
497                    (_, None) => {
498                        writeln!(out, "{pad}}} else {{").unwrap();
499                        writeln!(out, "{pad}    {handler}(ctx, {pn});").unwrap();
500                        writeln!(out, "{pad}}}").unwrap();
501                        return;
502                    }
503                }
504            }
505            // Close the last if branch
506            writeln!(out, "{pad}}}").unwrap();
507        }
508        DecodeNode::Branch {
509            range,
510            arms,
511            default,
512        } => {
513            let extract = range_extract_expr(range, &re);
514            writeln!(out, "{pad}match ({extract}) as usize {{").unwrap();
515
516            for (value, child) in arms {
517                writeln!(out, "{pad}    {value:#x} => {{").unwrap();
518                emit_jump_table_node(out, child, ctx, indent + 2);
519                writeln!(out, "{pad}    }}").unwrap();
520            }
521
522            writeln!(out, "{pad}    _ => {{").unwrap();
523            emit_jump_table_node(out, default, ctx, indent + 2);
524            writeln!(out, "{pad}    }}").unwrap();
525            writeln!(out, "{pad}}}").unwrap();
526        }
527    }
528}
529
/// Pick the Rust unsigned integer type used for a decoder of the given
/// `width`: `u8` for 8, `u16` for 16, and `u32` for every other width.
fn width_to_type(width: u32) -> &'static str {
    if width == 8 {
        "u8"
    } else if width == 16 {
        "u16"
    } else {
        "u32"
    }
}
538
/// Whether `t` is one of the primitive unsigned integer types chipi emits
/// on its own (`u8`, `u16`, `u32`).
///
/// The answer decides the generated parameter name (`opcode` vs `instr`)
/// and the default raw-extraction expression.
fn is_primitive(t: &str) -> bool {
    const PRIMITIVES: [&str; 3] = ["u8", "u16", "u32"];
    PRIMITIVES.contains(&t)
}
545
/// Derive the `OP_*` constant name for an instruction name.
///
/// The name is upper-cased, every `.` becomes `_DOT` and every `-` becomes
/// `_`, e.g. `"addi"` -> `"OP_ADDI"`, `"ps_add."` -> `"OP_PS_ADD_DOT"`.
pub fn op_const_name(name: &str) -> String {
    let mut ident = String::from("OP_");
    for ch in name.to_uppercase().chars() {
        match ch {
            '.' => ident.push_str("_DOT"),
            '-' => ident.push('_'),
            other => ident.push(other),
        }
    }
    ident
}
553
554fn full_guard_expr(instr: &ValidatedInstruction, raw_expr: &str) -> Option<String> {
555    let mut mask: u32 = 0;
556    let mut value: u32 = 0;
557    for (unit, hw_bit, bit) in instr.fixed_bits() {
558        if unit != 0 || bit == Bit::Wildcard {
559            continue;
560        }
561        mask |= 1 << hw_bit;
562        if bit == Bit::One {
563            value |= 1 << hw_bit;
564        }
565    }
566    if mask == 0 {
567        None
568    } else {
569        Some(format!("{raw_expr} & {mask:#010x} == {value:#010x}"))
570    }
571}
572
573fn range_extract_expr(range: &BitRange, raw_expr: &str) -> String {
574    let width = range.width();
575    let shift = range.end;
576    let mask = (1u32 << width) - 1;
577    if shift == 0 {
578        format!("{raw_expr} & {mask:#x}")
579    } else {
580        format!("({raw_expr} >> {shift}) & {mask:#x}")
581    }
582}
583
584/// Generate a flat dispatch function for a sub-decoder.
585///
586/// Produces `pub fn dispatch_{snake_name}(ctx: &mut Ctx, val: u8)` that
587/// decodes the extension bits and calls the appropriate handler.
588pub fn generate_subdecoder_dispatch(
589    _def: &ValidatedDef,
590    sd: &ValidatedSubDecoder,
591    handler_mod: &str,
592    ctx_type: &str,
593    groups: &HashMap<String, String>,
594    instr_type: Option<&str>,
595) -> String {
596    let snake_name = sd.name.chars().fold(String::new(), |mut acc, c| {
597        if c.is_uppercase() && !acc.is_empty() {
598            acc.push('_');
599        }
600        acc.push(c.to_ascii_lowercase());
601        acc
602    });
603    let dispatch_fn = format!("dispatch_{snake_name}");
604    let lut_size = 1usize << sd.width;
605
606    let mut out = String::new();
607
608    // OP_EXT_* constants
609    writeln!(out, "// Sub-decoder constants for {}", sd.name).unwrap();
610    for (i, instr) in sd.instructions.iter().enumerate() {
611        writeln!(out, "pub const {}: u32 = {i};", op_const_name(&instr.name)).unwrap();
612    }
613    writeln!(out).unwrap();
614
615    // Build dispatch table: value -> instruction index
616    // Prefer more specific matches (more fixed bits)
617    let mut dispatch_table: Vec<Option<usize>> = vec![None; lut_size];
618    let mut specificity: Vec<u32> = vec![0; lut_size];
619
620    for (instr_idx, instr) in sd.instructions.iter().enumerate() {
621        // Count fixed (non-wildcard) bits
622        let fixed_count: u32 = instr
623            .segments
624            .iter()
625            .map(|seg| {
626                if let Segment::Fixed { pattern, .. } = seg {
627                    pattern
628                        .iter()
629                        .filter(|b| matches!(b, Bit::Zero | Bit::One))
630                        .count() as u32
631                } else {
632                    0
633                }
634            })
635            .sum();
636
637        for val in 0..lut_size {
638            let matches = instr.segments.iter().all(|seg| {
639                if let Segment::Fixed {
640                    ranges, pattern, ..
641                } = seg
642                {
643                    let mut bit_idx = 0;
644                    for range in ranges {
645                        for i in 0..range.width() as usize {
646                            if bit_idx < pattern.len() {
647                                let hw_bit = range.start - i as u32;
648                                let bit_val = (val >> hw_bit) & 1;
649                                match pattern[bit_idx] {
650                                    Bit::Zero if bit_val != 0 => return false,
651                                    Bit::One if bit_val != 1 => return false,
652                                    _ => {}
653                                }
654                                bit_idx += 1;
655                            }
656                        }
657                    }
658                    true
659                } else {
660                    true
661                }
662            });
663            if matches && (dispatch_table[val].is_none() || fixed_count > specificity[val]) {
664                dispatch_table[val] = Some(instr_idx);
665                specificity[val] = fixed_count;
666            }
667        }
668    }
669
670    // Emit jump table dispatch
671    let param_type = width_to_type(sd.width);
672    let (param_name, param_type_str, raw_expr) = if let Some(it) = instr_type {
673        ("instr", it.to_string(), "instr.0".to_string())
674    } else {
675        ("val", param_type.to_string(), "val".to_string())
676    };
677    writeln!(out, "/// Dispatch a sub-decoder extension opcode.").unwrap();
678    writeln!(out, "#[inline(always)]").unwrap();
679    writeln!(
680        out,
681        "pub fn {dispatch_fn}(ctx: &mut {ctx_type}, {param_name}: {param_type_str}) {{"
682    )
683    .unwrap();
684    writeln!(out, "    match {raw_expr} {{").unwrap();
685
686    let mut i = 0;
687    while i < lut_size {
688        let current = dispatch_table[i];
689        let start = i;
690        while i < lut_size && dispatch_table[i] == current {
691            i += 1;
692        }
693        let end = i - 1;
694
695        let pattern = if start == end {
696            format!("{:#x}", start)
697        } else {
698            format!("{:#x}..={:#x}", start, end)
699        };
700
701        match current {
702            Some(idx) => {
703                let instr_name = &sd.instructions[idx].name;
704                let handler = if let Some(group) = groups.get(instr_name) {
705                    format!(
706                        "{}::{group}::<{{ {} }}>",
707                        handler_mod,
708                        op_const_name(instr_name)
709                    )
710                } else {
711                    format!("{}::{}", handler_mod, instr_name)
712                };
713                writeln!(out, "        {pattern} => {handler}(ctx, {param_name}),").unwrap();
714            }
715            None => {
716                writeln!(out, "        {pattern} => {{}},").unwrap();
717            }
718        }
719    }
720
721    writeln!(out, "    }}").unwrap();
722    writeln!(out, "}}").unwrap();
723
724    out
725}
726
727/// Check if any instruction in the decoder requires multiple units.
728fn needs_variable_length(def: &ValidatedDef) -> bool {
729    def.instructions.iter().any(|i| i.unit_count() > 1)
730}
731
732/// Emit a decode tree node that returns the instruction size in units.
733fn emit_size_node(
734    out: &mut String,
735    node: &DecodeNode,
736    def: &ValidatedDef,
737    raw_expr: &str,
738    indent: usize,
739) {
740    let pad = "    ".repeat(indent);
741
742    match node {
743        DecodeNode::Fail => {
744            writeln!(out, "{pad}1").unwrap();
745        }
746        DecodeNode::Leaf { instruction_index } => {
747            let size = def.instructions[*instruction_index].unit_count();
748            writeln!(out, "{pad}{size}").unwrap();
749        }
750        DecodeNode::PriorityLeaves { candidates } => {
751            for (i, &idx) in candidates.iter().enumerate() {
752                let size = def.instructions[idx].unit_count();
753                let guard = full_guard_expr(&def.instructions[idx], raw_expr);
754
755                match (i, guard) {
756                    (0, Some(g)) => {
757                        writeln!(out, "{pad}if {g} {{").unwrap();
758                        writeln!(out, "{pad}    {size}").unwrap();
759                    }
760                    (_, Some(g)) => {
761                        writeln!(out, "{pad}}} else if {g} {{").unwrap();
762                        writeln!(out, "{pad}    {size}").unwrap();
763                    }
764                    (0, None) => {
765                        writeln!(out, "{pad}{size}").unwrap();
766                        return;
767                    }
768                    (_, None) => {
769                        writeln!(out, "{pad}}} else {{").unwrap();
770                        writeln!(out, "{pad}    {size}").unwrap();
771                        writeln!(out, "{pad}}}").unwrap();
772                        return;
773                    }
774                }
775            }
776            writeln!(out, "{pad}}}").unwrap();
777        }
778        DecodeNode::Branch {
779            range,
780            arms,
781            default,
782        } => {
783            let extract = range_extract_expr(range, raw_expr);
784            writeln!(out, "{pad}match ({extract}) as usize {{").unwrap();
785
786            for (value, child) in arms {
787                writeln!(out, "{pad}    {value:#x} => {{").unwrap();
788                emit_size_node(out, child, def, raw_expr, indent + 2);
789                writeln!(out, "{pad}    }}").unwrap();
790            }
791
792            writeln!(out, "{pad}    _ => {{").unwrap();
793            emit_size_node(out, default, def, raw_expr, indent + 2);
794            writeln!(out, "{pad}    }}").unwrap();
795            writeln!(out, "{pad}}}").unwrap();
796        }
797    }
798}
799
800// ---------------------------------------------------------------------------
801// Flat dispatch (flat_lut, flat_match)
802// ---------------------------------------------------------------------------
803
804/// Source of instructions for flat-dispatch enumeration.
805/// `TopLevel` considers only unit-0 segments. `Sub` considers all segments.
806pub(crate) enum FlatTargetSource<'a> {
807    TopLevel(&'a ValidatedDef),
808    Sub(&'a ValidatedSubDecoder),
809}
810
811impl<'a> FlatTargetSource<'a> {
812    fn width(&self) -> u32 {
813        match self {
814            FlatTargetSource::TopLevel(d) => d.config.width,
815            FlatTargetSource::Sub(s) => s.width,
816        }
817    }
818
819    fn instruction_count(&self) -> usize {
820        match self {
821            FlatTargetSource::TopLevel(d) => d.instructions.len(),
822            FlatTargetSource::Sub(s) => s.instructions.len(),
823        }
824    }
825
826    fn instr_name(&self, idx: usize) -> &str {
827        match self {
828            FlatTargetSource::TopLevel(d) => &d.instructions[idx].name,
829            FlatTargetSource::Sub(s) => &s.instructions[idx].name,
830        }
831    }
832
833    /// Whether instruction `idx` matches raw value `raw`.
834    /// Top-level decoders look only at unit-0 segments. Variable-length
835    /// instructions still flat-dispatch on their first word.
836    fn matches(&self, idx: usize, raw: u64) -> bool {
837        let segments = match self {
838            FlatTargetSource::TopLevel(d) => &d.instructions[idx].segments,
839            FlatTargetSource::Sub(s) => &s.instructions[idx].segments,
840        };
841        let only_unit_zero = matches!(self, FlatTargetSource::TopLevel(_));
842
843        for seg in segments {
844            if let Segment::Fixed {
845                ranges, pattern, ..
846            } = seg
847            {
848                let mut bit_idx = 0;
849                for range in ranges {
850                    let in_unit_0 = range.unit == 0;
851                    let range_width = range.width() as usize;
852                    if only_unit_zero && !in_unit_0 {
853                        bit_idx += range_width;
854                        continue;
855                    }
856                    for i in 0..range_width {
857                        if bit_idx >= pattern.len() {
858                            break;
859                        }
860                        let hw_bit = range.start - i as u32;
861                        let bit_val = (raw >> hw_bit) & 1;
862                        match pattern[bit_idx] {
863                            Bit::Zero if bit_val != 0 => return false,
864                            Bit::One if bit_val != 1 => return false,
865                            _ => {}
866                        }
867                        bit_idx += 1;
868                    }
869                }
870            }
871        }
872        true
873    }
874}
875
876/// Build a `Vec<String>` of length `2^width`. Each entry is the resolved
877/// handler expression for that raw value. Returns ambiguity errors if any
878/// value resolves to two or more distinct handler strings.
879fn build_flat_handler_table(
880    src: &FlatTargetSource,
881    handler_for: &dyn Fn(&str) -> String,
882    invalid_handler: &str,
883    span: &Span,
884) -> Result<Vec<String>, Vec<Error>> {
885    let width = src.width();
886    let n: u64 = 1u64 << width;
887    let mut table: Vec<String> = vec![invalid_handler.to_string(); n as usize];
888    let mut errors: Vec<Error> = Vec::new();
889
890    let n_instrs = src.instruction_count();
891
892    for raw in 0..n {
893        let mut matched: Vec<usize> = Vec::new();
894        for idx in 0..n_instrs {
895            if src.matches(idx, raw) {
896                matched.push(idx);
897            }
898        }
899        if matched.is_empty() {
900            // already invalid_handler
901        } else if matched.len() == 1 {
902            table[raw as usize] = handler_for(src.instr_name(matched[0]));
903        } else {
904            let handlers: Vec<String> = matched
905                .iter()
906                .map(|i| handler_for(src.instr_name(*i)))
907                .collect();
908            let first = &handlers[0];
909            if handlers.iter().all(|h| h == first) {
910                table[raw as usize] = first.clone();
911            } else {
912                let pairs: Vec<(String, String)> = matched
913                    .iter()
914                    .zip(handlers.iter())
915                    .map(|(i, h)| (src.instr_name(*i).to_string(), h.clone()))
916                    .collect();
917                errors.push(Error::new(
918                    ErrorKind::FlatDispatchAmbiguous {
919                        raw,
920                        matches: pairs,
921                    },
922                    span.clone(),
923                ));
924            }
925        }
926    }
927
928    if errors.is_empty() {
929        Ok(table)
930    } else {
931        Err(errors)
932    }
933}
934
935#[allow(clippy::too_many_arguments)]
936fn emit_flat_lut(
937    out: &mut String,
938    src: FlatTargetSource,
939    ctx: &mut Ctx,
940    fn_name: &str,
941    ct: &str,
942    it: &str,
943    pn: &str,
944    re: &str,
945) -> Result<(), Vec<Error>> {
946    let invalid = ctx
947        .invalid_handler
948        .map(|s| s.to_string())
949        .unwrap_or_else(|| "_unimpl".to_string());
950
951    let handler_for_owned = |name: &str| ctx.handler_for(name);
952    let span = Span::new("<flat_lut>", 0, 0, 0);
953    let table = build_flat_handler_table(&src, &handler_for_owned, &invalid, &span)?;
954
955    let n = table.len();
956    writeln!(out, "pub type Handler = fn(&mut {ct}, {it});").unwrap();
957    writeln!(out).unwrap();
958    if ctx.invalid_handler.is_none() {
959        writeln!(out, "#[cold]").unwrap();
960        writeln!(out, "#[inline(never)]").unwrap();
961        writeln!(out, "fn _unimpl(_ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
962        writeln!(out, "    todo!(\"unimplemented opcode {{:#010x}}\", {re})").unwrap();
963        writeln!(out, "}}").unwrap();
964        writeln!(out).unwrap();
965    }
966    writeln!(out, "static DISPATCH: [Handler; {n}] = [").unwrap();
967    for (i, entry) in table.iter().enumerate() {
968        writeln!(out, "    {entry}, // {i:#x}").unwrap();
969    }
970    writeln!(out, "];").unwrap();
971    writeln!(out).unwrap();
972    writeln!(out, "/// Dispatch via a flat lookup table.").unwrap();
973    writeln!(out, "#[inline(always)]").unwrap();
974    writeln!(out, "pub fn {fn_name}(ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
975    writeln!(out, "    let key = ({re}) as usize;").unwrap();
976    writeln!(out, "    DISPATCH[key](ctx, {pn});").unwrap();
977    writeln!(out, "}}").unwrap();
978
979    Ok(())
980}
981
982#[allow(clippy::too_many_arguments)]
983fn emit_flat_match(
984    out: &mut String,
985    src: FlatTargetSource,
986    ctx: &mut Ctx,
987    fn_name: &str,
988    ct: &str,
989    it: &str,
990    pn: &str,
991    re: &str,
992) -> Result<(), Vec<Error>> {
993    let invalid = ctx
994        .invalid_handler
995        .map(|s| s.to_string())
996        .unwrap_or_else(|| "_unimpl".to_string());
997
998    let handler_for_owned = |name: &str| ctx.handler_for(name);
999    let span = Span::new("<flat_match>", 0, 0, 0);
1000    let table = build_flat_handler_table(&src, &handler_for_owned, &invalid, &span)?;
1001
1002    if ctx.invalid_handler.is_none() {
1003        writeln!(out, "#[cold]").unwrap();
1004        writeln!(out, "#[inline(never)]").unwrap();
1005        writeln!(out, "fn _unimpl(_ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
1006        writeln!(out, "    todo!(\"unimplemented opcode {{:#010x}}\", {re})").unwrap();
1007        writeln!(out, "}}").unwrap();
1008        writeln!(out).unwrap();
1009    }
1010    writeln!(out, "/// Dispatch via a compressed match.").unwrap();
1011    writeln!(out, "#[inline(always)]").unwrap();
1012    writeln!(out, "pub fn {fn_name}(ctx: &mut {ct}, {pn}: {it}) {{").unwrap();
1013    writeln!(out, "    match ({re}) as u64 {{").unwrap();
1014
1015    let mut i = 0usize;
1016    let n = table.len();
1017    while i < n {
1018        let current = &table[i];
1019        let start = i;
1020        while i < n && table[i] == *current {
1021            i += 1;
1022        }
1023        let end = i - 1;
1024        let pattern = if start == end {
1025            format!("{:#x}", start)
1026        } else {
1027            format!("{:#x}..={:#x}", start, end)
1028        };
1029        writeln!(out, "        {pattern} => {current}(ctx, {pn}),").unwrap();
1030    }
1031
1032    writeln!(out, "        _ => {invalid}(ctx, {pn}),").unwrap();
1033    writeln!(out, "    }}").unwrap();
1034    writeln!(out, "}}").unwrap();
1035    Ok(())
1036}
1037
1038/// Generate a flat dispatch function for a sub-decoder.
1039/// Uses the explicit `flat_lut` or `flat_match` strategy.
1040/// Errors on raw-value ambiguity.
1041#[allow(clippy::too_many_arguments)]
1042pub fn generate_subdecoder_flat_dispatch(
1043    sd: &ValidatedSubDecoder,
1044    handler_mod: &str,
1045    ctx_type: &str,
1046    groups: &HashMap<String, String>,
1047    instr_type: Option<&str>,
1048    invalid_handler: Option<&str>,
1049    strategy: crate::Dispatch,
1050) -> Result<String, Vec<Error>> {
1051    let snake_name = sd.name.chars().fold(String::new(), |mut acc, c| {
1052        if c.is_uppercase() && !acc.is_empty() {
1053            acc.push('_');
1054        }
1055        acc.push(c.to_ascii_lowercase());
1056        acc
1057    });
1058    let dispatch_fn = format!("dispatch_{snake_name}");
1059
1060    let param_type = width_to_type(sd.width);
1061    let (param_name, param_type_str, raw_expr_str) = if let Some(it) = instr_type {
1062        ("instr", it.to_string(), "instr.0".to_string())
1063    } else {
1064        ("val", param_type.to_string(), "val".to_string())
1065    };
1066
1067    // Build a temporary Ctx-like structure for handler resolution. We
1068    // emulate Ctx::handler_for using the local `groups` map and `handler_mod`.
1069    let resolve = |name: &str| -> String {
1070        if let Some(group) = groups.get(name) {
1071            format!("{}::{group}::<{{ {} }}>", handler_mod, op_const_name(name))
1072        } else {
1073            format!("{}::{}", handler_mod, name)
1074        }
1075    };
1076    let invalid = invalid_handler
1077        .map(|s| s.to_string())
1078        .unwrap_or_else(|| format!("{}::invalid", handler_mod));
1079
1080    let span = Span::new("<flat_subdispatch>", 0, 0, 0);
1081    let table = build_flat_handler_table(&FlatTargetSource::Sub(sd), &resolve, &invalid, &span)?;
1082
1083    let mut out = String::new();
1084
1085    // OP_* constants for the sub-decoder's instructions
1086    writeln!(out, "// Sub-decoder constants for {}", sd.name).unwrap();
1087    for (i, instr) in sd.instructions.iter().enumerate() {
1088        writeln!(out, "pub const {}: u32 = {i};", op_const_name(&instr.name)).unwrap();
1089    }
1090    writeln!(out).unwrap();
1091
1092    match strategy {
1093        crate::Dispatch::FlatLut => {
1094            let n = table.len();
1095            writeln!(
1096                out,
1097                "pub type SubHandler{name} = fn(&mut {ctx_type}, {param_type_str});",
1098                name = sd.name,
1099            )
1100            .unwrap();
1101            writeln!(out).unwrap();
1102            writeln!(
1103                out,
1104                "static DISPATCH_{up}: [SubHandler{name}; {n}] = [",
1105                up = sd.name.to_uppercase(),
1106                name = sd.name,
1107            )
1108            .unwrap();
1109            for (i, entry) in table.iter().enumerate() {
1110                writeln!(out, "    {entry}, // {i:#x}").unwrap();
1111            }
1112            writeln!(out, "];").unwrap();
1113            writeln!(out).unwrap();
1114            writeln!(out, "/// Dispatch a sub-decoder extension opcode.").unwrap();
1115            writeln!(out, "#[inline(always)]").unwrap();
1116            writeln!(
1117                out,
1118                "pub fn {dispatch_fn}(ctx: &mut {ctx_type}, {param_name}: {param_type_str}) {{"
1119            )
1120            .unwrap();
1121            writeln!(
1122                out,
1123                "    DISPATCH_{up}[({raw_expr_str}) as usize](ctx, {param_name});",
1124                up = sd.name.to_uppercase()
1125            )
1126            .unwrap();
1127            writeln!(out, "}}").unwrap();
1128        }
1129        crate::Dispatch::FlatMatch => {
1130            writeln!(out, "/// Dispatch a sub-decoder extension opcode.").unwrap();
1131            writeln!(out, "#[inline(always)]").unwrap();
1132            writeln!(
1133                out,
1134                "pub fn {dispatch_fn}(ctx: &mut {ctx_type}, {param_name}: {param_type_str}) {{"
1135            )
1136            .unwrap();
1137            writeln!(out, "    match {raw_expr_str} {{").unwrap();
1138
1139            let mut i = 0usize;
1140            let n = table.len();
1141            while i < n {
1142                let current = &table[i];
1143                let start = i;
1144                while i < n && table[i] == *current {
1145                    i += 1;
1146                }
1147                let end = i - 1;
1148                let pattern = if start == end {
1149                    format!("{:#x}", start)
1150                } else {
1151                    format!("{:#x}..={:#x}", start, end)
1152                };
1153                writeln!(out, "        {pattern} => {current}(ctx, {param_name}),").unwrap();
1154            }
1155            writeln!(out, "        _ => {invalid}(ctx, {param_name}),").unwrap();
1156            writeln!(out, "    }}").unwrap();
1157            writeln!(out, "}}").unwrap();
1158        }
1159        _ => {
1160            return Err(vec![Error::new(
1161                ErrorKind::InvalidStrategy(format!("{:?}", strategy)),
1162                span,
1163            )]);
1164        }
1165    }
1166
1167    Ok(out)
1168}