Skip to main content

chipi/
lib.rs

1//! # chipi
2//!
3//! Generate instruction decoders and disassemblers from `.chipi` files.
4//!
5//! Write your CPU instruction encoding in a simple DSL, and chipi generates
6//! the Rust decoder and formatting code for you.
7//!
8//! ## Usage
9//!
10//! Add to `Cargo.toml`:
11//!
12//! ```toml
13//! [build-dependencies]
14//! chipi = "0.5.3"
15//! ```
16//!
17//! Create `build.rs`:
18//!
19//! ```ignore
20//! use std::env;
21//! use std::path::PathBuf;
22//!
23//! fn main() {
24//!     let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
25//!     chipi::generate("cpu.chipi", out_dir.join("cpu.rs").to_str().unwrap())
26//!         .expect("failed to generate decoder");
27//!     println!("cargo:rerun-if-changed=cpu.chipi");
28//! }
29//! ```
30//!
31//! Use the generated decoder:
32//!
33//! ```ignore
34//! mod cpu {
35//!     include!(concat!(env!("OUT_DIR"), "/cpu.rs"));
36//! }
37//!
38//! // decode() always takes &[u8] and returns (instruction, bytes_consumed)
39//! match cpu::CpuInstruction::decode(&data[offset..]) {
40//!     Some((instr, bytes)) => {
41//!         println!("{}", instr);
42//!         offset += bytes;
43//!     }
44//!     None => println!("invalid instruction"),
45//! }
46//! ```
47//!
48//! ## Example .chipi file
49//!
50//! ```text
51//! decoder Cpu {
52//!     width = 32
53//!     bit_order = msb0
54//!     endian = big
55//! }
56//!
57//! type simm16 = i32 { sign_extend(16) }
58//! type simm24 = i32 { sign_extend(24), shift_left(2) }
59//!
60//! bx   [0:5]=010010 li:simm24[6:29] aa:bool[30] lk:bool[31]
61//!      | "b{lk ? l}{aa ? a} {li:#x}"
62//!
63//! addi [0:5]=001110 rd:u8[6:10] ra:u8[11:15] simm:simm16[16:31]
64//!      | ra == 0: "li {rd}, {simm}"
65//!      | "addi {rd}, {ra}, {simm}"
66//! ```
67//!
68//! ## Syntax
69//!
70//! ### Decoder block
71//!
72//! ```text
73//! decoder Name {
74//!     width = 32        # 8, 16, or 32 bits
75//!     bit_order = msb0  # msb0 or lsb0
76//!     endian = big      # big or little (default: big)
77//!     max_units = 4     # optional: safety guard (validates bit ranges)
78//! }
79//! ```
80//!
81//! #### Variable-Length Instructions
82//!
83//! chipi automatically generates variable-length decoders when you use bit positions
84//! beyond `width-1`. Simply reference subsequent units in your bit ranges:
85//!
86//! ```text
87//! decoder Dsp {
88//!     width = 16
89//!     bit_order = msb0
90//!     endian = big
91//!     max_units = 2     # Optional safety check: ensures bits don't exceed 32 (width * max_units)
92//! }
93//!
94//! nop    [0:15]=0000000000000000        # 1 unit (16 bits)
95//! lri    [0:10]=00000010000 rd:u5[11:15] imm:u16[16:31]  # 2 units (32 bits)
96//! ```
97//!
98//! The generated `decode` always has the signature:
99//! `pub fn decode(data: &[u8]) -> Option<(Self, usize)>`
100//!
101//! It accepts raw bytes and returns the decoded instruction along with the
102//! number of bytes consumed.
103//!
104//! ### Instructions
105//!
106//! Each instruction is one line with a name, fixed bit patterns, and fields:
107//!
108//! ```text
109//! add [0:5]=011111 rd:u8[6:10] ra:u8[11:15]
110//! ```
111//!
112//! Fixed bits use `[range]=pattern`. Fields use `name:type[range]`.
113//!
114//! #### Wildcard Bits
115//!
116//! Use `?` in bit patterns for bits that can be any value:
117//!
118//! ```text
119//! # Match when bits [15:8] are 0x8c, bits [7:0] can be anything
120//! clr15   [15:0]=10001100????????
121//!         | "CLR15"
122//!
123//! # Mix wildcards with specific bits
124//! nop     [7:4]=0000 [3:0]=????
125//!         | "nop"
126//! ```
127//!
128//! Wildcard bits are excluded from the matching mask, so instructions match
129//! regardless of the values in those positions. This is useful for reserved or
130//! architecturally undefined bits.
131//!
132//! #### Overlapping Patterns
133//!
134//! chipi supports overlapping instruction patterns where one pattern is a subset of another.
135//! More specific patterns (with more fixed bits) are checked first:
136//!
137//! ```text
138//! # Generic instruction - matches 0x1X (any value in bits 4-7)
139//! load  [0:3]=0001 reg:u4[4:7]
140//!       | "load r{reg}"
141//!
142//! # Specific instruction - matches only 0x1F
143//! load_max [0:3]=0001 [4:7]=1111
144//!          | "load rmax"
145//! ```
146//!
147//! The decoder will check `load_max` first (all bits fixed), then fall back to `load`
148//! (bits 4-7 are wildcards). This works across all units in variable-length decoders.
149//!
150//! ### Types
151//!
152//! Builtin types:
153//! * `bool` (converts bit to true/false)
154//! * `u1` to `u7` (maps to u8)
155//! * `u8`, `u16`, `u32`
156//! * `i8`, `i16`, `i32`
157//!
158//! Custom types:
159//!
160//! ```text
161//! type simm = i32 { sign_extend(16) }
162//! type reg = u8 as Register
163//! ```
164//!
165//! Available transformations:
166//! * `sign_extend(n)` - sign extend from n bits
167//! * `zero_extend(n)` - zero extend from n bits
168//! * `shift_left(n)` - shift left by n bits
169//!
170//! Display format hints (controls how the field is printed in format strings):
171//! * `display(signed_hex)` - signed hex: `0x1A`, `-0x1A`, `0`
172//! * `display(hex)` - unsigned hex: `0x1A`, `0`
173//!
174//! ### Imports
175//!
176//! Import Rust types to wrap extracted values:
177//!
178//! ```text
179//! import crate::cpu::Register
180//! import std::num::Wrapping
181//! ```
182//!
183//! ### Format lines
184//!
185//! Format lines follow an instruction and define its disassembly output:
186//!
187//! ```text
188//! bx [0:5]=010010 li:simm24[6:29] aa:bool[30] lk:bool[31]
189//!    | "b{lk ? l}{aa ? a} {li:#x}"
190//! ```
191//!
192//! Features:
193//! * `{field}` - insert field value, with optional format spec: `{field:#x}`
194//! * `{field ? text}` - emit `text` if nonzero, `{field ? yes : no}` for else
195//! * `{a + b * 4}` - inline arithmetic (`+`, `-`, `*`, `/`, `%`)
196//! * `{-field}` - unary negation
197//! * `{map_name(arg)}` - call a map lookup
198//! * `{rotate_right(val, amt)}` - builtin functions
199//! * Guards: `| ra == 0: "li {rd}, {simm}"` - conditional format selection
200//! * Guard arithmetic: `| sh == 32 - mb : "srwi ..."` - arithmetic in guard operands
201//!
202//! ### Maps
203//!
204//! Lookup tables for use in format strings:
205//!
206//! ```text
207//! map spr_name(spr) {
208//!     1 => "xer"
209//!     8 => "lr"
210//!     9 => "ctr"
211//!     _ => "???"
212//! }
213//! ```
214//!
215//! ### Formatting trait
216//!
217//! chipi generates a `{Name}Format` trait with one method per instruction.
218//! Default implementations come from format lines. Override selectively:
219//!
220//! ```ignore
221//! struct MyFormat;
222//! impl cpu::CpuFormat for MyFormat {
223//!     fn fmt_bx(li: i32, aa: bool, lk: bool,
224//!               f: &mut std::fmt::Formatter) -> std::fmt::Result {
225//!         write!(f, "BRANCH {:#x}", li)
226//!     }
227//! }
228//!
229//! println!("{}", instr.display::<MyFormat>());
230//! ```
231//!
232//! ## Emulator LUT
233//!
234//! chipi can generate a function-pointer **lookup table** for emulator dispatch.
235//! Each opcode is routed directly to a handler function via static `[Handler; N]`
236//! arrays derived from the same decision tree.
237//!
238//! ### build.rs
239//!
240//! Use [`LutBuilder`] to configure and emit both the LUT and the handler stubs:
241//!
242//! ```ignore
243//! use std::env;
244//! use std::path::PathBuf;
245//!
246//! fn main() {
247//!     let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
248//!     let manifest = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
249//!     let spec = "cpu.chipi";
250//!
251//!     let builder = chipi::LutBuilder::new(spec)
252//!         .handler_mod("crate::cpu::interpreter")
253//!         .ctx_type("crate::Cpu");
254//!
255//!     // Regenerated every build, stays in sync with the spec
256//!     builder
257//!         .build_lut(out_dir.join("cpu_lut.rs").to_str().unwrap())
258//!         .expect("failed to generate LUT");
259//!
260//!     // Written once, hand-edits are never overwritten
261//!     let stubs = manifest.join("src/cpu/interpreter.rs");
262//!     if !stubs.exists() {
263//!         builder.build_stubs(stubs.to_str().unwrap())
264//!             .expect("failed to generate stubs");
265//!     }
266//!
267//!     println!("cargo:rerun-if-changed={spec}");
268//! }
269//! ```
270//!
271//! ### Include and dispatch
272//!
273//! ```ignore
274//! // src/cpu.rs
275//! #[allow(dead_code, non_upper_case_globals)]
276//! pub mod lut {
277//!     include!(concat!(env!("OUT_DIR"), "/cpu_lut.rs"));
278//! }
279//!
280//! // fetch-decode-execute
281//! let opcode = mem.read_u32(cpu.pc);
282//! cpu.pc = cpu.pc.wrapping_add(4);
283//! crate::cpu::lut::dispatch(&mut ctx, opcode);
284//! ```
285//!
286//! ### Handler stubs
287//!
288//! On the first build, `build_stubs` writes `src/cpu/interpreter.rs` with
289//! `todo!()` bodies. Replace each `todo!()` as you go; the file is never
290//! regenerated so hand-edits are safe.
291//!
292//! The second parameter type is derived from the spec's `width`:
293//! `u8` (8-bit), `u16` (16-bit), or `u32` (32-bit).
294//!
295//! ```ignore
296//! pub fn addi(_ctx: &mut crate::Cpu, _opcode: u32) { todo!("addi") }
297//! pub fn lwz(_ctx: &mut crate::Cpu, _opcode: u32) { todo!("lwz")  }
298//! // ... one fn per instruction
299//! ```
300//!
301//! ### Grouped handlers with const generics
302//!
303//! Use `.group()` to fold multiple instructions into one handler via a
304//! `const OP: u32` generic parameter. Each LUT entry is a separate
305//! monomorphization.
306//!
307//! Provide `.lut_mod()` so that generated stubs can `use` the `OP_*` constants:
308//!
309//! ```ignore
310//! chipi::LutBuilder::new("cpu.chipi")
311//!     .handler_mod("crate::cpu::interpreter")
312//!     .ctx_type("crate::Cpu")
313//!     .lut_mod("crate::cpu::lut")
314//!     .group("alu", ["addi", "addis", "ori", "oris"])
315//!     .build_lut(out_dir.join("cpu_lut.rs").to_str().unwrap())?;
316//! ```
317//!
318//! ### Custom instruction wrapper type
319//!
320//! Use `.instr_type()` to replace the raw integer with a richer type.
321//! chipi uses it in the generated `Handler` alias and all stub signatures.
322//! `.raw_expr()` tells chipi how to extract the underlying integer for table
323//! indexing; it defaults to `"instr.0"` for newtype wrappers.
324//!
325//! ```ignore
326//! chipi::LutBuilder::new("cpu.chipi")
327//!     .handler_mod("crate::cpu::interpreter")
328//!     .ctx_type("crate::Cpu")
329//!     .instr_type("crate::cpu::Instruction")  // struct Instruction(pub u32)
330//!     // .raw_expr("instr.0")                 // default for newtype wrappers
331//!     .build_lut(out_dir.join("cpu_lut.rs").to_str().unwrap())?;
332//! ```
333//!
334//! Generated `Handler` type and stub signature:
335//! ```ignore
336//! pub type Handler = fn(&mut crate::Cpu, crate::cpu::Instruction);
337//!
338//! pub fn addi(_ctx: &mut crate::Cpu, _instr: crate::cpu::Instruction) { todo!("addi") }
339//! ```
340//!
341//! ## Instruction Type Generation
342//!
343//! chipi can auto-generate the instruction newtype with field accessor methods,
344//! eliminating the need to hand-write bit extraction code. This is useful in
//! cases where a thin wrapper for decoding is preferred (e.g. emulation).
346//!
347//! ### build.rs
348//!
349//! Add `.build_instr_type()` to your `LutBuilder` chain:
350//!
351//! ```ignore
352//! chipi::LutBuilder::new("cpu.chipi")
353//!     .instr_type("crate::cpu::Instruction")
354//!     .build_instr_type(out_dir.join("instruction.rs").to_str().unwrap())?;
355//! ```
356//!
357//! ### Generated output
358//!
359//! Creates a newtype with `#[inline]` accessor methods for every unique field:
360//!
361//! ```ignore
362//! pub struct Instruction(pub u32);
363//!
364//! #[rustfmt::skip]
365//! impl Instruction {
366//!     #[inline] pub fn rd(&self) -> u8 { ((self.0 >> 21) & 0x1f) as u8 }
367//!     #[inline] pub fn ra(&self) -> u8 { ((self.0 >> 16) & 0x1f) as u8 }
368//!     #[inline] pub fn simm(&self) -> i32 { ((((self.0 >> 0) & 0xffff) as i32) << 16) >> 16 }
369//!     #[inline] pub fn rc(&self) -> bool { (self.0 & 0x1) != 0 }
370//!     // ... one accessor per unique field across all instructions
371//! }
372//! ```
373//!
374//! ### Usage
375//!
376//! Include the generated file and optionally add custom methods:
377//!
378//! ```ignore
379//! // src/cpu/semantics.rs
380//! include!(concat!(env!("OUT_DIR"), "/instruction.rs"));
381//!
382//! // Add custom accessors not derivable from the spec
383//! impl Instruction {
384//!     /// SPR field with swapped halves (PowerPC)
385//!     pub fn spr_decoded(&self) -> u32 {
386//!         let raw = self.spr();
387//!         (raw >> 5) | ((raw & 0x1f) << 5)
388//!     }
389//! }
390//! ```
391//!
392//! ### Conflict handling
393//!
394//! Fields with the same name but different bit ranges across instructions generate
395//! separate accessors with bit range suffixes (e.g., `d_15_0()` and `d_11_0()`).
396//! You can add convenience aliases in a separate `impl` block if needed.
397//!
398//! ## API
399//!
400//! ```ignore
401//! // Parse and generate decoder from file
402//! chipi::generate("cpu.chipi", "out.rs")?;
403//!
404//! // Generate decoder from source string
405//! let code = chipi::generate_from_str(source, "cpu.chipi")?;
406//!
407//! // Step-by-step
408//! let def = chipi::parse("cpu.chipi")?;
409//! chipi::emit(&def, "out.rs")?;
410//!
411//! // Emulator LUT, simple
412//! // (instr type auto-derived from spec width: u8 / u16 / u32)
413//! chipi::generate_lut("cpu.chipi", "out/lut.rs", "crate::interp", "crate::Cpu")?;
414//! chipi::generate_stubs("cpu.chipi", "src/interp.rs", "crate::Cpu")?; // once only
415//!
416//! // Instruction type generation
417//! chipi::generate_instr_type("cpu.chipi", "out/instruction.rs", "Instruction")?;
418//!
419//! // Emulator LUT, full control via LutBuilder
//! let builder = chipi::LutBuilder::new("cpu.chipi")
//!     .handler_mod("crate::cpu::interpreter")
//!     .ctx_type("crate::Cpu")
//!     .lut_mod("crate::cpu::lut")              // needed when using groups
//!     .group("alu", ["addi", "addis"])         // const-generic shared handler
//!     .instr_type("crate::cpu::Instruction");  // optional wrapper type
//! builder.build_lut("out/lut.rs")?;
//! builder.build_instr_type("out/instruction.rs")?;  // generate instruction type
428//! ```
429
430pub mod codegen;
431pub mod error;
432pub mod format_parser;
433pub mod instr_gen;
434pub mod lut_gen;
435pub mod parser;
436pub mod tree;
437pub mod types;
438pub mod validate;
439
440use std::collections::HashMap;
441use std::fs;
442use std::path::Path;
443
444use error::Errors;
445use types::DecoderDef;
446
447/// Parse a `.chipi` file from a file path and return the decoder definition.
448///
449/// # Errors
450///
451/// Returns an error if the file cannot be read or parsed.
452///
453/// # Example
454///
455/// ```ignore
456/// let def = chipi::parse("thumb.chipi")?;
457/// ```
458pub fn parse(input: &str) -> Result<DecoderDef, Box<dyn std::error::Error>> {
459    let path = Path::new(input);
460    let source = fs::read_to_string(path)?;
461    let filename = path
462        .file_name()
463        .and_then(|f| f.to_str())
464        .unwrap_or(input);
465
466    parser::parse(&source, filename).map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)
467}
468
469/// Parse source text directly without reading from a file.
470///
471/// # Arguments
472///
473/// * `source`: `.chipi` source code
474/// * `filename`: name used in error messages
475pub fn parse_str(source: &str, filename: &str) -> Result<DecoderDef, Vec<error::Error>> {
476    parser::parse(source, filename)
477}
478
479/// Validate a parsed definition and write generated Rust code to a file.
480///
481/// # Errors
482///
483/// Returns validation or I/O errors.
484pub fn emit(def: &DecoderDef, output: &str) -> Result<(), Box<dyn std::error::Error>> {
485    let validated = validate::validate(def)
486        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
487
488    let tree = tree::build_tree(&validated);
489    let code = codegen::generate_code(&validated, &tree);
490
491    fs::write(output, code)?;
492    Ok(())
493}
494
495/// Full pipeline: parse a `.chipi` file and generate a Rust decoder.
496///
497/// # Example
498///
499/// ```ignore
500/// chipi::generate("thumb.chipi", "thumb_decoder.rs")?;
501/// ```
502pub fn generate(input: &str, output: &str) -> Result<(), Box<dyn std::error::Error>> {
503    let def = parse(input)?;
504    emit(&def, output)?;
505    Ok(())
506}
507
508/// Generate a function-pointer LUT from a `.chipi` spec file.
509///
510/// Produces a Rust source file containing:
511/// - `pub type Handler = fn(&mut Ctx, u32)`
512/// - Static dispatch tables (`_T0`, `_T1`, ...) indexed by opcode bit ranges
513/// - `pub fn dispatch(ctx: &mut Ctx, opcode: u32)`
514///
515/// `handler_mod` is the module path where handler functions live, e.g.
516/// `"crate::cpu::interpreter"`Each instruction `foo` in the spec must have
517/// a corresponding `pub fn foo(ctx: &mut Ctx, opcode: u32)` there.
518///
519/// `ctx_type` is the mutable context passed to every handler, e.g.
520/// `"crate::gekko::Gekko"`.
521///
522/// # Example (build.rs)
523///
524/// ```ignore
525/// chipi::generate_lut(
526///     "cpu.chipi",
527///     out_dir.join("cpu_lut.rs").to_str().unwrap(),
528///     "crate::cpu::interpreter",
529///     "crate::Cpu",
530/// )?;
531/// ```
532pub fn generate_lut(
533    input: &str,
534    output: &str,
535    handler_mod: &str,
536    ctx_type: &str,
537) -> Result<(), Box<dyn std::error::Error>> {
538    let def = parse(input)?;
539    let validated = validate::validate(&def)
540        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
541    let t = tree::build_tree(&validated);
542    let code = lut_gen::generate_lut_code(&validated, &t, handler_mod, ctx_type, &HashMap::new(), None, None);
543    fs::write(output, code)?;
544    Ok(())
545}
546
547/// Generate handler stub functions for every instruction in a `.chipi` spec.
548///
549/// Each stub has the form:
550/// ```rust,ignore
551/// pub fn twi(_ctx: &mut Ctx, _opcode: u32) { todo!("twi") }
552/// ```
553///
554/// Intended to be run **once** to bootstrap an interpreter module. After that,
555/// replace `todo!()` bodies with real implementations as you go.
556pub fn generate_stubs(
557    input: &str,
558    output: &str,
559    ctx_type: &str,
560) -> Result<(), Box<dyn std::error::Error>> {
561    let def = parse(input)?;
562    let validated = validate::validate(&def)
563        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
564    let code = lut_gen::generate_stubs_code(&validated, ctx_type, &HashMap::new(), None, None);
565    fs::write(output, code)?;
566    Ok(())
567}
568
569/// Generate an instruction newtype with field accessor methods from a `.chipi` spec.
570///
571/// Collects all unique fields across all instructions and generates a
572/// `pub struct Name(pub u32)` with one `#[inline]` accessor method per field.
573///
574/// Fields with the same name but conflicting definitions (different bit ranges
575/// or types) generate separate accessors with bit range suffixes (e.g., `d_15_0`
576/// and `d_11_0`).
577///
578/// # Example
579///
580/// ```ignore
581/// chipi::generate_instr_type("cpu.chipi", "out/instruction.rs", "Instruction")?;
582/// ```
583///
584/// Then in your code:
585///
586/// ```ignore
587/// mod cpu {
588///     include!(concat!(env!("OUT_DIR"), "/instruction.rs"));
589/// }
590/// ```
591pub fn generate_instr_type(
592    input: &str,
593    output: &str,
594    struct_name: &str,
595) -> Result<(), Box<dyn std::error::Error>> {
596    let def = parse(input)?;
597    let validated = validate::validate(&def)
598        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
599    let (code, warnings) = instr_gen::generate_instr_type(&validated, struct_name);
600
601    // Print warnings to stderr (visible during cargo build)
602    for warning in &warnings {
603        eprintln!("warning: {}", warning);
604    }
605
606    fs::write(output, code)?;
607    Ok(())
608}
609
/// Builder for generating a function-pointer LUT and handler stubs,
/// with optional grouping of instructions under shared const-generic handlers.
///
/// Use this when you want multiple instructions to share one handler function
/// via a `const OP: u32` generic parameter. See the crate documentation for
/// the full pattern.
///
/// The builder only stores configuration; the spec file is read each time one
/// of the `build_*` methods is called. Those methods take `&self`, so one
/// configured builder can emit several outputs.
///
/// # Example (build.rs)
///
/// ```ignore
/// chipi::LutBuilder::new("cpu.chipi")
///     .handler_mod("crate::cpu::interpreter")
///     .ctx_type("crate::Cpu")
///     .lut_mod("crate::cpu::lut")
///     .group("alu", ["addi", "addis", "ori", "oris"])
///     .group("mem", ["lwz", "stw", "lbz", "stb"])
///     .build_lut(out_dir.join("cpu_lut.rs").to_str().unwrap())?;
///
/// if !stubs.exists() {
///     chipi::LutBuilder::new("cpu.chipi")
///         .ctx_type("crate::Cpu")
///         .lut_mod("crate::cpu::lut")
///         .group("alu", ["addi", "addis", "ori", "oris"])
///         .group("mem", ["lwz", "stw", "lbz", "stb"])
///         .build_stubs(stubs.to_str().unwrap())?;
/// }
/// ```
#[derive(Debug, Clone, Default)]
pub struct LutBuilder {
    /// Path of the `.chipi` spec file to read.
    input: String,
    /// Module path where handler functions live (empty until set).
    handler_mod: String,
    /// Context type passed to every handler (empty until set).
    ctx_type: String,
    /// instruction name -> group fn name
    instr_to_group: HashMap<String, String>,
    /// group fn name -> instruction names (for stubs)
    group_to_instrs: HashMap<String, Vec<String>>,
    /// Module path where the generated `OP_*` constants live, if configured.
    lut_mod: Option<String>,
    /// Type of the second parameter of every handler. When `None`, the
    /// generator falls back to its default raw opcode type.
    instr_type: Option<String>,
    /// Expression to extract the raw opcode integer from the instr local
    /// (documented default: `"instr.0"` when `instr_type` is set, `"opcode"`
    /// otherwise).
    raw_expr: Option<String>,
}
653
654impl LutBuilder {
655    /// Create a new builder targeting the given `.chipi` spec file.
656    pub fn new(input: impl Into<String>) -> Self {
657        Self {
658            input: input.into(),
659            ..Default::default()
660        }
661    }
662
663    /// Set the Rust module path where handler functions live (e.g. `"crate::cpu::interpreter"`).
664    pub fn handler_mod(mut self, m: impl Into<String>) -> Self {
665        self.handler_mod = m.into();
666        self
667    }
668
669    /// Set the mutable context type passed to every handler (e.g. `"crate::Cpu"`).
670    pub fn ctx_type(mut self, t: impl Into<String>) -> Self {
671        self.ctx_type = t.into();
672        self
673    }
674
675    /// Set the Rust module path where the generated `OP_*` constants live
676    /// (e.g. `"crate::cpu::lut"`). Required when using groups so that stubs
677    /// can `use {lut_mod}::*` to import the constants.
678    pub fn lut_mod(mut self, path: impl Into<String>) -> Self {
679        self.lut_mod = Some(path.into());
680        self
681    }
682
683    /// Override the type of the second parameter of every handler function.
684    ///
685    /// Defaults to `u32` (raw opcode word). Set to a wrapper type such as
686    /// `"crate::cpu::semantics::Instruction"` to have handlers receive a
687    /// richer type instead. You must also call [`Self::raw_expr`] to tell
688    /// chipi how to extract the underlying `u32` for table indexing.
689    pub fn instr_type(mut self, t: impl Into<String>) -> Self {
690        self.instr_type = Some(t.into());
691        self
692    }
693
694    /// Expression that yields a `u32` from the `instr` local inside a generated
695    /// dispatch function. Only meaningful when [`Self::instr_type`] is set.
696    ///
697    /// For a newtype `struct Instruction(pub u32)` this is `"instr.0"` (the default
698    /// when `instr_type` is set). For a struct with a `raw()` method use `"instr.raw()"`.
699    pub fn raw_expr(mut self, expr: impl Into<String>) -> Self {
700        self.raw_expr = Some(expr.into());
701        self
702    }
703
704    /// Register a group: `name` is the shared handler function name (e.g. `"alu"`),
705    /// `instrs` lists the instruction names that route to it.
706    ///
707    /// Each instruction in `instrs` will appear in the LUT as
708    /// `handler_mod::alu::<{ OP_INSTR }>` instead of `handler_mod::instr`.
709    /// The generated stub is `pub fn alu<const OP: u32>(...)` with a `match OP` body.
710    pub fn group(
711        mut self,
712        name: impl Into<String>,
713        instrs: impl IntoIterator<Item = impl Into<String>>,
714    ) -> Self {
715        let name = name.into();
716        let instrs: Vec<String> = instrs.into_iter().map(|s| s.into()).collect();
717        for instr in &instrs {
718            self.instr_to_group.insert(instr.clone(), name.clone());
719        }
720        self.group_to_instrs.insert(name, instrs);
721        self
722    }
723
724    /// Generate the LUT source file.
725    pub fn build_lut(&self, output: &str) -> Result<(), Box<dyn std::error::Error>> {
726        let def = parse(&self.input)?;
727        let validated = validate::validate(&def)
728            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
729        let t = tree::build_tree(&validated);
730        let code = lut_gen::generate_lut_code(
731            &validated,
732            &t,
733            &self.handler_mod,
734            &self.ctx_type,
735            &self.instr_to_group,
736            self.instr_type.as_deref(),
737            self.raw_expr.as_deref(),
738        );
739        fs::write(output, code)?;
740        Ok(())
741    }
742
743    /// Generate handler stubs source file.
744    pub fn build_stubs(&self, output: &str) -> Result<(), Box<dyn std::error::Error>> {
745        let def = parse(&self.input)?;
746        let validated = validate::validate(&def)
747            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
748        let code = lut_gen::generate_stubs_code(
749            &validated,
750            &self.ctx_type,
751            &self.group_to_instrs,
752            self.lut_mod.as_deref(),
753            self.instr_type.as_deref(),
754        );
755        fs::write(output, code)?;
756        Ok(())
757    }
758
759    /// Generate an instruction newtype with field accessor methods.
760    ///
761    /// Collects all unique fields from the spec and generates a
762    /// `pub struct Name(pub u32)` with one `#[inline]` accessor per field.
763    ///
764    /// The struct name is derived from the last path segment of `.instr_type()`
765    /// (e.g., `"crate::cpu::Instruction"` -> `"Instruction"`), or defaults to
766    /// `"Instruction"` if `.instr_type()` was not called.
767    ///
768    /// Fields with conflicting definitions across instructions generate separate
769    /// accessors with bit range suffixes (e.g., `d_15_0` and `d_11_0`).
770    ///
771    /// # Example
772    ///
773    /// ```ignore
774    /// chipi::LutBuilder::new("cpu.chipi")
775    ///     .instr_type("crate::cpu::Instruction")
776    ///     .build_instr_type(out_dir.join("instruction.rs").to_str().unwrap())?;
777    /// ```
778    pub fn build_instr_type(&self, output: &str) -> Result<(), Box<dyn std::error::Error>> {
779        let def = parse(&self.input)?;
780        let validated = validate::validate(&def)
781            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
782
783        // Derive struct name from instr_type path or default to "Instruction"
784        let struct_name = self
785            .instr_type
786            .as_deref()
787            .and_then(|t| t.rsplit("::").next())
788            .unwrap_or("Instruction");
789
790        let (code, warnings) = instr_gen::generate_instr_type(&validated, struct_name);
791
792        // Print warnings to stderr (visible during cargo build)
793        for warning in &warnings {
794            eprintln!("cargo:warning={}", warning);
795        }
796
797        fs::write(output, code)?;
798        Ok(())
799    }
800}
801
802/// Parse, validate, and generate code from source text. Returns the
803/// generated Rust code as a `String`.
804///
805/// # Errors
806///
807/// Returns parse or validation errors.
808pub fn generate_from_str(
809    source: &str,
810    filename: &str,
811) -> Result<String, Box<dyn std::error::Error>> {
812    let def = parser::parse(source, filename)
813        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
814
815    let validated = validate::validate(&def)
816        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
817
818    let tree = tree::build_tree(&validated);
819    let code = codegen::generate_code(&validated, &tree);
820
821    Ok(code)
822}