Skip to main content

chipi_core/
lib.rs

1//! # chipi-core
2//!
3//! Core library for the chipi instruction decoder generator.
4//!
5//! This crate provides the parser, validation, IR, and code generation backends.
6//! It is consumed by `chipi-cli` (the standalone CLI tool) and `chipi-build`
7//! (the `build.rs` helper for Rust projects).
8//!
9//! ## Crate structure
10//!
11//! - [`parser`]: parses `.chipi` files into a raw AST ([`types::DecoderDef`])
12//! - [`validate`]: validates and lowers the AST into a language-agnostic IR ([`types::ValidatedDef`])
13//! - [`tree`]: builds an optimal decision tree for instruction dispatch
14//! - [`backend`]: code generation backends (currently Rust only)
15//! - [`config`]: TOML config schema and [`config::Dispatch`] enum
16//! - [`codegen`]: Rust decoder/disassembler code generation
17//! - [`lut_gen`]: Rust emulator dispatch LUT generation
18//! - [`instr_gen`]: Rust instruction newtype generation
19//!
20//! ## Quick start
21//!
22//! For `build.rs` usage, prefer `chipi-build` which wraps this library with
23//! `cargo:rerun-if-changed` support. For CLI usage, use `chipi-cli`.
24//! Use `chipi-core` directly only when you need low-level control.
25//!
26//! ```ignore
27//! // Decoder/disassembler generation
28//! chipi_core::CodegenBuilder::new("dsp.chipi")
29//!     .type_map("reg5", "crate::dsp::DspReg")
30//!     .decoder_dispatch("GcDspExt", chipi_core::Dispatch::JumpTable)
31//!     .output("out.rs")
32//!     .run()?;
33//!
34//! // Emulator dispatch LUT (programmatic)
35//! chipi_core::LutBuilder::new("cpu.chipi")
36//!     .handler_mod("crate::cpu::interpreter")
37//!     .ctx_type("crate::Cpu")
38//!     .group("alu", ["addi", "addis"])
39//!     .build_lut("out/lut.rs")?;
40//!
41//! // Emulator dispatch LUT (from chipi.toml config)
42//! let cfg = chipi_core::config::load_config(Path::new("chipi.toml"))?;
43//! for target in &cfg.lut {
44//!     chipi_core::LutBuilder::run_target(target)?;
45//! }
46//! ```
47
48pub mod backend;
49pub mod codegen;
50pub mod config;
51pub mod error;
52pub mod format_parser;
53pub mod instr_gen;
54pub mod lut_gen;
55pub mod parser;
56pub mod tree;
57pub mod types;
58pub mod validate;
59
60use std::collections::HashMap;
61use std::fs;
62use std::path::Path;
63
64use error::Errors;
65use types::DecoderDef;
66
67/// Parse a `.chipi` file from a file path and return the decoder definition.
68///
69/// # Errors
70///
71/// Returns an error if the file cannot be read or parsed.
72///
73/// # Example
74///
75/// ```ignore
76/// let def = chipi::parse("thumb.chipi")?;
77/// ```
78pub fn parse(input: &str) -> Result<DecoderDef, Box<dyn std::error::Error>> {
79    let path = Path::new(input);
80    // Use include-aware parsing from file path
81    parser::parse_file(path).map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)
82}
83
84/// Parse source text directly without reading from a file.
85///
86/// # Arguments
87///
88/// * `source`: `.chipi` source code
89/// * `filename`: name used in error messages
90pub fn parse_str(source: &str, filename: &str) -> Result<DecoderDef, Vec<error::Error>> {
91    parser::parse(source, filename)
92}
93
94/// Validate a parsed definition and write generated Rust code to a file.
95///
96/// # Errors
97///
98/// Returns validation or I/O errors.
99pub fn emit(def: &DecoderDef, output: &str) -> Result<(), Box<dyn std::error::Error>> {
100    let validated = validate::validate(def)
101        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
102
103    let tree = tree::build_tree(&validated);
104    let code = codegen::generate_code(&validated, &tree, &HashMap::new(), &HashMap::new());
105
106    fs::write(output, code)?;
107    Ok(())
108}
109
110/// Full pipeline: parse a `.chipi` file and generate a Rust decoder.
111///
112/// # Example
113///
114/// ```ignore
115/// chipi::generate("thumb.chipi", "thumb_decoder.rs")?;
116/// ```
117pub fn generate(input: &str, output: &str) -> Result<(), Box<dyn std::error::Error>> {
118    let def = parse(input)?;
119    emit(&def, output)?;
120    Ok(())
121}
122
123/// Generate a function-pointer LUT from a `.chipi` spec file.
124///
125/// Produces a Rust source file containing:
126/// - `pub type Handler = fn(&mut Ctx, u32)`
127/// - Static dispatch tables (`_T0`, `_T1`, ...) indexed by opcode bit ranges
128/// - `pub fn dispatch(ctx: &mut Ctx, opcode: u32)`
129///
130/// `handler_mod` is the module path where handler functions live, e.g.
131/// `"crate::cpu::interpreter"`Each instruction `foo` in the spec must have
132/// a corresponding `pub fn foo(ctx: &mut Ctx, opcode: u32)` there.
133///
134/// `ctx_type` is the mutable context passed to every handler, e.g.
135/// `"crate::gekko::Gekko"`.
136///
137/// # Example (build.rs)
138///
139/// ```ignore
140/// chipi::generate_lut(
141///     "cpu.chipi",
142///     out_dir.join("cpu_lut.rs").to_str().unwrap(),
143///     "crate::cpu::interpreter",
144///     "crate::Cpu",
145/// )?;
146/// ```
147pub fn generate_lut(
148    input: &str,
149    output: &str,
150    handler_mod: &str,
151    ctx_type: &str,
152) -> Result<(), Box<dyn std::error::Error>> {
153    let def = parse(input)?;
154    let validated = validate::validate(&def)
155        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
156    let t = tree::build_tree(&validated);
157    let code = lut_gen::generate_lut_code(
158        &validated,
159        &t,
160        handler_mod,
161        ctx_type,
162        &HashMap::new(),
163        None,
164        None,
165        Dispatch::FnPtrLut,
166    );
167    fs::write(output, code)?;
168    Ok(())
169}
170
171/// Generate an instruction newtype with field accessor methods from a `.chipi` spec.
172///
173/// Collects all unique fields across all instructions and generates a
174/// `pub struct Name(pub u32)` with one `#[inline]` accessor method per field.
175///
176/// Fields with the same name but conflicting definitions (different bit ranges
177/// or types) generate separate accessors with bit range suffixes (e.g., `d_15_0`
178/// and `d_11_0`).
179///
180/// # Example
181///
182/// ```ignore
183/// chipi::generate_instr_type("cpu.chipi", "out/instruction.rs", "Instruction")?;
184/// ```
185///
186/// Then in your code:
187///
188/// ```ignore
189/// mod cpu {
190///     include!(concat!(env!("OUT_DIR"), "/instruction.rs"));
191/// }
192/// ```
193pub fn generate_instr_type(
194    input: &str,
195    output: &str,
196    struct_name: &str,
197) -> Result<(), Box<dyn std::error::Error>> {
198    let def = parse(input)?;
199    let validated = validate::validate(&def)
200        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
201    let (code, warnings) = instr_gen::generate_instr_type(&validated, struct_name);
202
203    // Print warnings to stderr (visible during cargo build)
204    for warning in &warnings {
205        eprintln!("warning: {}", warning);
206    }
207
208    fs::write(output, code)?;
209    Ok(())
210}
211
212/// Builder for generating a function-pointer LUT and handler stubs,
213/// with optional grouping of instructions under shared const-generic handlers.
214///
215/// Use this when you want multiple instructions to share one handler function
216/// via a `const OP: u32` generic parameter. See the crate documentation for
217/// the full pattern.
218///
219/// # Example (build.rs)
220///
221/// ```ignore
222/// chipi::LutBuilder::new("cpu.chipi")
223///     .handler_mod("crate::cpu::interpreter")
224///     .ctx_type("crate::Cpu")
225///     .lut_mod("crate::cpu::lut")
226///     .group("alu", ["addi", "addis", "ori", "oris"])
227///     .group("mem", ["lwz", "stw", "lbz", "stb"])
228///     .build_lut(out_dir.join("cpu_lut.rs").to_str().unwrap())?;
229///
230/// ```
231#[derive(Default)]
232pub struct LutBuilder {
233    input: String,
234    handler_mod: String,
235    ctx_type: String,
236    /// instruction name -> group fn name
237    instr_to_group: HashMap<String, String>,
238    /// group fn name -> instruction names (for stubs)
239    group_to_instrs: HashMap<String, Vec<String>>,
240    lut_mod: Option<String>,
241    /// Type of the second parameter of every handler (default: `u32`).
242    instr_type: Option<String>,
243    /// Expression to extract the raw `u32` from the instr local (default: `"instr.0"`
244    /// when `instr_type` is set, `"opcode"` otherwise).
245    raw_expr: Option<String>,
246    /// Dispatch strategy (default: `FnPtrLut`).
247    dispatch: Dispatch,
248}
249
250impl LutBuilder {
251    /// Create a new builder targeting the given `.chipi` spec file.
252    pub fn new(input: impl Into<String>) -> Self {
253        Self {
254            input: input.into(),
255            ..Default::default()
256        }
257    }
258
259    /// Set the Rust module path where handler functions live (e.g. `"crate::cpu::interpreter"`).
260    pub fn handler_mod(mut self, m: impl Into<String>) -> Self {
261        self.handler_mod = m.into();
262        self
263    }
264
265    /// Set the mutable context type passed to every handler (e.g. `"crate::Cpu"`).
266    pub fn ctx_type(mut self, t: impl Into<String>) -> Self {
267        self.ctx_type = t.into();
268        self
269    }
270
271    /// Set the Rust module path where the generated `OP_*` constants live
272    /// (e.g. `"crate::cpu::lut"`). Required when using groups so that stubs
273    /// can `use {lut_mod}::*` to import the constants.
274    pub fn lut_mod(mut self, path: impl Into<String>) -> Self {
275        self.lut_mod = Some(path.into());
276        self
277    }
278
279    /// Override the type of the second parameter of every handler function.
280    ///
281    /// Defaults to `u32` (raw opcode word). Set to a wrapper type such as
282    /// `"crate::cpu::semantics::Instruction"` to have handlers receive a
283    /// richer type instead. You must also call [`Self::raw_expr`] to tell
284    /// chipi how to extract the underlying `u32` for table indexing.
285    pub fn instr_type(mut self, t: impl Into<String>) -> Self {
286        self.instr_type = Some(t.into());
287        self
288    }
289
290    /// Expression that yields a `u32` from the `instr` local inside a generated
291    /// dispatch function. Only meaningful when [`Self::instr_type`] is set.
292    ///
293    /// For a newtype `struct Instruction(pub u32)` this is `"instr.0"` (the default
294    /// when `instr_type` is set). For a struct with a `raw()` method use `"instr.raw()"`.
295    pub fn raw_expr(mut self, expr: impl Into<String>) -> Self {
296        self.raw_expr = Some(expr.into());
297        self
298    }
299
300    /// Set the dispatch strategy.
301    ///
302    /// - [`Dispatch::FnPtrLut`] (default): static `[Handler; N]` arrays with indirect
303    ///   calls. Each tree level gets its own table.
304    /// - [`Dispatch::JumpTable`]: a single `#[inline(always)]` function with nested
305    ///   match statements. The compiler can inline handler calls for zero-overhead
306    ///   dispatch when handlers are also `#[inline(always)]`.
307    pub fn dispatch(mut self, strategy: Dispatch) -> Self {
308        self.dispatch = strategy;
309        self
310    }
311
312    /// Register a group: `name` is the shared handler function name (e.g. `"alu"`),
313    /// `instrs` lists the instruction names that route to it.
314    ///
315    /// Each instruction in `instrs` will appear in the LUT as
316    /// `handler_mod::alu::<{ OP_INSTR }>` instead of `handler_mod::instr`.
317    /// The generated stub is `pub fn alu<const OP: u32>(...)` with a `match OP` body.
318    pub fn group(
319        mut self,
320        name: impl Into<String>,
321        instrs: impl IntoIterator<Item = impl Into<String>>,
322    ) -> Self {
323        let name = name.into();
324        let instrs: Vec<String> = instrs.into_iter().map(|s| s.into()).collect();
325        for instr in &instrs {
326            self.instr_to_group.insert(instr.clone(), name.clone());
327        }
328        self.group_to_instrs.insert(name, instrs);
329        self
330    }
331
332    /// Create a `LutBuilder` from a [`config::LutTarget`].
333    pub fn from_config(target: &config::LutTarget) -> Self {
334        let mut builder = Self::new(&target.input)
335            .handler_mod(&target.handler_mod)
336            .ctx_type(&target.ctx_type)
337            .dispatch(target.dispatch);
338
339        if let Some(ref lut_mod) = target.lut_mod {
340            builder = builder.lut_mod(lut_mod);
341        }
342        if let Some(ref instr_type) = target.instr_type {
343            builder = builder.instr_type(instr_type);
344        }
345        if let Some(ref raw_expr) = target.raw_expr {
346            builder = builder.raw_expr(raw_expr);
347        }
348        for (name, instrs) in &target.groups {
349            builder = builder.group(name, instrs.iter().map(|s| s.as_str()));
350        }
351        builder
352    }
353
354    /// Run all outputs defined in a [`config::LutTarget`].
355    ///
356    /// Generates the LUT file, and optionally the instruction type and stubs
357    /// if configured. Stubs are only generated if the target file does not exist.
358    pub fn run_target(target: &config::LutTarget) -> Result<(), Box<dyn std::error::Error>> {
359        let builder = Self::from_config(target);
360
361        builder.build_lut(&target.output)?;
362
363        if let Some(ref instr_output) = target.instr_type_output {
364            builder.build_instr_type(instr_output)?;
365        }
366
367        Ok(())
368    }
369
370    /// Generate the LUT source file.
371    pub fn build_lut(&self, output: &str) -> Result<(), Box<dyn std::error::Error>> {
372        let def = parse(&self.input)?;
373        let validated = validate::validate(&def)
374            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
375        let t = tree::build_tree(&validated);
376        let code = lut_gen::generate_lut_code(
377            &validated,
378            &t,
379            &self.handler_mod,
380            &self.ctx_type,
381            &self.instr_to_group,
382            self.instr_type.as_deref(),
383            self.raw_expr.as_deref(),
384            self.dispatch,
385        );
386        fs::write(output, code)?;
387        Ok(())
388    }
389
390    /// Generate an instruction newtype with field accessor methods.
391    ///
392    /// Collects all unique fields from the spec and generates a
393    /// `pub struct Name(pub u32)` with one `#[inline]` accessor per field.
394    ///
395    /// The struct name is derived from the last path segment of `.instr_type()`
396    /// (e.g., `"crate::cpu::Instruction"` -> `"Instruction"`), or defaults to
397    /// `"Instruction"` if `.instr_type()` was not called.
398    ///
399    /// Fields with conflicting definitions across instructions generate separate
400    /// accessors with bit range suffixes (e.g., `d_15_0` and `d_11_0`).
401    ///
402    /// # Example
403    ///
404    /// ```ignore
405    /// chipi::LutBuilder::new("cpu.chipi")
406    ///     .instr_type("crate::cpu::Instruction")
407    ///     .build_instr_type(out_dir.join("instruction.rs").to_str().unwrap())?;
408    /// ```
409    pub fn build_instr_type(&self, output: &str) -> Result<(), Box<dyn std::error::Error>> {
410        let def = parse(&self.input)?;
411        let validated = validate::validate(&def)
412            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
413
414        // Derive struct name from instr_type path or default to "Instruction"
415        let struct_name = self
416            .instr_type
417            .as_deref()
418            .and_then(|t| t.rsplit("::").next())
419            .unwrap_or("Instruction");
420
421        let (code, warnings) = instr_gen::generate_instr_type(&validated, struct_name);
422
423        // Print warnings to stderr (visible during cargo build)
424        for warning in &warnings {
425            eprintln!("cargo:warning={}", warning);
426        }
427
428        fs::write(output, code)?;
429        Ok(())
430    }
431}
432
433/// Parse, validate, and generate code from source text. Returns the
434/// generated Rust code as a `String`.
435///
436/// # Errors
437///
438/// Returns parse or validation errors.
439pub fn generate_from_str(
440    source: &str,
441    filename: &str,
442) -> Result<String, Box<dyn std::error::Error>> {
443    let def = parser::parse(source, filename)
444        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
445
446    let validated = validate::validate(&def)
447        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
448
449    let tree = tree::build_tree(&validated);
450    let code = codegen::generate_code(&validated, &tree, &HashMap::new(), &HashMap::new());
451
452    Ok(code)
453}
454
455pub use config::Dispatch;
456
457/// Builder for generating a decoder with type mappings and dispatch strategy control.
458///
459/// Use this when you need to map chipi type names to Rust wrapper types (replacing
460/// the removed `import`/`as` syntax) or control the dispatch strategy per decoder.
461///
462/// # Example (build.rs)
463///
464/// ```ignore
465/// chipi::CodegenBuilder::new("src/gcdsp.chipi")
466///     .type_map("reg5", "crate::dsp::DspReg")
467///     .decoder_dispatch("GcDsp", chipi::Dispatch::FnPtrLut)
468///     .decoder_dispatch("GcDspExt", chipi::Dispatch::JumpTable)
469///     .output("src/generated/gcdsp.rs")
470///     .run();
471/// ```
472#[derive(Default)]
473pub struct CodegenBuilder {
474    input: String,
475    type_maps: HashMap<String, String>,
476    dispatch_overrides: HashMap<String, Dispatch>,
477    output: Option<String>,
478}
479
480impl CodegenBuilder {
481    /// Create a new builder targeting the given `.chipi` spec file.
482    pub fn new(input: impl Into<String>) -> Self {
483        Self {
484            input: input.into(),
485            ..Default::default()
486        }
487    }
488
489    /// Map a chipi type name to a Rust type path.
490    ///
491    /// Fields declared with this type name in the `.chipi` file will use the
492    /// given Rust type in generated code. The codegen emits a `use` statement
493    /// for paths containing `::`.
494    ///
495    /// # Example
496    ///
497    /// ```ignore
498    /// .type_map("reg5", "crate::dsp::DspReg")
499    /// ```
500    pub fn type_map(mut self, chipi_type: &str, rust_path: &str) -> Self {
501        self.type_maps
502            .insert(chipi_type.to_string(), rust_path.to_string());
503        self
504    }
505
506    /// Set the dispatch strategy for a specific decoder or sub-decoder.
507    ///
508    /// Defaults: `JumpTable` for sub-decoders, decision tree for main decoders.
509    pub fn decoder_dispatch(mut self, decoder_name: &str, strategy: Dispatch) -> Self {
510        self.dispatch_overrides
511            .insert(decoder_name.to_string(), strategy);
512        self
513    }
514
515    /// Set the output file path.
516    pub fn output(mut self, path: &str) -> Self {
517        self.output = Some(path.to_string());
518        self
519    }
520
521    /// Run the full pipeline: parse, validate, and generate code.
522    pub fn run(&self) -> Result<(), Box<dyn std::error::Error>> {
523        let def = parse(&self.input)?;
524        let validated = validate::validate(&def)
525            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
526
527        let tree = tree::build_tree(&validated);
528        let code =
529            codegen::generate_code(&validated, &tree, &self.type_maps, &self.dispatch_overrides);
530
531        if let Some(ref output) = self.output {
532            fs::write(output, code)?;
533        }
534
535        Ok(())
536    }
537}