Skip to main content

chipi_core/
lib.rs

//! # chipi-core
//!
//! Core library for the chipi instruction decoder generator.
//!
//! This crate provides the parser, validation, IR, and code generation backends.
//! It is consumed by `chipi-cli` (the standalone CLI tool) and `chipi-build`
//! (the `build.rs` helper for Rust projects).
//!
//! ## Crate structure
//!
//! - [`parser`]: parses `.chipi` files into a raw AST ([`types::DecoderDef`])
//! - [`validate`]: validates and lowers the AST into a language-agnostic IR ([`types::ValidatedDef`])
//! - [`tree`]: builds an optimal decision tree for instruction dispatch
//! - [`backend`]: code generation backends (currently Rust only)
//! - [`config`]: TOML config schema and [`config::Dispatch`] enum
//! - [`codegen`]: Rust decoder/disassembler code generation
//! - [`lut_gen`]: Rust emulator dispatch LUT generation
//! - [`instr_gen`]: Rust instruction newtype generation
//!
//! ## Quick start
//!
//! For `build.rs` usage, prefer `chipi-build` which wraps this library with
//! `cargo:rerun-if-changed` support. For CLI usage, use `chipi-cli`.
//! Use `chipi-core` directly only when you need low-level control.
//!
//! ```ignore
//! // Decoder/disassembler generation
//! chipi_core::CodegenBuilder::new("dsp.chipi")
//!     .type_map("reg5", "crate::dsp::DspReg")
//!     .decoder_dispatch("GcDspExt", chipi_core::Dispatch::JumpTable)
//!     .output("out.rs")
//!     .run()?;
//!
//! // Emulator dispatch LUT (programmatic)
//! chipi_core::LutBuilder::new("cpu.chipi")
//!     .handler_mod("crate::cpu::interpreter")
//!     .ctx_type("crate::Cpu")
//!     .group("alu", ["addi", "addis"])
//!     .build_lut("out/lut.rs")?;
//!
//! // Emulator dispatch LUT (from chipi.toml config)
//! let cfg = chipi_core::config::load_config(Path::new("chipi.toml"))?;
//! for target in &cfg.lut {
//!     chipi_core::LutBuilder::run_target(target)?;
//! }
//! ```
47
48pub mod backend;
49pub mod codegen;
50pub mod codegen_binja;
51pub mod codegen_cpp;
52pub mod codegen_ida;
53pub mod codegen_python;
54pub mod config;
55pub mod error;
56pub mod format_parser;
57pub mod instr_gen;
58pub mod lut_gen;
59pub mod parser;
60pub mod tree;
61pub mod types;
62pub mod validate;
63
64use std::collections::HashMap;
65use std::fs;
66use std::path::Path;
67
68use error::Errors;
69use types::DecoderDef;
70
71/// Parse a `.chipi` file from a file path and return the decoder definition.
72///
73/// # Errors
74///
75/// Returns an error if the file cannot be read or parsed.
76///
77/// # Example
78///
79/// ```ignore
80/// let def = chipi::parse("thumb.chipi")?;
81/// ```
82pub fn parse(input: &str) -> Result<DecoderDef, Box<dyn std::error::Error>> {
83    let path = Path::new(input);
84    // Use include-aware parsing from file path
85    parser::parse_file(path).map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)
86}
87
88/// Parse source text directly without reading from a file.
89///
90/// # Arguments
91///
92/// * `source`: `.chipi` source code
93/// * `filename`: name used in error messages
94pub fn parse_str(source: &str, filename: &str) -> Result<DecoderDef, Vec<error::Error>> {
95    parser::parse(source, filename)
96}
97
98/// Validate a parsed definition and write generated Rust code to a file.
99///
100/// # Errors
101///
102/// Returns validation or I/O errors.
103pub fn emit(def: &DecoderDef, output: &str) -> Result<(), Box<dyn std::error::Error>> {
104    let validated = validate::validate(def)
105        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
106
107    let tree = tree::build_tree(&validated);
108    let code = codegen::generate_code(&validated, &tree, &HashMap::new(), &HashMap::new());
109
110    fs::write(output, code)?;
111    Ok(())
112}
113
114/// Full pipeline: parse a `.chipi` file and generate a Rust decoder.
115///
116/// # Example
117///
118/// ```ignore
119/// chipi::generate("thumb.chipi", "thumb_decoder.rs")?;
120/// ```
121pub fn generate(input: &str, output: &str) -> Result<(), Box<dyn std::error::Error>> {
122    let def = parse(input)?;
123    emit(&def, output)?;
124    Ok(())
125}
126
127/// Generate a function-pointer LUT from a `.chipi` spec file.
128///
129/// Produces a Rust source file containing:
130/// - `pub type Handler = fn(&mut Ctx, u32)`
131/// - Static dispatch tables (`_T0`, `_T1`, ...) indexed by opcode bit ranges
132/// - `pub fn dispatch(ctx: &mut Ctx, opcode: u32)`
133///
134/// `handler_mod` is the module path where handler functions live, e.g.
135/// `"crate::cpu::interpreter"`Each instruction `foo` in the spec must have
136/// a corresponding `pub fn foo(ctx: &mut Ctx, opcode: u32)` there.
137///
138/// `ctx_type` is the mutable context passed to every handler, e.g.
139/// `"crate::gekko::Gekko"`.
140///
141/// # Example (build.rs)
142///
143/// ```ignore
144/// chipi::generate_lut(
145///     "cpu.chipi",
146///     out_dir.join("cpu_lut.rs").to_str().unwrap(),
147///     "crate::cpu::interpreter",
148///     "crate::Cpu",
149/// )?;
150/// ```
151pub fn generate_lut(
152    input: &str,
153    output: &str,
154    handler_mod: &str,
155    ctx_type: &str,
156) -> Result<(), Box<dyn std::error::Error>> {
157    let def = parse(input)?;
158    let validated = validate::validate(&def)
159        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
160    let t = tree::build_tree(&validated);
161    let code = lut_gen::generate_lut_code(
162        &validated,
163        &t,
164        handler_mod,
165        ctx_type,
166        &HashMap::new(),
167        None,
168        None,
169        Dispatch::FnPtrLut,
170    );
171    fs::write(output, code)?;
172    Ok(())
173}
174
175/// Generate an instruction newtype with field accessor methods from a `.chipi` spec.
176///
177/// Collects all unique fields across all instructions and generates a
178/// `pub struct Name(pub u32)` with one `#[inline]` accessor method per field.
179///
180/// Fields with the same name but conflicting definitions (different bit ranges
181/// or types) generate separate accessors with bit range suffixes (e.g., `d_15_0`
182/// and `d_11_0`).
183///
184/// # Example
185///
186/// ```ignore
187/// chipi::generate_instr_type("cpu.chipi", "out/instruction.rs", "Instruction")?;
188/// ```
189///
190/// Then in your code:
191///
192/// ```ignore
193/// mod cpu {
194///     include!(concat!(env!("OUT_DIR"), "/instruction.rs"));
195/// }
196/// ```
197pub fn generate_instr_type(
198    input: &str,
199    output: &str,
200    struct_name: &str,
201) -> Result<(), Box<dyn std::error::Error>> {
202    let def = parse(input)?;
203    let validated = validate::validate(&def)
204        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
205    let (code, warnings) = instr_gen::generate_instr_type(&validated, struct_name);
206
207    // Print warnings to stderr (visible during cargo build)
208    for warning in &warnings {
209        eprintln!("warning: {}", warning);
210    }
211
212    fs::write(output, code)?;
213    Ok(())
214}
215
216/// Builder for generating a function-pointer LUT and handler stubs,
217/// with optional grouping of instructions under shared const-generic handlers.
218///
219/// Use this when you want multiple instructions to share one handler function
220/// via a `const OP: u32` generic parameter. See the crate documentation for
221/// the full pattern.
222///
223/// # Example (build.rs)
224///
225/// ```ignore
226/// chipi::LutBuilder::new("cpu.chipi")
227///     .handler_mod("crate::cpu::interpreter")
228///     .ctx_type("crate::Cpu")
229///     .lut_mod("crate::cpu::lut")
230///     .group("alu", ["addi", "addis", "ori", "oris"])
231///     .group("mem", ["lwz", "stw", "lbz", "stb"])
232///     .build_lut(out_dir.join("cpu_lut.rs").to_str().unwrap())?;
233///
234/// ```
235#[derive(Default)]
236pub struct LutBuilder {
237    input: String,
238    handler_mod: String,
239    ctx_type: String,
240    /// instruction name -> group fn name
241    instr_to_group: HashMap<String, String>,
242    /// group fn name -> instruction names (for stubs)
243    group_to_instrs: HashMap<String, Vec<String>>,
244    lut_mod: Option<String>,
245    /// Type of the second parameter of every handler (default: `u32`).
246    instr_type: Option<String>,
247    /// Expression to extract the raw `u32` from the instr local (default: `"instr.0"`
248    /// when `instr_type` is set, `"opcode"` otherwise).
249    raw_expr: Option<String>,
250    /// Dispatch strategy (default: `FnPtrLut`).
251    dispatch: Dispatch,
252    /// Sub-decoder groups: sub-decoder name -> { instr_name -> group_fn_name }
253    subdecoder_groups: HashMap<String, HashMap<String, String>>,
254    /// Sub-decoder instruction types: sub-decoder name -> Rust type path
255    subdecoder_instr_types: HashMap<String, String>,
256}
257
258impl LutBuilder {
259    /// Create a new builder targeting the given `.chipi` spec file.
260    pub fn new(input: impl Into<String>) -> Self {
261        Self {
262            input: input.into(),
263            ..Default::default()
264        }
265    }
266
267    /// Set the Rust module path where handler functions live (e.g. `"crate::cpu::interpreter"`).
268    pub fn handler_mod(mut self, m: impl Into<String>) -> Self {
269        self.handler_mod = m.into();
270        self
271    }
272
273    /// Set the mutable context type passed to every handler (e.g. `"crate::Cpu"`).
274    pub fn ctx_type(mut self, t: impl Into<String>) -> Self {
275        self.ctx_type = t.into();
276        self
277    }
278
279    /// Set the Rust module path where the generated `OP_*` constants live
280    /// (e.g. `"crate::cpu::lut"`). Required when using groups so that stubs
281    /// can `use {lut_mod}::*` to import the constants.
282    pub fn lut_mod(mut self, path: impl Into<String>) -> Self {
283        self.lut_mod = Some(path.into());
284        self
285    }
286
287    /// Override the type of the second parameter of every handler function.
288    ///
289    /// Defaults to `u32` (raw opcode word). Set to a wrapper type such as
290    /// `"crate::cpu::semantics::Instruction"` to have handlers receive a
291    /// richer type instead. You must also call [`Self::raw_expr`] to tell
292    /// chipi how to extract the underlying `u32` for table indexing.
293    pub fn instr_type(mut self, t: impl Into<String>) -> Self {
294        self.instr_type = Some(t.into());
295        self
296    }
297
298    /// Expression that yields a `u32` from the `instr` local inside a generated
299    /// dispatch function. Only meaningful when [`Self::instr_type`] is set.
300    ///
301    /// For a newtype `struct Instruction(pub u32)` this is `"instr.0"` (the default
302    /// when `instr_type` is set). For a struct with a `raw()` method use `"instr.raw()"`.
303    pub fn raw_expr(mut self, expr: impl Into<String>) -> Self {
304        self.raw_expr = Some(expr.into());
305        self
306    }
307
308    /// Set the dispatch strategy.
309    ///
310    /// - [`Dispatch::FnPtrLut`] (default): static `[Handler; N]` arrays with indirect
311    ///   calls. Each tree level gets its own table.
312    /// - [`Dispatch::JumpTable`]: a single `#[inline(always)]` function with nested
313    ///   match statements. The compiler can inline handler calls for zero-overhead
314    ///   dispatch when handlers are also `#[inline(always)]`.
315    pub fn dispatch(mut self, strategy: Dispatch) -> Self {
316        self.dispatch = strategy;
317        self
318    }
319
320    /// Register a group: `name` is the shared handler function name (e.g. `"alu"`),
321    /// `instrs` lists the instruction names that route to it.
322    ///
323    /// Each instruction in `instrs` will appear in the LUT as
324    /// `handler_mod::alu::<{ OP_INSTR }>` instead of `handler_mod::instr`.
325    /// The generated stub is `pub fn alu<const OP: u32>(...)` with a `match OP` body.
326    pub fn group(
327        mut self,
328        name: impl Into<String>,
329        instrs: impl IntoIterator<Item = impl Into<String>>,
330    ) -> Self {
331        let name = name.into();
332        let instrs: Vec<String> = instrs.into_iter().map(|s| s.into()).collect();
333        for instr in &instrs {
334            self.instr_to_group.insert(instr.clone(), name.clone());
335        }
336        self.group_to_instrs.insert(name, instrs);
337        self
338    }
339
340    /// Create a `LutBuilder` from a [`config::LutTarget`].
341    pub fn from_config(target: &config::LutTarget) -> Self {
342        let mut builder = Self::new(&target.input)
343            .handler_mod(&target.handler_mod)
344            .ctx_type(&target.ctx_type)
345            .dispatch(target.dispatch);
346
347        if let Some(ref lut_mod) = target.lut_mod {
348            builder = builder.lut_mod(lut_mod);
349        }
350        if let Some(ref instr_type) = target.instr_type {
351            builder = builder.instr_type(instr_type);
352        }
353        if let Some(ref raw_expr) = target.raw_expr {
354            builder = builder.raw_expr(raw_expr);
355        }
356        for (name, instrs) in &target.groups {
357            builder = builder.group(name, instrs.iter().map(|s| s.as_str()));
358        }
359        // Build sub-decoder groups: sd_name -> { instr_name -> group_fn_name }
360        for (sd_name, groups) in &target.subdecoder_groups {
361            let mut instr_to_group = HashMap::new();
362            for (group_name, instrs) in groups {
363                for instr in instrs {
364                    instr_to_group.insert(instr.clone(), group_name.clone());
365                }
366            }
367            builder
368                .subdecoder_groups
369                .insert(sd_name.clone(), instr_to_group);
370        }
371        for (sd_name, sd_type) in &target.subdecoder_instr_types {
372            builder
373                .subdecoder_instr_types
374                .insert(sd_name.clone(), sd_type.clone());
375        }
376        builder
377    }
378
379    /// Run all outputs defined in a [`config::LutTarget`].
380    ///
381    /// Generates the LUT file, and optionally the instruction type and stubs
382    /// if configured. Stubs are only generated if the target file does not exist.
383    pub fn run_target(target: &config::LutTarget) -> Result<(), Box<dyn std::error::Error>> {
384        let builder = Self::from_config(target);
385
386        builder.build_lut(&target.output)?;
387
388        if let Some(ref instr_output) = target.instr_type_output {
389            builder.build_instr_type(instr_output)?;
390        }
391
392        for (sd_name, sd_output) in &target.subdecoder_instr_type_outputs {
393            builder.build_subdecoder_instr_type(sd_name, sd_output)?;
394        }
395
396        Ok(())
397    }
398
399    /// Generate the LUT source file.
400    pub fn build_lut(&self, output: &str) -> Result<(), Box<dyn std::error::Error>> {
401        let def = parse(&self.input)?;
402        let validated = validate::validate(&def)
403            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
404        let t = tree::build_tree(&validated);
405        let mut code = lut_gen::generate_lut_code(
406            &validated,
407            &t,
408            &self.handler_mod,
409            &self.ctx_type,
410            &self.instr_to_group,
411            self.instr_type.as_deref(),
412            self.raw_expr.as_deref(),
413            self.dispatch,
414        );
415
416        // Generate dispatch functions for sub-decoders that have groups configured
417        for sd in &validated.sub_decoders {
418            if let Some(groups) = self.subdecoder_groups.get(&sd.name) {
419                code.push('\n');
420                code.push_str(&lut_gen::generate_subdecoder_dispatch(
421                    &validated,
422                    sd,
423                    &self.handler_mod,
424                    &self.ctx_type,
425                    groups,
426                    self.subdecoder_instr_types
427                        .get(&sd.name)
428                        .map(|s| s.as_str()),
429                ));
430            }
431        }
432
433        fs::write(output, code)?;
434        Ok(())
435    }
436
437    /// Generate an instruction newtype with field accessor methods.
438    ///
439    /// Collects all unique fields from the spec and generates a
440    /// `pub struct Name(pub u32)` with one `#[inline]` accessor per field.
441    ///
442    /// The struct name is derived from the last path segment of `.instr_type()`
443    /// (e.g., `"crate::cpu::Instruction"` -> `"Instruction"`), or defaults to
444    /// `"Instruction"` if `.instr_type()` was not called.
445    ///
446    /// Fields with conflicting definitions across instructions generate separate
447    /// accessors with bit range suffixes (e.g., `d_15_0` and `d_11_0`).
448    ///
449    /// # Example
450    ///
451    /// ```ignore
452    /// chipi::LutBuilder::new("cpu.chipi")
453    ///     .instr_type("crate::cpu::Instruction")
454    ///     .build_instr_type(out_dir.join("instruction.rs").to_str().unwrap())?;
455    /// ```
456    pub fn build_instr_type(&self, output: &str) -> Result<(), Box<dyn std::error::Error>> {
457        let def = parse(&self.input)?;
458        let validated = validate::validate(&def)
459            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
460
461        // Derive struct name from instr_type path or default to "Instruction"
462        let struct_name = self
463            .instr_type
464            .as_deref()
465            .and_then(|t| t.rsplit("::").next())
466            .unwrap_or("Instruction");
467
468        let (code, warnings) = instr_gen::generate_instr_type(&validated, struct_name);
469
470        // Print warnings to stderr (visible during cargo build)
471        for warning in &warnings {
472            eprintln!("cargo:warning={}", warning);
473        }
474
475        fs::write(output, code)?;
476        Ok(())
477    }
478
479    /// Generate an instruction newtype for a sub-decoder.
480    ///
481    /// Collects all unique fields from the sub-decoder's instructions and generates
482    /// a `pub struct Name(pub u8/u16/u32)` with accessor methods.
483    pub fn build_subdecoder_instr_type(
484        &self,
485        sd_name: &str,
486        output: &str,
487    ) -> Result<(), Box<dyn std::error::Error>> {
488        let def = parse(&self.input)?;
489        let validated = validate::validate(&def)
490            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
491
492        let sd = validated
493            .sub_decoders
494            .iter()
495            .find(|sd| sd.name == sd_name)
496            .ok_or_else(|| format!("sub-decoder '{}' not found in spec", sd_name))?;
497
498        let (code, warnings) = instr_gen::generate_subdecoder_instr_type(sd, sd_name);
499
500        for warning in &warnings {
501            eprintln!("cargo:warning={}", warning);
502        }
503
504        fs::write(output, code)?;
505        Ok(())
506    }
507}
508
509/// Parse, validate, and generate code from source text. Returns the
510/// generated Rust code as a `String`.
511///
512/// # Errors
513///
514/// Returns parse or validation errors.
515pub fn generate_from_str(
516    source: &str,
517    filename: &str,
518) -> Result<String, Box<dyn std::error::Error>> {
519    let def = parser::parse(source, filename)
520        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
521
522    let validated = validate::validate(&def)
523        .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
524
525    let tree = tree::build_tree(&validated);
526    let code = codegen::generate_code(&validated, &tree, &HashMap::new(), &HashMap::new());
527
528    Ok(code)
529}
530
531pub use config::Dispatch;
532
533/// Builder for generating a decoder with type mappings and dispatch strategy control.
534///
535/// Use this when you need to map chipi type names to Rust wrapper types (replacing
536/// the removed `import`/`as` syntax) or control the dispatch strategy per decoder.
537///
538/// # Example (build.rs)
539///
540/// ```ignore
541/// chipi::CodegenBuilder::new("src/gcdsp.chipi")
542///     .type_map("reg5", "crate::dsp::DspReg")
543///     .decoder_dispatch("GcDsp", chipi::Dispatch::FnPtrLut)
544///     .decoder_dispatch("GcDspExt", chipi::Dispatch::JumpTable)
545///     .output("src/generated/gcdsp.rs")
546///     .run();
547/// ```
548#[derive(Default)]
549pub struct CodegenBuilder {
550    input: String,
551    type_maps: HashMap<String, String>,
552    dispatch_overrides: HashMap<String, Dispatch>,
553    output: Option<String>,
554}
555
556impl CodegenBuilder {
557    /// Create a new builder targeting the given `.chipi` spec file.
558    pub fn new(input: impl Into<String>) -> Self {
559        Self {
560            input: input.into(),
561            ..Default::default()
562        }
563    }
564
565    /// Map a chipi type name to a Rust type path.
566    ///
567    /// Fields declared with this type name in the `.chipi` file will use the
568    /// given Rust type in generated code. The codegen emits a `use` statement
569    /// for paths containing `::`.
570    ///
571    /// # Example
572    ///
573    /// ```ignore
574    /// .type_map("reg5", "crate::dsp::DspReg")
575    /// ```
576    pub fn type_map(mut self, chipi_type: &str, rust_path: &str) -> Self {
577        self.type_maps
578            .insert(chipi_type.to_string(), rust_path.to_string());
579        self
580    }
581
582    /// Set the dispatch strategy for a specific decoder or sub-decoder.
583    ///
584    /// Defaults: `JumpTable` for sub-decoders, decision tree for main decoders.
585    pub fn decoder_dispatch(mut self, decoder_name: &str, strategy: Dispatch) -> Self {
586        self.dispatch_overrides
587            .insert(decoder_name.to_string(), strategy);
588        self
589    }
590
591    /// Set the output file path.
592    pub fn output(mut self, path: &str) -> Self {
593        self.output = Some(path.to_string());
594        self
595    }
596
597    /// Run the full pipeline: parse, validate, and generate code.
598    pub fn run(&self) -> Result<(), Box<dyn std::error::Error>> {
599        let def = parse(&self.input)?;
600        let validated = validate::validate(&def)
601            .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
602
603        let tree = tree::build_tree(&validated);
604        let code =
605            codegen::generate_code(&validated, &tree, &self.type_maps, &self.dispatch_overrides);
606
607        if let Some(ref output) = self.output {
608            fs::write(output, code)?;
609        }
610
611        Ok(())
612    }
613}