chipi/lib.rs
1//! # chipi
2//!
3//! Generate instruction decoders and disassemblers from `.chipi` files.
4//!
5//! Write your CPU instruction encoding in a simple DSL, and chipi generates
6//! the Rust decoder and formatting code for you.
7//!
8//! ## Usage
9//!
10//! Add to `Cargo.toml`:
11//!
12//! ```toml
13//! [build-dependencies]
14//! chipi = "0.2.0"
15//! ```
16//!
17//! Create `build.rs`:
18//!
19//! ```ignore
20//! use std::env;
21//! use std::path::PathBuf;
22//!
23//! fn main() {
24//! let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
25//! chipi::generate("cpu.chipi", out_dir.join("cpu.rs").to_str().unwrap())
26//! .expect("failed to generate decoder");
27//! println!("cargo:rerun-if-changed=cpu.chipi");
28//! }
29//! ```
30//!
31//! Use the generated decoder:
32//!
33//! ```ignore
34//! mod cpu {
35//! include!(concat!(env!("OUT_DIR"), "/cpu.rs"));
36//! }
37//!
38//! match cpu::CpuInstruction::decode(raw) {
39//! Some(instr) => println!("{}", instr),
40//! None => println!("invalid instruction"),
41//! }
42//! ```
43//!
44//! ## Example .chipi file
45//!
46//! ```text
47//! decoder Cpu {
48//! width = 32
49//! bit_order = msb0
50//! }
51//!
52//! type simm16 = i32 { sign_extend(16) }
53//! type simm24 = i32 { sign_extend(24), shift_left(2) }
54//!
55//! bx [0:5]=010010 li:simm24[6:29] aa:bool[30] lk:bool[31]
56//! | "b{lk ? l}{aa ? a} {li:#x}"
57//!
58//! addi [0:5]=001110 rd:u8[6:10] ra:u8[11:15] simm:simm16[16:31]
59//! | ra == 0: "li {rd}, {simm}"
60//! | "addi {rd}, {ra}, {simm}"
61//! ```
62//!
63//! ## Syntax
64//!
65//! ### Decoder block
66//!
67//! ```text
68//! decoder Name {
69//! width = 32 # 8, 16, or 32 bits
70//! bit_order = msb0 # msb0 or lsb0
71//! max_units = 4 # optional: safety guard (validates bit ranges)
72//! }
73//! ```
74//!
75//! #### Variable-Length Instructions
76//!
77//! chipi automatically generates variable-length decoders when you use bit positions
78//! beyond `width-1`. Simply reference subsequent units in your bit ranges:
79//!
80//! ```text
81//! decoder Dsp {
82//! width = 16
83//! bit_order = msb0
84//! max_units = 2 # Optional safety check: ensures bits don't exceed 32 (width * max_units)
85//! }
86//!
87//! nop [0:15]=0000000000000000 # 1 unit (16 bits)
88//! lri [0:10]=00000010000 rd:u5[11:15] imm:u16[16:31] # 2 units (32 bits)
89//! ```
90//!
//! Generated `decode` signature for `width = 16` (unit type `u16`):
92//! - Single-unit: `pub fn decode(opcode: u16) -> Option<Self>`
93//! - Variable-length: `pub fn decode(units: &[u16]) -> Option<(Self, usize)>`
94//!
95//! The variable-length decoder returns both the instruction and the number of units consumed.
96//!
97//! ### Instructions
98//!
99//! Each instruction is one line with a name, fixed bit patterns, and fields:
100//!
101//! ```text
102//! add [0:5]=011111 rd:u8[6:10] ra:u8[11:15]
103//! ```
104//!
105//! Fixed bits use `[range]=pattern`. Fields use `name:type[range]`.
106//!
107//! #### Wildcard Bits
108//!
109//! Use `?` in bit patterns for bits that can be any value:
110//!
111//! ```text
112//! # Match when bits [15:8] are 0x8c, bits [7:0] can be anything
113//! clr15 [15:0]=10001100????????
114//! | "CLR15"
115//!
116//! # Mix wildcards with specific bits
117//! nop [7:4]=0000 [3:0]=????
118//! | "nop"
119//! ```
120//!
121//! Wildcard bits are excluded from the matching mask, so instructions match
122//! regardless of the values in those positions. This is useful for reserved or
123//! architecturally undefined bits.
124//!
125//! #### Overlapping Patterns
126//!
127//! chipi supports overlapping instruction patterns where one pattern is a subset of another.
128//! More specific patterns (with more fixed bits) are checked first:
129//!
130//! ```text
131//! # Generic instruction - matches 0x1X (any value in bits 4-7)
132//! load [0:3]=0001 reg:u4[4:7]
133//! | "load r{reg}"
134//!
135//! # Specific instruction - matches only 0x1F
136//! load_max [0:3]=0001 [4:7]=1111
137//! | "load rmax"
138//! ```
139//!
//! The decoder will check `load_max` first (all bits fixed), then fall back to `load`
//! (bits 4-7 are a field, so any value matches). This works across all units in variable-length decoders.
142//!
143//! ### Types
144//!
145//! Builtin types:
146//! * `bool` (converts bit to true/false)
147//! * `u1` to `u7` (maps to u8)
148//! * `u8`, `u16`, `u32`
149//! * `i8`, `i16`, `i32`
150//!
151//! Custom types:
152//!
153//! ```text
154//! type simm = i32 { sign_extend(16) }
155//! type reg = u8 as Register
156//! ```
157//!
158//! Available transformations:
159//! * `sign_extend(n)` - sign extend from n bits
160//! * `zero_extend(n)` - zero extend from n bits
161//! * `shift_left(n)` - shift left by n bits
162//!
163//! Display format hints (controls how the field is printed in format strings):
164//! * `display(signed_hex)` - signed hex: `0x1A`, `-0x1A`, `0`
165//! * `display(hex)` - unsigned hex: `0x1A`, `0`
166//!
167//! ### Imports
168//!
169//! Import Rust types to wrap extracted values:
170//!
171//! ```text
172//! import crate::cpu::Register
173//! import std::num::Wrapping
174//! ```
175//!
176//! ### Format lines
177//!
178//! Format lines follow an instruction and define its disassembly output:
179//!
180//! ```text
181//! bx [0:5]=010010 li:simm24[6:29] aa:bool[30] lk:bool[31]
182//! | "b{lk ? l}{aa ? a} {li:#x}"
183//! ```
184//!
185//! Features:
186//! * `{field}` - insert field value, with optional format spec: `{field:#x}`
187//! * `{field ? text}` - emit `text` if nonzero, `{field ? yes : no}` for else
188//! * `{a + b * 4}` - inline arithmetic (`+`, `-`, `*`, `/`, `%`)
189//! * `{-field}` - unary negation
190//! * `{map_name(arg)}` - call a map lookup
191//! * `{rotate_right(val, amt)}` - builtin functions
192//! * Guards: `| ra == 0: "li {rd}, {simm}"` - conditional format selection
193//! * Guard arithmetic: `| sh == 32 - mb : "srwi ..."` - arithmetic in guard operands
194//!
195//! ### Maps
196//!
197//! Lookup tables for use in format strings:
198//!
199//! ```text
200//! map spr_name(spr) {
201//! 1 => "xer"
202//! 8 => "lr"
203//! 9 => "ctr"
204//! _ => "???"
205//! }
206//! ```
207//!
208//! ### Formatting trait
209//!
210//! chipi generates a `{Name}Format` trait with one method per instruction.
211//! Default implementations come from format lines. Override selectively:
212//!
213//! ```ignore
214//! struct MyFormat;
215//! impl cpu::CpuFormat for MyFormat {
216//! fn fmt_bx(li: i32, aa: bool, lk: bool,
217//! f: &mut std::fmt::Formatter) -> std::fmt::Result {
218//! write!(f, "BRANCH {:#x}", li)
219//! }
220//! }
221//!
222//! println!("{}", instr.display::<MyFormat>());
223//! ```
224//!
225//! ## API
226//!
227//! ```ignore
228//! // Parse and generate from file
229//! chipi::generate("cpu.chipi", "out.rs")?;
230//!
231//! // Generate from string
232//! let code = chipi::generate_from_str(source, "cpu.chipi")?;
233//!
234//! // Step by step
235//! let def = chipi::parse("cpu.chipi")?;
236//! chipi::emit(&def, "out.rs")?;
237//! ```
238
239pub mod codegen;
240pub mod error;
241pub mod format_parser;
242pub mod parser;
243pub mod tree;
244pub mod types;
245pub mod validate;
246
247use std::fs;
248use std::path::Path;
249
250use error::Errors;
251use types::DecoderDef;
252
253/// Parse a `.chipi` file from a file path and return the decoder definition.
254///
255/// # Errors
256///
257/// Returns an error if the file cannot be read or parsed.
258///
259/// # Example
260///
261/// ```ignore
262/// let def = chipi::parse("thumb.chipi")?;
263/// ```
264pub fn parse(input: &str) -> Result<DecoderDef, Box<dyn std::error::Error>> {
265 let path = Path::new(input);
266 let source = fs::read_to_string(path)?;
267 let filename = path
268 .file_name()
269 .and_then(|f| f.to_str())
270 .unwrap_or(input);
271
272 parser::parse(&source, filename).map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)
273}
274
275/// Parse source text directly without reading from a file.
276///
277/// # Arguments
278///
279/// * `source`: `.chipi` source code
280/// * `filename`: name used in error messages
281pub fn parse_str(source: &str, filename: &str) -> Result<DecoderDef, Vec<error::Error>> {
282 parser::parse(source, filename)
283}
284
285/// Validate a parsed definition and write generated Rust code to a file.
286///
287/// # Errors
288///
289/// Returns validation or I/O errors.
290pub fn emit(def: &DecoderDef, output: &str) -> Result<(), Box<dyn std::error::Error>> {
291 let validated = validate::validate(def)
292 .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
293
294 let tree = tree::build_tree(&validated);
295 let code = codegen::generate_code(&validated, &tree);
296
297 fs::write(output, code)?;
298 Ok(())
299}
300
301/// Full pipeline: parse a `.chipi` file and generate a Rust decoder.
302///
303/// # Example
304///
305/// ```ignore
306/// chipi::generate("thumb.chipi", "thumb_decoder.rs")?;
307/// ```
308pub fn generate(input: &str, output: &str) -> Result<(), Box<dyn std::error::Error>> {
309 let def = parse(input)?;
310 emit(&def, output)?;
311 Ok(())
312}
313
314/// Parse, validate, and generate code from source text. Returns the
315/// generated Rust code as a `String`.
316///
317/// # Errors
318///
319/// Returns parse or validation errors.
320pub fn generate_from_str(
321 source: &str,
322 filename: &str,
323) -> Result<String, Box<dyn std::error::Error>> {
324 let def = parser::parse(source, filename)
325 .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
326
327 let validated = validate::validate(&def)
328 .map_err(|errs| Box::new(Errors(errs)) as Box<dyn std::error::Error>)?;
329
330 let tree = tree::build_tree(&validated);
331 let code = codegen::generate_code(&validated, &tree);
332
333 Ok(code)
334}