Skip to main content

chipi_core/
config.rs

1//! Internal configuration types consumed by the code generation pipeline.
2//!
3//! These types are populated by the bindings frontend (`crate::bindings`).
4//! They are not user-facing. The user-facing format is `*.bindings.chipi`.
5
6use std::collections::HashMap;
7use std::path::Path;
8
9use crate::backend::{FlowConfig, OperandKind};
10
11/// A single decoder/disassembler code generation target.
12#[derive(Debug, Clone)]
13pub struct GenTarget {
14    /// Path to the input `.chipi` file.
15    pub input: String,
16
17    /// Target language backend. One of `"rust"`, `"cpp"`, `"ida"`, `"binja"`.
18    pub lang: String,
19
20    /// Output file path. Supports `$VAR` and `${VAR}` env var expansion.
21    pub output: String,
22
23    /// Run a language-appropriate formatter on the output when true.
24    pub format: bool,
25
26    /// Default dispatch strategy for all decoders and sub-decoders.
27    pub dispatch: Dispatch,
28
29    /// Per-decoder dispatch strategy overrides.
30    pub dispatch_overrides: HashMap<String, Dispatch>,
31
32    /// Map a chipi type name to a language-specific type path.
33    pub type_map: HashMap<String, String>,
34
35    /// Backend-specific options. Each backend reads only its own variant.
36    pub lang_options: LangOptions,
37}
38
39impl GenTarget {
40    pub fn new(
41        input: impl Into<String>,
42        lang: impl Into<String>,
43        output: impl Into<String>,
44    ) -> Self {
45        Self {
46            input: input.into(),
47            lang: lang.into(),
48            output: output.into(),
49            format: false,
50            dispatch: Dispatch::default(),
51            dispatch_overrides: HashMap::new(),
52            type_map: HashMap::new(),
53            lang_options: LangOptions::None,
54        }
55    }
56}
57
58/// A single emulator dispatch LUT generation target.
59#[derive(Debug, Clone)]
60pub struct LutTarget {
61    /// Path to the input `.chipi` file.
62    pub input: String,
63
64    /// Output file path for the LUT dispatch code.
65    pub output: String,
66
67    /// Rust module path where handler functions live.
68    pub handler_mod: String,
69
70    /// Mutable context type passed to every handler.
71    pub ctx_type: String,
72
73    /// Dispatch strategy.
74    pub dispatch: Dispatch,
75
76    /// Map a group name to its list of instruction names.
77    pub groups: HashMap<String, Vec<String>>,
78
79    /// Rust module path where the generated `OP_*` constants live.
80    pub lut_mod: Option<String>,
81
82    /// Override the type of the second handler parameter.
83    pub instr_type: Option<String>,
84
85    /// Expression to extract the raw integer from the instr local.
86    pub raw_expr: Option<String>,
87
88    /// Output file path for the instruction newtype with field accessors.
89    pub instr_type_output: Option<String>,
90
91    /// Map a sub-decoder name to its group map.
92    pub subdecoder_groups: HashMap<String, HashMap<String, Vec<String>>>,
93
94    /// Map a sub-decoder name to its instr-type output path.
95    pub subdecoder_instr_type_outputs: HashMap<String, String>,
96
97    /// Map a sub-decoder name to its instr-type Rust path.
98    pub subdecoder_instr_types: HashMap<String, String>,
99
100    /// Map a sub-decoder name to its dispatch strategy.
101    /// An empty map makes sub-decoders inherit from `dispatch`.
102    pub subdecoder_dispatch: HashMap<String, Dispatch>,
103
104    /// Path to the handler called for unmatched opcodes.
105    /// `None` falls back to a `todo!()` panic.
106    pub invalid_handler: Option<String>,
107
108    /// Map a sub-decoder name to its invalid handler path.
109    pub subdecoder_invalid_handlers: HashMap<String, String>,
110
111    /// Map a sub-decoder name to its handler module override.
112    pub subdecoder_handler_mods: HashMap<String, String>,
113
114    /// Extra const-generic arguments appended to every handler reference in
115    /// the generated LUT. Each entry becomes its own `{ ... }`-wrapped arg
116    /// so handlers with more than one const generic can be dispatched
117    /// without per-instantiation wrapper modules. Example: `["crate::sys::GC"]`
118    /// emits `handler::<{ OP_X }, { crate::sys::GC }>`.
119    pub handler_consts: Vec<String>,
120}
121
/// How generated code selects the handler for a decoded value.
///
/// The default strategy is [`Dispatch::FnPtrLut`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum Dispatch {
    /// A match statement marked `#[inline(always)]`.
    JumpTable,
    /// One static table of function pointers for each decision-tree branch.
    #[default]
    FnPtrLut,
    /// A full-width table of function pointers, indexed by the raw decoder
    /// value.
    FlatLut,
    /// A full-width match in which adjacent entries sharing a handler are
    /// merged into ranges.
    FlatMatch,
}
135
136/// Backend-specific options.
137#[derive(Debug, Clone, Default)]
138pub enum LangOptions {
139    #[default]
140    None,
141    Cpp(CppOptions),
142    Ida(IdaOptions),
143    Binja(BinjaOptions),
144}
145
146impl LangOptions {
147    pub fn as_cpp(&self) -> Option<&CppOptions> {
148        match self {
149            LangOptions::Cpp(o) => Some(o),
150            _ => None,
151        }
152    }
153
154    pub fn as_ida(&self) -> Option<&IdaOptions> {
155        match self {
156            LangOptions::Ida(o) => Some(o),
157            _ => None,
158        }
159    }
160
161    pub fn as_binja(&self) -> Option<&BinjaOptions> {
162        match self {
163            LangOptions::Binja(o) => Some(o),
164            _ => None,
165        }
166    }
167}
168
169/// IDA processor module options.
170#[derive(Debug, Clone)]
171pub struct IdaOptions {
172    pub processor_name: String,
173    pub processor_long_name: String,
174    pub processor_id: u64,
175    pub register_names: Vec<String>,
176    pub segment_registers: Vec<String>,
177    pub address_size: u32,
178    /// Bytes per addressable unit. Word-addressed architectures use 2.
179    pub bytes_per_unit: u32,
180    pub flags: Vec<String>,
181    pub operand_types: HashMap<String, OperandKind>,
182    /// Map type alias names to display prefixes. Example: `"gpr"` -> `"r"`.
183    pub display_prefixes: HashMap<String, String>,
184    pub flow: FlowConfig,
185}
186
187/// Binary Ninja architecture plugin options.
188#[derive(Debug, Clone)]
189pub struct BinjaOptions {
190    pub architecture_name: String,
191    pub address_size: u32,
192    pub default_int_size: u32,
193    pub max_instr_length: u32,
194    pub endianness: String,
195    pub register_names: Vec<String>,
196    pub register_size: u32,
197    pub stack_pointer: Option<String>,
198    pub link_register: Option<String>,
199    pub bytes_per_unit: u32,
200    pub display_prefixes: HashMap<String, String>,
201    pub operand_types: HashMap<String, OperandKind>,
202    pub flow: FlowConfig,
203}
204
205/// C++ backend options.
206#[derive(Debug, Clone, Default)]
207pub struct CppOptions {
208    /// C++ namespace for generated code. Defaults to the decoder name in
209    /// snake_case.
210    pub namespace: Option<String>,
211    pub guard_style: CppGuardStyle,
212    /// Extra `#include` directives for user-provided type headers.
213    pub includes: Vec<String>,
214}
215
/// Guard style selected through `CppOptions::guard_style`.
///
/// The default is [`CppGuardStyle::Pragma`].
// NOTE(review): the variant names suggest `#pragma once` versus an
// `#ifndef`/`#define` include guard — confirm against the C++ backend.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CppGuardStyle {
    #[default]
    Pragma,
    Ifndef,
}
222
/// Expand environment variables (`$VAR` or `${VAR}`) in a string.
///
/// * `$VAR` — the name is the longest run of ASCII alphanumerics and `_`
///   directly after the `$`.
/// * `${VAR}` — the name is everything between `${` and the next `}`.
///
/// A reference is replaced by the variable's value when [`std::env::var`]
/// returns `Ok` for the name. Everything else is copied to the output
/// unchanged:
///
/// * references whose variable cannot be read,
/// * a `$` or `${}` with an empty name,
/// * a `${` that is never closed by `}`. Such a fragment is not a complete
///   reference, so it is never looked up and no closing brace is added.
///
/// Substituted values are not scanned again for further references.
pub fn expand_env(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '$' {
            result.push(c);
            continue;
        }

        let mut name = String::new();

        if chars.peek() == Some(&'{') {
            chars.next(); // consume the '{'

            // Collect the name up to (and consuming) the closing brace, and
            // remember whether that brace was actually present.
            let mut closed = false;
            for ch in chars.by_ref() {
                if ch == '}' {
                    closed = true;
                    break;
                }
                name.push(ch);
            }

            if !closed {
                // The input ended inside `${...`. Emit exactly what was read
                // instead of expanding it or inventing a `}`.
                result.push_str("${");
                result.push_str(&name);
                continue;
            }

            match lookup_env(&name) {
                Some(val) => result.push_str(&val),
                None => {
                    result.push_str("${");
                    result.push_str(&name);
                    result.push('}');
                }
            }
        } else {
            while let Some(&ch) = chars.peek() {
                if !(ch.is_ascii_alphanumeric() || ch == '_') {
                    break;
                }
                name.push(ch);
                chars.next();
            }

            match lookup_env(&name) {
                Some(val) => result.push_str(&val),
                None => {
                    result.push('$');
                    result.push_str(&name);
                }
            }
        }
    }

    result
}

/// Look up `name` in the environment; an empty name is never looked up.
fn lookup_env(name: &str) -> Option<String> {
    if name.is_empty() {
        None
    } else {
        std::env::var(name).ok()
    }
}
268
269/// Resolve a path. Expands env vars first. Joins relative paths to `base_dir`.
270pub fn resolve_path(path: &str, base_dir: &Path) -> String {
271    let expanded = expand_env(path);
272    let p = Path::new(&expanded);
273    if p.is_absolute() {
274        expanded
275    } else {
276        base_dir.join(&expanded).to_string_lossy().into_owned()
277    }
278}
279
280/// Resolve paths in a `GenTarget` relative to a base directory.
281pub fn resolve_gen_paths(target: &mut GenTarget, base_dir: &Path) {
282    target.input = resolve_path(&target.input, base_dir);
283    target.output = resolve_path(&target.output, base_dir);
284}
285
286/// Resolve paths in a `LutTarget` relative to a base directory.
287pub fn resolve_lut_paths(target: &mut LutTarget, base_dir: &Path) {
288    target.input = resolve_path(&target.input, base_dir);
289    target.output = resolve_path(&target.output, base_dir);
290    if let Some(ref mut p) = target.instr_type_output {
291        *p = resolve_path(p, base_dir);
292    }
293    for p in target.subdecoder_instr_type_outputs.values_mut() {
294        *p = resolve_path(p, base_dir);
295    }
296}