Skip to main content

chipi_core/
config.rs

1//! Internal configuration types consumed by the code generation pipeline.
2//!
3//! These types are populated by the bindings frontend (`crate::bindings`).
4//! They are not user-facing. The user-facing format is `*.bindings.chipi`.
5
6use std::collections::HashMap;
7use std::path::Path;
8
9use crate::backend::{FlowConfig, OperandKind};
10
/// A single decoder/disassembler code generation target.
///
/// All fields are public. `GenTarget::new` takes `input`, `lang` and `output`
/// and fills every other field with a default value.
#[derive(Debug, Clone)]
pub struct GenTarget {
    /// Path to the input `.chipi` file.
    ///
    /// Rewritten in place by `resolve_gen_paths`: env vars are expanded and a
    /// relative path is joined to the base directory.
    pub input: String,

    /// Target language backend. One of `"rust"`, `"cpp"`, `"ida"`, `"binja"`.
    pub lang: String,

    /// Output file path. Supports `$VAR` and `${VAR}` env var expansion.
    ///
    /// Rewritten in place by `resolve_gen_paths`, same as `input`.
    pub output: String,

    /// Run a language-appropriate formatter on the output when true.
    pub format: bool,

    /// Default dispatch strategy for all decoders and sub-decoders.
    pub dispatch: Dispatch,

    /// Per-decoder dispatch strategy overrides.
    /// NOTE(review): keys are presumably decoder names — confirm against the
    /// code that consumes this map.
    pub dispatch_overrides: HashMap<String, Dispatch>,

    /// Map a chipi type name to a language-specific type path.
    pub type_map: HashMap<String, String>,

    /// Backend-specific options. Each backend reads only its own variant.
    pub lang_options: LangOptions,
}
38
39impl GenTarget {
40    pub fn new(
41        input: impl Into<String>,
42        lang: impl Into<String>,
43        output: impl Into<String>,
44    ) -> Self {
45        Self {
46            input: input.into(),
47            lang: lang.into(),
48            output: output.into(),
49            format: false,
50            dispatch: Dispatch::default(),
51            dispatch_overrides: HashMap::new(),
52            type_map: HashMap::new(),
53            lang_options: LangOptions::None,
54        }
55    }
56}
57
/// A single emulator dispatch LUT generation target.
///
/// The path-valued fields (`input`, `output`, `instr_type_output` and the
/// values of `subdecoder_instr_type_outputs`) are rewritten in place by
/// `resolve_lut_paths`; every other field is left untouched by it.
#[derive(Debug, Clone)]
pub struct LutTarget {
    /// Path to the input `.chipi` file.
    pub input: String,

    /// Output file path for the LUT dispatch code.
    pub output: String,

    /// Rust module path where handler functions live.
    pub handler_mod: String,

    /// Mutable context type passed to every handler.
    pub ctx_type: String,

    /// Dispatch strategy.
    pub dispatch: Dispatch,

    /// Map a group name to its list of instruction names.
    pub groups: HashMap<String, Vec<String>>,

    /// Rust module path where the generated `OP_*` constants live.
    pub lut_mod: Option<String>,

    /// Override the type of the second handler parameter.
    pub instr_type: Option<String>,

    /// Expression to extract the raw integer from the instr local.
    pub raw_expr: Option<String>,

    /// Output file path for the instruction newtype with field accessors.
    pub instr_type_output: Option<String>,

    /// Map a sub-decoder name to its group map.
    /// The inner map has the same shape as `groups`.
    pub subdecoder_groups: HashMap<String, HashMap<String, Vec<String>>>,

    /// Map a sub-decoder name to its instr-type output path.
    pub subdecoder_instr_type_outputs: HashMap<String, String>,

    /// Map a sub-decoder name to its instr-type Rust path.
    pub subdecoder_instr_types: HashMap<String, String>,

    /// Map a sub-decoder name to its dispatch strategy.
    /// An empty map makes sub-decoders inherit from `dispatch`.
    pub subdecoder_dispatch: HashMap<String, Dispatch>,

    /// Path to the handler called for unmatched opcodes.
    /// `None` falls back to a `todo!()` panic.
    pub invalid_handler: Option<String>,

    /// Map a sub-decoder name to its invalid handler path.
    pub subdecoder_invalid_handlers: HashMap<String, String>,

    /// Map a sub-decoder name to its handler module override.
    /// NOTE(review): presumably overrides `handler_mod` for that sub-decoder
    /// only — confirm against the LUT generator.
    pub subdecoder_handler_mods: HashMap<String, String>,
}
114
/// Dispatch strategy for code generation.
///
/// `Dispatch::default()` is [`Dispatch::FnPtrLut`]. The type is `Copy`, so it
/// can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Dispatch {
    /// `#[inline(always)]` match statement.
    JumpTable,
    /// Static function pointer lookup table per decision-tree branch.
    /// This is the default variant.
    #[default]
    FnPtrLut,
    /// Full-width function-pointer table indexed by raw decoder value.
    FlatLut,
    /// Full-width match with adjacent equal handlers compressed into ranges.
    FlatMatch,
}
128
/// Backend-specific options.
///
/// Holds at most one backend's option set. `LangOptions::default()` is
/// [`LangOptions::None`].
#[derive(Debug, Clone, Default)]
pub enum LangOptions {
    /// No backend-specific options (the default).
    #[default]
    None,
    /// Options for the C++ backend.
    Cpp(CppOptions),
    /// Options for the IDA processor module backend.
    Ida(IdaOptions),
    /// Options for the Binary Ninja architecture plugin backend.
    Binja(BinjaOptions),
}
138
139impl LangOptions {
140    pub fn as_cpp(&self) -> Option<&CppOptions> {
141        match self {
142            LangOptions::Cpp(o) => Some(o),
143            _ => None,
144        }
145    }
146
147    pub fn as_ida(&self) -> Option<&IdaOptions> {
148        match self {
149            LangOptions::Ida(o) => Some(o),
150            _ => None,
151        }
152    }
153
154    pub fn as_binja(&self) -> Option<&BinjaOptions> {
155        match self {
156            LangOptions::Binja(o) => Some(o),
157            _ => None,
158        }
159    }
160}
161
/// IDA processor module options.
///
/// Carried by `LangOptions::Ida`. How each field is rendered into the
/// generated module is decided by the IDA backend, which is not visible from
/// this file; the notes marked NOTE(review) below are assumptions to confirm
/// there.
#[derive(Debug, Clone)]
pub struct IdaOptions {
    /// NOTE(review): presumably the short processor name — confirm.
    pub processor_name: String,
    /// NOTE(review): presumably the descriptive processor name — confirm.
    pub processor_long_name: String,
    /// Numeric processor identifier.
    pub processor_id: u64,
    /// Register names, in declaration order.
    pub register_names: Vec<String>,
    /// Segment register names.
    /// NOTE(review): whether these must also appear in `register_names` is
    /// not visible here — confirm against the backend.
    pub segment_registers: Vec<String>,
    /// Address size.
    /// NOTE(review): the unit (bits vs. bytes) is not visible here — confirm.
    pub address_size: u32,
    /// Bytes per addressable unit. Word-addressed architectures use 2.
    pub bytes_per_unit: u32,
    /// Flag names passed through to the backend.
    /// NOTE(review): the accepted values are not visible here — confirm.
    pub flags: Vec<String>,
    /// Map a name to its operand kind.
    /// NOTE(review): keys are presumably chipi type names — confirm.
    pub operand_types: HashMap<String, OperandKind>,
    /// Map type alias names to display prefixes. Example: `"gpr"` -> `"r"`.
    pub display_prefixes: HashMap<String, String>,
    /// Control-flow configuration consumed by the backend.
    pub flow: FlowConfig,
}
179
/// Binary Ninja architecture plugin options.
///
/// Carried by `LangOptions::Binja`. How each field is rendered into the
/// generated plugin is decided by the Binary Ninja backend, which is not
/// visible from this file; the notes marked NOTE(review) below are
/// assumptions to confirm there.
#[derive(Debug, Clone)]
pub struct BinjaOptions {
    /// Name of the architecture.
    pub architecture_name: String,
    /// Address size.
    /// NOTE(review): the unit (bits vs. bytes) is not visible here — confirm.
    pub address_size: u32,
    /// Default integer size.
    /// NOTE(review): unit not visible here — confirm.
    pub default_int_size: u32,
    /// Maximum instruction length.
    /// NOTE(review): unit not visible here — confirm.
    pub max_instr_length: u32,
    /// Endianness, stored as a free-form string.
    /// NOTE(review): the accepted spellings are not visible here — confirm
    /// against the backend before relying on a particular value.
    pub endianness: String,
    /// Register names, in declaration order.
    pub register_names: Vec<String>,
    /// Register size.
    /// NOTE(review): unit not visible here — confirm.
    pub register_size: u32,
    /// Name of the stack pointer register, if the architecture has one.
    pub stack_pointer: Option<String>,
    /// Name of the link register, if the architecture has one.
    pub link_register: Option<String>,
    /// NOTE(review): presumably bytes per addressable unit, matching the
    /// field of the same name on `IdaOptions` — confirm.
    pub bytes_per_unit: u32,
    /// NOTE(review): presumably maps type alias names to display prefixes,
    /// matching the field of the same name on `IdaOptions` — confirm.
    pub display_prefixes: HashMap<String, String>,
    /// Map a name to its operand kind.
    /// NOTE(review): keys are presumably chipi type names — confirm.
    pub operand_types: HashMap<String, OperandKind>,
    /// Control-flow configuration consumed by the backend.
    pub flow: FlowConfig,
}
197
/// C++ backend options.
///
/// Carried by `LangOptions::Cpp`. `CppOptions::default()` has no namespace
/// override, the default guard style and no extra includes.
#[derive(Debug, Clone, Default)]
pub struct CppOptions {
    /// C++ namespace for generated code. Defaults to the decoder name in
    /// snake_case.
    pub namespace: Option<String>,
    /// Guard style for the generated code; see `CppGuardStyle`.
    pub guard_style: CppGuardStyle,
    /// Extra `#include` directives for user-provided type headers.
    pub includes: Vec<String>,
}
208
/// Guard style selected through `CppOptions::guard_style`.
///
/// `CppGuardStyle::default()` is [`CppGuardStyle::Pragma`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CppGuardStyle {
    /// The default style.
    /// NOTE(review): presumably emits `#pragma once` — confirm against the
    /// C++ backend.
    #[default]
    Pragma,
    /// NOTE(review): presumably emits a classic `#ifndef`/`#define` include
    /// guard — confirm against the C++ backend.
    Ifndef,
}
215
/// Expand environment variables (`$VAR` or `${VAR}`) in a string.
///
/// Two reference forms are recognised:
///
/// - `$NAME`, where `NAME` is the longest run of ASCII letters, digits and
///   underscores following the `$`;
/// - `${NAME}`, where `NAME` is everything up to the first `}`.
///
/// A reference is replaced by the variable's value when `std::env::var`
/// returns it. Anything that cannot be resolved is copied to the output
/// exactly as written:
///
/// - references to variables that are unset or not valid Unicode;
/// - a `$` with no name after it (including `${}`);
/// - an unterminated `${NAME` with no closing brace. It is kept verbatim
///   (never expanded, never given a `}` the input did not contain).
///
/// Substituted values are not scanned again, so expansion is a single pass.
pub fn expand_env(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s;

    // `$`, `{` and `}` are ASCII, so every byte offset computed from them
    // below is a valid `str` slice boundary.
    while let Some(dollar) = rest.find('$') {
        // Literal text before the reference.
        result.push_str(&rest[..dollar]);
        let after = &rest[dollar + 1..];

        // Split off the variable name and the byte length of the whole
        // reference (sigil and braces included).
        let (name, reference_len) = if let Some(body) = after.strip_prefix('{') {
            match body.find('}') {
                // "${" + name + "}"
                Some(close) => (&body[..close], close + 3),
                None => {
                    // Unterminated `${...`: not a reference at all, so the
                    // remainder of the input is plain text.
                    result.push_str(&rest[dollar..]);
                    return result;
                }
            }
        } else {
            let len = after
                .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
                .unwrap_or(after.len());
            // "$" + name
            (&after[..len], len + 1)
        };

        // An empty name can never identify a variable, so skip the lookup.
        let resolved = if name.is_empty() {
            None
        } else {
            std::env::var(name).ok()
        };

        match resolved {
            Some(value) => result.push_str(&value),
            // Unresolved: echo the reference exactly as it was written.
            None => result.push_str(&rest[dollar..dollar + reference_len]),
        }

        rest = &rest[dollar + reference_len..];
    }

    result.push_str(rest);
    result
}
261
262/// Resolve a path. Expands env vars first. Joins relative paths to `base_dir`.
263pub fn resolve_path(path: &str, base_dir: &Path) -> String {
264    let expanded = expand_env(path);
265    let p = Path::new(&expanded);
266    if p.is_absolute() {
267        expanded
268    } else {
269        base_dir.join(&expanded).to_string_lossy().into_owned()
270    }
271}
272
273/// Resolve paths in a `GenTarget` relative to a base directory.
274pub fn resolve_gen_paths(target: &mut GenTarget, base_dir: &Path) {
275    target.input = resolve_path(&target.input, base_dir);
276    target.output = resolve_path(&target.output, base_dir);
277}
278
279/// Resolve paths in a `LutTarget` relative to a base directory.
280pub fn resolve_lut_paths(target: &mut LutTarget, base_dir: &Path) {
281    target.input = resolve_path(&target.input, base_dir);
282    target.output = resolve_path(&target.output, base_dir);
283    if let Some(ref mut p) = target.instr_type_output {
284        *p = resolve_path(p, base_dir);
285    }
286    for p in target.subdecoder_instr_type_outputs.values_mut() {
287        *p = resolve_path(p, base_dir);
288    }
289}