Skip to main content

synth_core/
backend.rs

//! Backend trait and registry for multi-backend compilation.
//!
//! Every compiler backend (ARM, aWsm, wasker, w2c2) implements the `Backend`
//! trait, allowing the CLI and verification framework to treat them uniformly.
5
6use crate::target::TargetSpec;
7use crate::wasm_decoder::DecodedModule;
8use crate::wasm_op::WasmOp;
9use std::collections::HashMap;
10use thiserror::Error;
11
12/// Errors from backend compilation
13#[derive(Debug, Error)]
14pub enum BackendError {
15    #[error("compilation failed: {0}")]
16    CompilationFailed(String),
17
18    #[error("backend not available: {0}")]
19    NotAvailable(String),
20
21    #[error("unsupported configuration: {0}")]
22    UnsupportedConfig(String),
23
24    #[error("external tool error: {0}")]
25    ExternalToolError(String),
26}
27
/// Strategy used to keep linear-memory accesses in bounds.
/// Phase 1 of `docs/binary-safety-design.md` §3.1.
///
/// | Variant    | Enforcement                                                        |
/// |------------|--------------------------------------------------------------------|
/// | `None`     | no bounds enforcement                                              |
/// | `Mpu`      | hardware (ARM MPU or RV32 PMP); no inline check is emitted         |
/// | `Software` | `CMP/BHS Trap_Handler` (ARM) or `bgeu addr, mem_size, ebreak` (RV32) before every load/store |
/// | `Mask`     | `AND addr, addr, #(mem_size - 1)`; only valid for a power-of-two memory size, and wraps on OOB instead of trapping (fuzz-profile semantics) |
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SafetyBounds {
    /// No bounds check at all; the caller treats the WASM module as trusted.
    #[default]
    None,
    /// Hardware enforcement via ARM MPU / RV32 PMP, without an inline guard.
    Mpu,
    /// Per-access software guard: CMP/BHS on ARM, BGEU+EBREAK on RV32.
    Software,
    /// AND-mask of the address; requires a power-of-two memory size.
    Mask,
}

impl SafetyBounds {
    /// Parse the value given to the `--safety-bounds` argument.
    ///
    /// Accepts the canonical names `none`, `mpu`, `software` and `mask`, plus
    /// the aliases `pmp`, `soft` and `masking`. Matching is exact (case
    /// sensitive, no trimming).
    ///
    /// # Errors
    ///
    /// Returns a human-readable message naming the rejected value when `s`
    /// is not one of the accepted spellings.
    pub fn parse(s: &str) -> std::result::Result<Self, String> {
        let bounds = match s {
            "none" => Self::None,
            "mpu" | "pmp" => Self::Mpu,
            "software" | "soft" => Self::Software,
            "mask" | "masking" => Self::Mask,
            _ => {
                return Err(format!(
                    "unknown --safety-bounds value '{}'; expected one of: none, mpu, software, mask",
                    s
                ));
            }
        };
        Ok(bounds)
    }

    /// Canonical lowercase name of the variant, as written to the safety
    /// manifest. Each returned name is also accepted by [`SafetyBounds::parse`].
    pub fn as_str(self) -> &'static str {
        match self {
            Self::None => "none",
            Self::Mpu => "mpu",
            Self::Software => "software",
            Self::Mask => "mask",
        }
    }
}
74
75/// Configuration for a compilation run
76#[derive(Debug, Clone)]
77pub struct CompileConfig {
78    /// Optimization level (0 = none, 1 = fast, 2 = default, 3 = aggressive)
79    pub opt_level: u8,
80    /// Target specification
81    pub target: TargetSpec,
82    /// Legacy: enable software bounds checking for memory operations.
83    /// Deprecated in favor of `safety_bounds`. When set, equivalent to
84    /// `SafetyBounds::Software`. Kept for backwards compatibility with
85    /// callers that haven't migrated yet.
86    pub bounds_check: bool,
87    /// Phase-1 unified safety-bounds knob. If `bounds_check` is `true` and
88    /// this is `None`, the legacy field wins (back-compat). If both are set,
89    /// `safety_bounds` wins.
90    pub safety_bounds: SafetyBounds,
91    /// Hardware profile name (e.g. "nrf52840", "stm32f407")
92    pub hardware: String,
93    /// Skip optimization passes (direct instruction selection)
94    pub no_optimize: bool,
95    /// Use Loom-compatible optimization preset
96    pub loom_compat: bool,
97    /// Number of imported functions (calls to indices below this use Meld dispatch)
98    pub num_imports: u32,
99    /// AAPCS integer-argument count per function, indexed by full WASM function
100    /// index (imports first, then locals). Lets `Call` marshal the right number
101    /// of operand-stack values into R0–R3 (issue #195). Empty = pass no args
102    /// (pre-#195 behaviour).
103    pub func_arg_counts: Vec<u32>,
104    /// AAPCS integer-argument count per function type, indexed by type index.
105    /// Used by `call_indirect` (issue #195).
106    pub type_arg_counts: Vec<u32>,
107    /// Produce relocatable (ET_REL) host-link output. When set, the backend
108    /// uses the direct instruction selector (`select_with_stack`) rather than
109    /// the optimized path: the optimizer materializes an *absolute* linear-
110    /// memory base (0x20000100) and does not preserve caller-saved registers
111    /// across calls, both wrong for a host-linked object where the linmem base
112    /// is supplied via `fp` at runtime and callees follow AAPCS. Imports are
113    /// also emitted as direct `func_N` BLs (resolved to the wasm field name)
114    /// instead of `__meld_dispatch_import`. (#197 — follow-up to #188/#171.)
115    pub relocatable: bool,
116
117    /// #237: emit wasm function-static data as a base-independent `.data`
118    /// section (`__synth_wasm_data`) addressed via MOVW/MOVT symbol relocations,
119    /// so a host-pointer drop-in (linmem base = 0 for native `*ptr` derefs)
120    /// doesn't mis-resolve the statics. Off by default — only the leaves'
121    /// base-relative `[R11+const]` path is used unless explicitly requested.
122    pub native_pointer_abi: bool,
123
124    /// #237: wasm linear-memory minimum size in bytes — the full static-data
125    /// extent (initialized `(data)` segments plus the zero-init/BSS region).
126    /// Under `native_pointer_abi`, a const memory address below this is a wasm
127    /// static → symbol-relative; any address beyond it is a runtime host pointer
128    /// → `[R11=0 + addr]`.
129    pub linear_memory_bytes: u32,
130}
131
132impl CompileConfig {
133    /// Resolve the effective safety-bounds setting, honouring the legacy
134    /// `bounds_check` field as a fallback. Used by backends to pick the
135    /// inline-check shape.
136    pub fn effective_safety_bounds(&self) -> SafetyBounds {
137        match (self.safety_bounds, self.bounds_check) {
138            (SafetyBounds::None, true) => SafetyBounds::Software,
139            (s, _) => s,
140        }
141    }
142}
143
144impl Default for CompileConfig {
145    fn default() -> Self {
146        Self {
147            opt_level: 2,
148            target: TargetSpec::cortex_m4(),
149            bounds_check: false,
150            safety_bounds: SafetyBounds::None,
151            hardware: String::new(),
152            no_optimize: false,
153            loom_compat: false,
154            num_imports: 0,
155            func_arg_counts: Vec::new(),
156            type_arg_counts: Vec::new(),
157            relocatable: false,
158            native_pointer_abi: false,
159            linear_memory_bytes: 0,
160        }
161    }
162}
163
/// The ARM relocation type to emit for a [`CodeRelocation`] site.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RelocKind {
    /// R_ARM_THM_CALL — a Thumb BL call site (the default; #167).
    ThmCall,
    /// R_ARM_MOVW_ABS_NC — the MOVW half of a symbol-relative address (#237).
    MovwAbs,
    /// R_ARM_MOVT_ABS — the MOVT half of a symbol-relative address (#237).
    MovtAbs,
}

/// A relocation entry produced during compilation.
///
/// Records that the instruction at `offset` bytes into the function's code
/// refers to an external symbol: a BL call site (e.g. to
/// `__meld_dispatch_import`) or one half of a MOVW/MOVT symbol-relative
/// address (e.g. of `__synth_wasm_data`), as selected by `kind`. The linker
/// resolves these when combining the Synth object with the Kiln bridge.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodeRelocation {
    /// Byte offset within the function's machine code where the reloc applies
    pub offset: u32,
    /// Target symbol name (e.g., "__meld_dispatch_import", "__synth_wasm_data")
    pub symbol: String,
    /// Which ARM relocation type to emit for this site.
    pub kind: RelocKind,
}
188
189/// A single compiled function
190#[derive(Debug, Clone)]
191pub struct CompiledFunction {
192    /// Function name (from WASM export or generated)
193    pub name: String,
194    /// Raw machine code bytes
195    pub code: Vec<u8>,
196    /// Original WASM ops (retained for verification)
197    pub wasm_ops: Vec<WasmOp>,
198    /// Relocations for external symbol references (BL to bridge functions)
199    pub relocations: Vec<CodeRelocation>,
200}
201
202/// Result of compiling a full module
203#[derive(Debug)]
204pub struct CompilationResult {
205    /// Compiled functions
206    pub functions: Vec<CompiledFunction>,
207    /// Complete ELF binary (if backend produces one directly)
208    pub elf: Option<Vec<u8>>,
209    /// Name of the backend that produced this result
210    pub backend_name: String,
211}
212
/// Feature flags describing what a backend is able to do.
#[derive(Debug, Clone)]
pub struct BackendCapabilities {
    /// `true` when the backend emits complete ELF files (external backends
    /// such as aWsm).
    pub produces_elf: bool,
    /// `true` when per-rule verification is supported (only our custom ARM
    /// backend).
    pub supports_rule_verification: bool,
    /// `true` when binary-level verification is supported (all backends, via
    /// disassembly).
    pub supports_binary_verification: bool,
    /// `true` when the backend is an external tool rather than a library.
    pub is_external: bool,
}
225
226/// Trait that every compilation backend implements
227pub trait Backend: Send + Sync {
228    /// Human-readable backend name
229    fn name(&self) -> &str;
230
231    /// What this backend can do
232    fn capabilities(&self) -> BackendCapabilities;
233
234    /// Which targets this backend supports
235    fn supported_targets(&self) -> Vec<TargetSpec>;
236
237    /// Compile an entire decoded WASM module
238    fn compile_module(
239        &self,
240        module: &DecodedModule,
241        config: &CompileConfig,
242    ) -> std::result::Result<CompilationResult, BackendError>;
243
244    /// Compile a single function from WASM ops to machine code
245    fn compile_function(
246        &self,
247        name: &str,
248        ops: &[WasmOp],
249        config: &CompileConfig,
250    ) -> std::result::Result<CompiledFunction, BackendError>;
251
252    /// Check if this backend is available (external tools installed, etc.)
253    fn is_available(&self) -> bool;
254}
255
256/// Registry of available backends
257pub struct BackendRegistry {
258    backends: HashMap<String, Box<dyn Backend>>,
259}
260
261impl BackendRegistry {
262    pub fn new() -> Self {
263        Self {
264            backends: HashMap::new(),
265        }
266    }
267
268    /// Register a backend under its name
269    pub fn register(&mut self, backend: Box<dyn Backend>) {
270        let name = backend.name().to_string();
271        self.backends.insert(name, backend);
272    }
273
274    /// Get a backend by name
275    pub fn get(&self, name: &str) -> Option<&dyn Backend> {
276        self.backends.get(name).map(|b| b.as_ref())
277    }
278
279    /// List all registered backends
280    pub fn list(&self) -> Vec<&dyn Backend> {
281        self.backends.values().map(|b| b.as_ref()).collect()
282    }
283
284    /// List backends that are actually available (installed and working)
285    pub fn available(&self) -> Vec<&dyn Backend> {
286        self.backends
287            .values()
288            .filter(|b| b.is_available())
289            .map(|b| b.as_ref())
290            .collect()
291    }
292}
293
294impl Default for BackendRegistry {
295    fn default() -> Self {
296        Self::new()
297    }
298}
299
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created registry exposes no backends through any accessor.
    #[test]
    fn test_registry_empty() {
        let registry = BackendRegistry::new();
        assert!(registry.list().is_empty());
        assert!(registry.available().is_empty());
        assert!(registry.get("arm").is_none());
    }

    /// The default configuration uses opt level 2 with all bounds knobs and
    /// the no-optimize switch turned off.
    #[test]
    fn test_compile_config_default() {
        let defaults = CompileConfig::default();
        assert_eq!(defaults.opt_level, 2);
        assert!(!defaults.bounds_check);
        assert_eq!(defaults.safety_bounds, SafetyBounds::None);
        assert!(!defaults.no_optimize);
    }

    /// Canonical names survive parse -> as_str; aliases map to the expected
    /// variants; unknown input is rejected.
    #[test]
    fn safety_bounds_parse_round_trip() {
        let canonical = ["none", "mpu", "software", "mask"];
        for name in canonical {
            let parsed = SafetyBounds::parse(name).unwrap();
            assert_eq!(parsed.as_str(), name);
        }

        let aliases = [
            ("pmp", SafetyBounds::Mpu),
            ("soft", SafetyBounds::Software),
        ];
        for (alias, expected) in aliases {
            assert_eq!(SafetyBounds::parse(alias).unwrap(), expected);
        }

        assert!(SafetyBounds::parse("nonsense").is_err());
    }

    /// With only the legacy flag set, the effective strategy is `Software`.
    #[test]
    fn effective_safety_bounds_legacy_promotes_to_software() {
        let legacy_only = CompileConfig {
            bounds_check: true,
            ..Default::default()
        };
        assert_eq!(
            legacy_only.effective_safety_bounds(),
            SafetyBounds::Software
        );
    }

    /// When both knobs are set, the unified `safety_bounds` field wins.
    #[test]
    fn effective_safety_bounds_new_field_wins() {
        let both_set = CompileConfig {
            bounds_check: true,
            safety_bounds: SafetyBounds::Mpu,
            ..Default::default()
        };
        assert_eq!(both_set.effective_safety_bounds(), SafetyBounds::Mpu);
    }
}