synth-core 0.12.0

Core types, error handling, and backend trait for the Synth compiler
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
//! Backend trait and registry for multi-backend compilation
//!
//! Every compiler backend (ARM, aWsm, wasker, w2c2) implements the `Backend`
//! trait, allowing the CLI and verification framework to treat them uniformly.

use crate::target::TargetSpec;
use crate::wasm_decoder::DecodedModule;
use crate::wasm_op::WasmOp;
use std::collections::HashMap;
use thiserror::Error;

/// Errors from backend compilation
///
/// Every variant carries a free-form message that is interpolated into the
/// `Display` text declared by its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum BackendError {
    /// Displayed as `compilation failed: <message>`.
    #[error("compilation failed: {0}")]
    CompilationFailed(String),

    /// Displayed as `backend not available: <message>`.
    #[error("backend not available: {0}")]
    NotAvailable(String),

    /// Displayed as `unsupported configuration: <message>`.
    #[error("unsupported configuration: {0}")]
    UnsupportedConfig(String),

    /// Displayed as `external tool error: <message>`.
    #[error("external tool error: {0}")]
    ExternalToolError(String),
}

/// Strategy used to keep linear-memory accesses in bounds. Phase 1 of
/// `docs/binary-safety-design.md` §3.1.
///
/// - `None`: no bounds enforcement.
/// - `Mpu` (also covers PMP): rely on hardware (ARM MPU or RV32 PMP) — no inline check.
/// - `Software`: emit a `CMP/BHS Trap_Handler` (ARM) or `bgeu addr, mem_size, ebreak` (RV32)
///   before every load/store.
/// - `Mask`: emit `AND addr, addr, #(mem_size - 1)` — only valid when memory size
///   is a power of two. Wraps on OOB rather than trapping (fuzz-profile semantics).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SafetyBounds {
    /// No bounds check (caller assumes the WASM module is trusted)
    #[default]
    None,
    /// ARM MPU / RV32 PMP — hardware enforcement, no inline guard
    Mpu,
    /// Software CMP/BHS (ARM) or BGEU+EBREAK (RV32) per access
    Software,
    /// AND-mask, requires power-of-two memory size
    Mask,
}

impl SafetyBounds {
    /// Parse the value given to `--safety-bounds`.
    ///
    /// Matching is exact (case-sensitive). Besides the canonical spellings
    /// `none`, `mpu`, `software` and `mask`, the aliases `pmp`, `soft` and
    /// `masking` are accepted.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message naming the rejected value when `s` is
    /// not one of the accepted spellings.
    pub fn parse(s: &str) -> std::result::Result<Self, String> {
        let bounds = match s {
            "none" => Self::None,
            "mpu" | "pmp" => Self::Mpu,
            "software" | "soft" => Self::Software,
            "mask" | "masking" => Self::Mask,
            _ => {
                return Err(format!(
                    "unknown --safety-bounds value '{}'; expected one of: none, mpu, software, mask",
                    s
                ));
            }
        };
        Ok(bounds)
    }

    /// Canonical lowercase name of the strategy, as written to the safety
    /// manifest. Each returned string parses back to the same variant.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::None => "none",
            Self::Mpu => "mpu",
            Self::Software => "software",
            Self::Mask => "mask",
        }
    }
}

/// Configuration for a compilation run
#[derive(Debug, Clone)]
pub struct CompileConfig {
    /// Optimization level (0 = none, 1 = fast, 2 = default, 3 = aggressive)
    pub opt_level: u8,
    /// Target specification
    pub target: TargetSpec,
    /// Legacy: enable software bounds checking for memory operations.
    /// Deprecated in favor of `safety_bounds`. When set, equivalent to
    /// `SafetyBounds::Software`. Kept for backwards compatibility with
    /// callers that haven't migrated yet.
    pub bounds_check: bool,
    /// Phase-1 unified safety-bounds knob. If `bounds_check` is `true` and
    /// this is `None`, the legacy field wins (back-compat). If both are set,
    /// `safety_bounds` wins.
    pub safety_bounds: SafetyBounds,
    /// Hardware profile name (e.g. "nrf52840", "stm32f407")
    pub hardware: String,
    /// Skip optimization passes (direct instruction selection)
    pub no_optimize: bool,
    /// Use Loom-compatible optimization preset
    pub loom_compat: bool,
    /// Number of imported functions (calls to indices below this use Meld dispatch)
    pub num_imports: u32,
    /// AAPCS integer-argument count per function, indexed by full WASM function
    /// index (imports first, then locals). Lets `Call` marshal the right number
    /// of operand-stack values into R0–R3 (issue #195). Empty = pass no args
    /// (pre-#195 behaviour).
    pub func_arg_counts: Vec<u32>,
    /// AAPCS integer-argument count per function type, indexed by type index.
    /// Used by `call_indirect` (issue #195).
    pub type_arg_counts: Vec<u32>,
    /// Produce relocatable (ET_REL) host-link output. When set, the backend
    /// uses the direct instruction selector (`select_with_stack`) rather than
    /// the optimized path: the optimizer materializes an *absolute* linear-
    /// memory base (0x20000100) and does not preserve caller-saved registers
    /// across calls, both wrong for a host-linked object where the linmem base
    /// is supplied via `fp` at runtime and callees follow AAPCS. Imports are
    /// also emitted as direct `func_N` BLs (resolved to the wasm field name)
    /// instead of `__meld_dispatch_import`. (#197 — follow-up to #188/#171.)
    pub relocatable: bool,

    /// #237: emit wasm function-static data as a base-independent `.data`
    /// section (`__synth_wasm_data`) addressed via MOVW/MOVT symbol relocations,
    /// so a host-pointer drop-in (linmem base = 0 for native `*ptr` derefs)
    /// doesn't mis-resolve the statics. Off by default — only the leaves'
    /// base-relative `[R11+const]` path is used unless explicitly requested.
    pub native_pointer_abi: bool,

    /// #237: wasm linear-memory minimum size in bytes — the full static-data
    /// extent (initialized `(data)` segments plus the zero-init/BSS region).
    /// Under `native_pointer_abi`, a const memory address below this is a wasm
    /// static → symbol-relative; any address beyond it is a runtime host pointer
    /// → `[R11=0 + addr]`.
    pub linear_memory_bytes: u32,

    /// #237: the wasm stack-pointer global as `(index, init_value)`, if the
    /// module has one. Under `native_pointer_abi` the backend register-promotes
    /// it: `global.get` materializes `__synth_wasm_data + init` (the real stack
    /// top) and the init value doubles as the static-data base that separates
    /// pointer consts (`>= init`) from frame-size scalars (`< init`).
    pub stack_pointer_global: Option<(u32, i32)>,
    /// #311: per-function (full index) "returns i64" flags — the call
    /// lowering must tag i64 results as a register pair or the hi half is
    /// invisible to liveness.
    pub func_ret_i64: Vec<bool>,
    /// #311: per-type "returns i64" flags — the per-type counterpart of
    /// `func_ret_i64`, needed for the same register-pair tagging.
    /// NOTE(review): presumably indexed by type index like `type_arg_counts`
    /// — confirm against the driver that fills it.
    pub type_ret_i64: Vec<bool>,
    /// #359: declared parameter widths per *function* (full index, imports
    /// first): `func_params_i64[f][k]` is true when param `k` of function `f` is
    /// i64/f64. The AAPCS stack-argument path needs the *declared* widths
    /// (op-stream inference can't see an unused i64 param that still shifts the
    /// incoming-stack layout). The source of truth — a per-function driver loop
    /// (`compile_module` / the CLI loop) indexes it by `func.index` and copies
    /// the slice into [`current_func_params_i64`] before each `compile_function`.
    /// Empty → every param assumed i32 (the legacy path; keeps every function
    /// with <=4 params, or all-i32 params, byte-identical).
    pub func_params_i64: Vec<Vec<bool>>,
    /// #359: declared parameter widths of the function CURRENTLY being compiled
    /// — `current_func_params_i64[k]` is true when param `k` is i64/f64. Set per
    /// function (a cheap clone of the config) from [`func_params_i64`] by the
    /// driver loop, because `compile_function` is shared across backends and
    /// carries no function index. Empty → assume i32.
    pub current_func_params_i64: Vec<bool>,
}

impl CompileConfig {
    /// Compute the safety-bounds strategy backends should actually apply
    /// when choosing the shape of the inline check.
    ///
    /// An explicit `safety_bounds` always takes precedence. The legacy
    /// `bounds_check` flag only matters when `safety_bounds` is still
    /// `SafetyBounds::None`, in which case it is promoted to
    /// `SafetyBounds::Software`.
    pub fn effective_safety_bounds(&self) -> SafetyBounds {
        let legacy_only = self.bounds_check && self.safety_bounds == SafetyBounds::None;
        if legacy_only {
            SafetyBounds::Software
        } else {
            self.safety_bounds
        }
    }
}

impl Default for CompileConfig {
    /// Baseline configuration: Cortex-M4 target at optimization level 2, no
    /// bounds enforcement, and every optional feature or metadata table
    /// switched off or left empty.
    fn default() -> Self {
        Self {
            // Target selection and optimization pipeline.
            target: TargetSpec::cortex_m4(),
            hardware: String::new(),
            opt_level: 2,
            no_optimize: false,
            loom_compat: false,

            // Memory-safety knobs: nothing enforced unless requested.
            bounds_check: false,
            safety_bounds: SafetyBounds::None,

            // Output mode and native-pointer ABI (#197, #237).
            relocatable: false,
            native_pointer_abi: false,
            linear_memory_bytes: 0,
            stack_pointer_global: None,

            // Call-lowering metadata (#195, #311, #359); all tables start empty.
            num_imports: 0,
            func_arg_counts: Vec::new(),
            type_arg_counts: Vec::new(),
            func_ret_i64: Vec::new(),
            type_ret_i64: Vec::new(),
            func_params_i64: Vec::new(),
            current_func_params_i64: Vec::new(),
        }
    }
}

/// Which ARM relocation type a [`CodeRelocation`] site uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RelocKind {
    /// R_ARM_THM_CALL — a Thumb BL call site (the default; #167).
    ThmCall,
    /// R_ARM_MOVW_ABS_NC — the MOVW half of a symbol-relative address (#237).
    MovwAbs,
    /// R_ARM_MOVT_ABS — the MOVT half of a symbol-relative address (#237).
    MovtAbs,
    /// R_ARM_ABS32 — a 32-bit absolute address held in a `.text` literal-pool
    /// word, loaded via `LDR rX, [pc, #off]` (#345). The link-survivable
    /// replacement for the inline-immediate MOVW/MOVT-ABS pair: `ld`/bfd patches
    /// the data word at link time (`S + A`, the addend living in the word, REL
    /// semantics), which survives placement into a large multi-object image —
    /// whereas an inline-instruction MOVW_ABS immediate can be mangled.
    Abs32,
}

/// A relocation entry produced during compilation
///
/// Records that the site at `offset` bytes into the function's code refers
/// to an external symbol (e.g., a BL instruction targeting
/// `__meld_dispatch_import`). The linker resolves these when combining the
/// Synth object with the Kiln bridge.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodeRelocation {
    /// Byte offset within the function's machine code where the reloc applies
    pub offset: u32,
    /// Target symbol name (e.g., "__meld_dispatch_import", "__synth_wasm_data")
    pub symbol: String,
    /// Which ARM relocation type to emit for this site.
    pub kind: RelocKind,
}

/// VCR-DBG-001: a per-instruction source map — `(machine_offset_within_code,
/// wasm_op_index)` pairs, one per emitted machine instruction. A `None` op-index
/// marks an instruction with no originating wasm op (prologue/epilogue, literal
/// pool). Consumed by the DWARF `.debug_line` emitter; empty when no source map
/// was produced.
pub type LineMap = Vec<(u32, Option<usize>)>;

/// A single compiled function
#[derive(Debug, Clone)]
pub struct CompiledFunction {
    /// Function name (from WASM export or generated)
    pub name: String,
    /// Raw machine code bytes
    pub code: Vec<u8>,
    /// Original WASM ops (retained for verification)
    pub wasm_ops: Vec<WasmOp>,
    /// Relocations for external symbol references (BL to bridge functions)
    pub relocations: Vec<CodeRelocation>,
    /// VCR-DBG-001: per-instruction source map for DWARF `.debug_line` emission —
    /// `(machine_offset_within_code, wasm_op_index)` captured at encode time, one
    /// entry per emitted machine instruction. A `None` op-index marks an
    /// instruction with no originating wasm op (prologue/epilogue, literal-pool
    /// word). This is purely additive metadata: it is never serialized unless
    /// `.debug_line` emission is requested, so the emitted `.text` is
    /// byte-identical with or without it. Empty for backends/paths that do not
    /// yet produce a source map (RISC-V, the optimized ARM path).
    pub line_map: LineMap,
}

/// Result of compiling a full module
///
/// This is the success type of [`Backend::compile_module`].
#[derive(Debug)]
pub struct CompilationResult {
    /// Compiled functions
    pub functions: Vec<CompiledFunction>,
    /// Complete ELF binary (if backend produces one directly); `None` otherwise
    pub elf: Option<Vec<u8>>,
    /// Name of the backend that produced this result
    pub backend_name: String,
}

/// What a backend can and cannot do
///
/// A plain set of feature flags, reported by [`Backend::capabilities`].
#[derive(Debug, Clone)]
pub struct BackendCapabilities {
    /// Backend produces complete ELF files (external backends like aWsm)
    pub produces_elf: bool,
    /// Backend supports per-rule verification (only our custom ARM backend)
    pub supports_rule_verification: bool,
    /// Backend supports binary-level verification (all backends via disassembly)
    pub supports_binary_verification: bool,
    /// Backend is an external tool (not a library)
    pub is_external: bool,
}

/// Trait that every compilation backend implements
///
/// Implementors must be `Send + Sync`; [`BackendRegistry`] stores them as
/// `Box<dyn Backend>`.
pub trait Backend: Send + Sync {
    /// Human-readable backend name
    ///
    /// Also used as the lookup key when the backend is added to a
    /// [`BackendRegistry`].
    fn name(&self) -> &str;

    /// What this backend can do
    fn capabilities(&self) -> BackendCapabilities;

    /// Which targets this backend supports
    fn supported_targets(&self) -> Vec<TargetSpec>;

    /// Compile an entire decoded WASM module
    ///
    /// # Errors
    ///
    /// Returns a [`BackendError`] when the module cannot be compiled.
    fn compile_module(
        &self,
        module: &DecodedModule,
        config: &CompileConfig,
    ) -> std::result::Result<CompilationResult, BackendError>;

    /// Compile a single function from WASM ops to machine code
    ///
    /// `name` labels the function and `ops` is its WASM op stream. No
    /// function index is passed, so per-function settings have to arrive
    /// through `config`.
    ///
    /// # Errors
    ///
    /// Returns a [`BackendError`] when the function cannot be compiled.
    fn compile_function(
        &self,
        name: &str,
        ops: &[WasmOp],
        config: &CompileConfig,
    ) -> std::result::Result<CompiledFunction, BackendError>;

    /// Check if this backend is available (external tools installed, etc.)
    fn is_available(&self) -> bool;
}

/// Registry of available backends
///
/// Owns each backend as a boxed trait object, keyed by the name the backend
/// reported when it was registered.
pub struct BackendRegistry {
    /// Registered backends, keyed by `Backend::name()` at registration time.
    backends: HashMap<String, Box<dyn Backend>>,
}

impl BackendRegistry {
    /// Create an empty registry with no backends registered.
    pub fn new() -> Self {
        Self {
            backends: HashMap::new(),
        }
    }

    /// Register a backend under its name
    ///
    /// The key is whatever `backend.name()` returns at the time of the call.
    /// Registering a second backend with the same name replaces the first.
    pub fn register(&mut self, backend: Box<dyn Backend>) {
        let name = backend.name().to_string();
        self.backends.insert(name, backend);
    }

    /// Get a backend by name
    ///
    /// Returns `None` when no backend was registered under `name`.
    pub fn get(&self, name: &str) -> Option<&dyn Backend> {
        self.backends.get(name).map(|b| b.as_ref())
    }

    /// List all registered backends
    ///
    /// The result is sorted by registered name. `HashMap` iteration order is
    /// randomized per process, so returning it as-is would make listings (and
    /// any "pick the first" logic built on them) differ from run to run.
    pub fn list(&self) -> Vec<&dyn Backend> {
        let mut entries: Vec<_> = self.backends.iter().collect();
        // Keys are unique, so an unstable sort cannot reorder equal elements.
        entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
        entries
            .into_iter()
            .map(|(_, backend)| backend.as_ref())
            .collect()
    }

    /// List backends that are actually available (installed and working)
    ///
    /// Same deterministic name order as [`BackendRegistry::list`], keeping
    /// only the backends whose `is_available()` returns `true`.
    pub fn available(&self) -> Vec<&dyn Backend> {
        self.list()
            .into_iter()
            .filter(|b| b.is_available())
            .collect()
    }
}

impl Default for BackendRegistry {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh registry lists nothing and resolves no names.
    #[test]
    fn test_registry_empty() {
        let reg = BackendRegistry::new();
        assert!(reg.list().is_empty());
        assert!(reg.available().is_empty());
        assert!(reg.get("arm").is_none());
    }

    /// The default config optimizes at level 2 with no bounds enforcement.
    #[test]
    fn test_compile_config_default() {
        let config = CompileConfig::default();
        assert_eq!(config.opt_level, 2);
        assert!(!config.bounds_check);
        assert_eq!(config.safety_bounds, SafetyBounds::None);
        assert!(!config.no_optimize);
        // With neither knob set, nothing is promoted.
        assert_eq!(config.effective_safety_bounds(), SafetyBounds::None);
    }

    /// Canonical names survive parse -> as_str; every alias maps to its variant.
    #[test]
    fn safety_bounds_parse_round_trip() {
        for s in ["none", "mpu", "software", "mask"] {
            let sb = SafetyBounds::parse(s).unwrap();
            assert_eq!(sb.as_str(), s);
        }
        assert_eq!(SafetyBounds::parse("pmp").unwrap(), SafetyBounds::Mpu);
        assert_eq!(SafetyBounds::parse("soft").unwrap(), SafetyBounds::Software);
        assert_eq!(SafetyBounds::parse("masking").unwrap(), SafetyBounds::Mask);
        assert!(SafetyBounds::parse("nonsense").is_err());
    }

    /// The legacy flag alone is promoted to the software check.
    #[test]
    fn effective_safety_bounds_legacy_promotes_to_software() {
        let cfg = CompileConfig {
            bounds_check: true,
            ..Default::default()
        };
        assert_eq!(cfg.effective_safety_bounds(), SafetyBounds::Software);
    }

    /// An explicit `safety_bounds` overrides the legacy flag.
    #[test]
    fn effective_safety_bounds_new_field_wins() {
        let cfg = CompileConfig {
            bounds_check: true,
            safety_bounds: SafetyBounds::Mpu,
            ..Default::default()
        };
        assert_eq!(cfg.effective_safety_bounds(), SafetyBounds::Mpu);
    }

    /// Without the legacy flag, `safety_bounds` is returned unchanged.
    #[test]
    fn effective_safety_bounds_passthrough_without_legacy_flag() {
        for sb in [
            SafetyBounds::None,
            SafetyBounds::Mpu,
            SafetyBounds::Software,
            SafetyBounds::Mask,
        ] {
            let cfg = CompileConfig {
                safety_bounds: sb,
                ..Default::default()
            };
            assert_eq!(cfg.effective_safety_bounds(), sb);
        }
    }
}