Skip to main content

synth_backend/
arm_backend.rs

1//! ARM Backend — wraps the instruction selector + optimizer + encoder as a Backend
2//!
3//! This is Synth's custom ARM compiler targeting Cortex-M (Thumb-2).
4//! It's the only backend that supports per-rule formal verification (ASIL D path).
5
6use crate::ArmEncoder;
7use synth_core::backend::{
8    Backend, BackendCapabilities, BackendError, CodeRelocation, CompilationResult, CompileConfig,
9    CompiledFunction, SafetyBounds,
10};
11use synth_core::target::{IsaVariant, TargetSpec};
12use synth_core::wasm_decoder::DecodedModule;
13use synth_core::wasm_op::WasmOp;
14use synth_synthesis::{
15    ArmInstruction, ArmOp, BoundsCheckConfig, InstructionSelector, OptimizationConfig,
16    OptimizerBridge, RuleDatabase, validate_instructions,
17};
18
/// ARM Cortex-M backend using Synth's custom compiler pipeline.
///
/// This is a stateless unit struct: it carries no configuration of its own.
/// Because it holds no data it is trivially `Copy`, and deriving `Debug`
/// lets it appear in diagnostics and inside `Debug`-derived containers.
#[derive(Debug, Clone, Copy)]
pub struct ArmBackend;
21
22impl ArmBackend {
23    pub fn new() -> Self {
24        Self
25    }
26}
27
28impl Default for ArmBackend {
29    fn default() -> Self {
30        Self::new()
31    }
32}
33
34impl Backend for ArmBackend {
35    fn name(&self) -> &str {
36        "arm"
37    }
38
39    fn capabilities(&self) -> BackendCapabilities {
40        BackendCapabilities {
41            produces_elf: false,
42            supports_rule_verification: true,
43            supports_binary_verification: true,
44            is_external: false,
45        }
46    }
47
48    fn supported_targets(&self) -> Vec<TargetSpec> {
49        vec![
50            TargetSpec::cortex_m3(),
51            TargetSpec::cortex_m4(),
52            TargetSpec::cortex_m4f(),
53            TargetSpec::cortex_m7(),
54            TargetSpec::cortex_m7dp(),
55        ]
56    }
57
58    fn compile_module(
59        &self,
60        module: &DecodedModule,
61        config: &CompileConfig,
62    ) -> Result<CompilationResult, BackendError> {
63        let exports: Vec<_> = module
64            .functions
65            .iter()
66            .filter(|f| f.export_name.is_some())
67            .collect();
68
69        if exports.is_empty() {
70            return Err(BackendError::CompilationFailed(
71                "no exported functions found".into(),
72            ));
73        }
74
75        let mut functions = Vec::new();
76        for func in &exports {
77            let name = func.export_name.clone().unwrap();
78            let compiled = self.compile_function(&name, &func.ops, config)?;
79            functions.push(compiled);
80        }
81
82        Ok(CompilationResult {
83            functions,
84            elf: None,
85            backend_name: self.name().to_string(),
86        })
87    }
88
89    fn compile_function(
90        &self,
91        name: &str,
92        ops: &[WasmOp],
93        config: &CompileConfig,
94    ) -> Result<CompiledFunction, BackendError> {
95        let (code, relocations) =
96            compile_wasm_to_arm(ops, config).map_err(BackendError::CompilationFailed)?;
97
98        Ok(CompiledFunction {
99            name: name.to_string(),
100            code,
101            wasm_ops: ops.to_vec(),
102            relocations,
103        })
104    }
105
106    fn is_available(&self) -> bool {
107        true // Always available — it's a library backend
108    }
109}
110
111/// Count the number of function parameters by analyzing LocalGet patterns
112fn count_params(wasm_ops: &[WasmOp]) -> u32 {
113    let mut first_access: std::collections::HashMap<u32, bool> = std::collections::HashMap::new();
114    for op in wasm_ops {
115        match op {
116            WasmOp::LocalGet(idx) => {
117                first_access.entry(*idx).or_insert(true);
118            }
119            WasmOp::LocalSet(idx) | WasmOp::LocalTee(idx) => {
120                first_access.entry(*idx).or_insert(false);
121            }
122            _ => {}
123        }
124    }
125
126    first_access
127        .iter()
128        .filter_map(
129            |(&idx, &is_read_first)| {
130                if is_read_first { Some(idx + 1) } else { None }
131            },
132        )
133        .max()
134        .unwrap_or(0)
135}
136
137/// Core compilation: WASM ops → ARM machine code bytes + relocations
138///
139/// Returns (code_bytes, relocations) where relocations record BL instructions
140/// that target external symbols (e.g., `__meld_dispatch_import` for import calls).
141fn compile_wasm_to_arm(
142    wasm_ops: &[WasmOp],
143    config: &CompileConfig,
144) -> Result<(Vec<u8>, Vec<CodeRelocation>), String> {
145    let num_params = count_params(wasm_ops);
146
147    let bounds_config = match config.effective_safety_bounds() {
148        SafetyBounds::None => BoundsCheckConfig::None,
149        SafetyBounds::Mpu => BoundsCheckConfig::Mpu,
150        SafetyBounds::Software => BoundsCheckConfig::Software,
151        SafetyBounds::Mask => BoundsCheckConfig::Masking,
152    };
153
154    // The non-optimized (direct) instruction-selection path. Handles f32 via
155    // VFP/FPU. Used directly when `--no-optimize` is set, and as the fallback
156    // when the optimized path declines a module (see issue #120 below).
157    let select_direct = || -> Result<Vec<ArmInstruction>, String> {
158        let db = RuleDatabase::with_standard_rules();
159        let mut selector =
160            InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config);
161        selector.set_target(config.target.fpu, &config.target.triple);
162        if config.num_imports > 0 {
163            selector.set_num_imports(config.num_imports);
164        }
165        // #195: plumb the callee argument-count tables so the direct selector can
166        // marshal call arguments into R0–R3 per AAPCS.
167        selector.set_func_arg_counts(
168            config.func_arg_counts.clone(),
169            config.type_arg_counts.clone(),
170        );
171        // #197: in relocatable host-link mode, emit direct `func_N` BLs for
172        // imports (rewritten to the wasm field name by build_relocatable_elf)
173        // instead of `__meld_dispatch_import`.
174        selector.set_relocatable(config.relocatable);
175        // #237: native-pointer ABI — wasm statics become __synth_wasm_data-relative.
176        selector.set_native_pointer_abi(config.native_pointer_abi, config.linear_memory_bytes);
177        // Stack-pointer promotion is meaningful only under the native-pointer ABI;
178        // gating here keeps every non-native compile (all frozen fixtures) on the
179        // legacy R9 globals-table path, bit-identical.
180        if config.native_pointer_abi
181            && let Some((sp_idx, sp_init)) = config.stack_pointer_global
182        {
183            selector.set_native_pointer_stack(sp_idx, sp_init);
184        }
185        selector
186            .select_with_stack(wasm_ops, num_params)
187            .map_err(|e| format!("instruction selection failed: {}", e))
188    };
189
190    // Instruction selection: optimized or direct.
191    //
192    // #197: `--relocatable` (host-link ET_REL) forces the direct selector. The
193    // optimized path materializes an absolute linmem base (0x20000100) and does
194    // not preserve caller-saved registers across calls — both wrong for a
195    // host-linked object, where the linmem base arrives via `fp` at runtime and
196    // callees follow AAPCS. `select_with_stack` (now i64-spill capable after
197    // #171) handles fp-relative memory + caller-saved preservation correctly.
198    let arm_instrs = if config.no_optimize || config.relocatable {
199        select_direct()?
200    } else {
201        let opt_config = if config.loom_compat {
202            OptimizationConfig::loom_compat()
203        } else {
204            OptimizationConfig::all()
205        };
206
207        let mut bridge = OptimizerBridge::with_config(opt_config);
208        // #188: tell the bridge how many imports there are so it declines only
209        // LOCAL calls (and leaves import calls on the optimized path, keeping
210        // the #173 field-name relocation rewrite intact).
211        bridge.set_num_imports(config.num_imports);
212        // `ir_to_arm` now returns `Result` — an `Err` means the optimized path
213        // hit an unmapped vreg (issue-#93-class). Treat it identically to an
214        // `optimize_full` failure: fall back to the direct selector rather
215        // than propagating, so the function still compiles correctly.
216        match bridge
217            .optimize_full(wasm_ops)
218            .and_then(|(opt_ir, _cfg, _stats)| bridge.ir_to_arm(&opt_ir, num_params as usize))
219        {
220            Ok(arm_ops) => arm_ops
221                .into_iter()
222                .map(|op| ArmInstruction {
223                    op,
224                    source_line: None,
225                })
226                .collect(),
227            // Issue #120: the optimized path declines modules it cannot lower
228            // (notably scalar f32/f64 ops — the IR has no float opcodes). Fall
229            // back to the direct instruction selector, which handles f32 via
230            // VFP/FPU. This is honest degradation: the function still compiles
231            // correctly, just without IR-level optimization.
232            Err(_) => select_direct()?,
233        }
234    };
235
236    // #257: fuse `mul` + `add` into `mla`. Runs on the selected stream *before*
237    // branch resolution (it removes instructions, shifting byte offsets) — and is
238    // sound across control flow (the fusion only fires when the mul result is read
239    // solely by the add; see `fuse_mul_add`). A no-op for streams with no fusable
240    // pattern, so existing output stays bit-identical unless a `mul;…;add` pair
241    // qualifies.
242    let (arm_instrs, _fused) = synth_synthesis::liveness::fuse_mul_add(&arm_instrs);
243
244    // ISA feature gate: validate that all generated instructions are supported
245    // by the target. This catches FPU instructions on no-FPU targets, double-precision
246    // instructions on single-precision targets, etc.
247    validate_instructions(&arm_instrs, config.target.fpu, &config.target.triple)
248        .map_err(|e| format!("ISA validation failed: {}", e))?;
249
250    // Encode to binary — use Thumb-2 for Cortex-M targets
251    let use_thumb2 = matches!(config.target.isa, IsaVariant::Thumb2 | IsaVariant::Thumb);
252
253    let encoder = if use_thumb2 {
254        ArmEncoder::new_thumb2_with_fpu(config.target.fpu)
255    } else {
256        ArmEncoder::new_arm32()
257    };
258
259    // #202: resolve local label branches (Bcc/B/Bhs/Blo) to byte-accurate
260    // offsets before encoding. `select_with_stack` emits them as label
261    // placeholders and never resolves them — without this they encode as
262    // `bne.n #0` and land mid-instruction whenever a 32-bit Thumb-2 instruction
263    // sits between the branch and its target (UsageFault on real hardware).
264    // Only meaningful for Thumb-2 (the offset units are halfword/PC+4).
265    let arm_instrs = if use_thumb2 {
266        resolve_label_branches(arm_instrs, &encoder)?
267    } else {
268        arm_instrs
269    };
270
271    let mut code = Vec::new();
272    let mut relocations = Vec::new();
273
274    for instr in &arm_instrs {
275        // Record a relocation for every BL: the encoder emits `bl #0` and
276        // relies on a relocation to patch the target. This covers BOTH import
277        // dispatch stubs (`__meld_*`, undefined externals) AND internal calls
278        // (`func_N`, defined in this object). Previously only `__meld_*` was
279        // recorded, so internal `BL func_N` calls were left as unpatched
280        // `bl #0` placeholders branching to a garbage address (#167).
281        if let ArmOp::Bl { label } = &instr.op {
282            relocations.push(CodeRelocation {
283                offset: code.len() as u32,
284                symbol: label.clone(),
285                kind: synth_core::backend::RelocKind::ThmCall,
286            });
287        }
288        // #237: symbol-relative MOVW/MOVT (the `--native-pointer-abi` static-data
289        // addressing). The encoder writes the addend in place; record the matching
290        // R_ARM_MOVW_ABS_NC / R_ARM_MOVT_ABS so the linker adds the symbol address.
291        if let ArmOp::MovwSym { symbol, .. } = &instr.op {
292            relocations.push(CodeRelocation {
293                offset: code.len() as u32,
294                symbol: symbol.clone(),
295                kind: synth_core::backend::RelocKind::MovwAbs,
296            });
297        }
298        if let ArmOp::MovtSym { symbol, .. } = &instr.op {
299            relocations.push(CodeRelocation {
300                offset: code.len() as u32,
301                symbol: symbol.clone(),
302                kind: synth_core::backend::RelocKind::MovtAbs,
303            });
304        }
305
306        let encoded = encoder
307            .encode(&instr.op)
308            .map_err(|e| format!("ARM encoding failed: {}", e))?;
309        code.extend_from_slice(&encoded);
310    }
311
312    Ok((code, relocations))
313}
314
315/// Resolve local label branches to byte-accurate offsets (#202).
316///
317/// `select_with_stack` emits conditional/unconditional branches as label
318/// placeholders (`Bcc`/`B`/`Bhs`/`Blo` + `Label`) and never resolves them; the
319/// encoder then emits a `0xD000`/`0xE000` placeholder with offset 0. Before #197
320/// this path only ran for `--no-optimize`/declined functions, so the latent bug
321/// stayed hidden — routing relocatable code through it surfaced branches that
322/// land mid-instruction (a Cortex-M UsageFault) whenever a 32-bit Thumb-2
323/// instruction sits between the branch and its target.
324///
325/// This pass encodes each instruction to learn its real byte length (so 16- vs
326/// 32-bit forms and multi-instruction expansions are exact), maps each `Label`
327/// to its byte position, and rewrites every label branch to the displacement
328/// the encoder consumes: `(target - branch - 4) / 2` halfwords. A bounded
329/// fixed-point handles an offset growing a branch from 16- to 32-bit (which
330/// shifts later positions). `BCondOffset`/`BOffset` already produced inline by
331/// the optimized path carry no label and are left untouched.
332fn resolve_label_branches(
333    arm_instrs: Vec<ArmInstruction>,
334    encoder: &ArmEncoder,
335) -> Result<Vec<ArmInstruction>, String> {
336    use std::collections::HashMap;
337    use synth_synthesis::Condition;
338
339    enum BKind {
340        Cond(Condition),
341        Uncond,
342    }
343    // Record each label branch ONCE — indices are stable across iterations.
344    let mut branches: Vec<(usize, BKind, String)> = Vec::new();
345    for (i, instr) in arm_instrs.iter().enumerate() {
346        match &instr.op {
347            ArmOp::Bcc { cond, label } => branches.push((i, BKind::Cond(*cond), label.clone())),
348            ArmOp::Bhs { label } => branches.push((i, BKind::Cond(Condition::HS), label.clone())),
349            ArmOp::Blo { label } => branches.push((i, BKind::Cond(Condition::LO), label.clone())),
350            ArmOp::B { label } => branches.push((i, BKind::Uncond, label.clone())),
351            _ => {}
352        }
353    }
354    if branches.is_empty() {
355        return Ok(arm_instrs);
356    }
357
358    let mut resolved = arm_instrs;
359    // Sizes only grow (16→32-bit), so this converges quickly; cap for safety.
360    for _ in 0..16 {
361        // 1. Byte position of each instruction (Label encodes to 0 bytes).
362        let mut positions = Vec::with_capacity(resolved.len());
363        let mut pos: i64 = 0;
364        for instr in &resolved {
365            positions.push(pos);
366            pos += encoder
367                .encode(&instr.op)
368                .map_err(|e| format!("branch-resolve size probe failed: {}", e))?
369                .len() as i64;
370        }
371        // 2. Label name -> byte position (owned keys so the borrow ends here).
372        let mut labels: HashMap<String, i64> = HashMap::new();
373        for (i, instr) in resolved.iter().enumerate() {
374            if let ArmOp::Label { name } = &instr.op {
375                labels.insert(name.clone(), positions[i]);
376            }
377        }
378        // 3. Rewrite each branch to its byte-accurate offset.
379        let mut changed = false;
380        for (idx, kind, label) in &branches {
381            // A label not defined locally is an EXTERNAL target (e.g.
382            // `Trap_Handler` resolved by a relocation / the vector table). Leave
383            // such branches as their placeholder for the existing relocation
384            // path — only local control-flow labels are byte-resolved here.
385            let Some(&target) = labels.get(label) else {
386                continue;
387            };
388            // Encoder consumes the field as (target - branch - 4) / 2 halfwords.
389            // Positions are always even, so this division is exact.
390            let halfword_offset = ((target - positions[*idx] - 4) / 2) as i32;
391            let new_op = match kind {
392                BKind::Cond(c) => ArmOp::BCondOffset {
393                    cond: *c,
394                    offset: halfword_offset,
395                },
396                BKind::Uncond => ArmOp::BOffset {
397                    offset: halfword_offset,
398                },
399            };
400            if resolved[*idx].op != new_op {
401                resolved[*idx].op = new_op;
402                changed = true;
403            }
404        }
405        if !changed {
406            break;
407        }
408    }
409    Ok(resolved)
410}
411
412#[cfg(test)]
413mod tests {
414    use super::*;
415
416    #[test]
417    fn test_arm_backend_name() {
418        let backend = ArmBackend::new();
419        assert_eq!(backend.name(), "arm");
420        assert!(backend.is_available());
421    }
422
423    #[test]
424    fn test_arm_backend_capabilities() {
425        let backend = ArmBackend::new();
426        let caps = backend.capabilities();
427        assert!(!caps.produces_elf);
428        assert!(caps.supports_rule_verification);
429        assert!(!caps.is_external);
430    }
431
432    #[test]
433    fn test_compile_add_function() {
434        let backend = ArmBackend::new();
435        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
436        let config = CompileConfig::default();
437
438        let result = backend.compile_function("add", &ops, &config);
439        assert!(result.is_ok());
440
441        let func = result.unwrap();
442        assert_eq!(func.name, "add");
443        assert!(!func.code.is_empty());
444        assert_eq!(func.wasm_ops, ops);
445    }
446
447    #[test]
448    fn test_count_params() {
449        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
450        assert_eq!(count_params(&ops), 2);
451
452        let no_params = vec![WasmOp::I32Const(5), WasmOp::I32Const(3), WasmOp::I32Add];
453        assert_eq!(count_params(&no_params), 0);
454    }
455
456    #[test]
457    fn test_arm_backend_register() {
458        let mut registry = synth_core::BackendRegistry::new();
459        registry.register(Box::new(ArmBackend::new()));
460        assert!(registry.get("arm").is_some());
461        assert_eq!(registry.available().len(), 1);
462    }
463
464    #[test]
465    fn test_compile_import_call_produces_relocations() {
466        let backend = ArmBackend::new();
467        // Simulate a WASM module where func index 0 is an import.
468        // Call(0) should generate MOV R0, #0; BL __meld_dispatch_import
469        let ops = vec![WasmOp::Call(0)];
470        let config = CompileConfig {
471            num_imports: 1,
472            no_optimize: true, // Direct instruction selection to preserve Call semantics
473            ..CompileConfig::default()
474        };
475
476        let result = backend.compile_function("caller", &ops, &config);
477        assert!(result.is_ok());
478
479        let func = result.unwrap();
480        assert!(!func.code.is_empty());
481        assert_eq!(func.relocations.len(), 1);
482        assert_eq!(func.relocations[0].symbol, "__meld_dispatch_import");
483        // The BL is the second instruction (after MOV R0, #0), so offset should be > 0
484        assert!(func.relocations[0].offset > 0);
485    }
486
487    /// Regression test for #197: in `relocatable` mode, an import call must
488    /// relocate against the direct `func_N` symbol (rewritten to the wasm field
489    /// name by `build_relocatable_elf`), NOT `__meld_dispatch_import`. This is
490    /// the ABI half of the #197 fix — without it, a host linker cannot resolve
491    /// the call to the real kernel symbol (e.g. `k_spin_lock`).
492    #[test]
493    fn test_compile_relocatable_import_uses_direct_func_symbol_197() {
494        let backend = ArmBackend::new();
495        let ops = vec![WasmOp::Call(0)]; // func 0 is an import
496        let config = CompileConfig {
497            num_imports: 1,
498            relocatable: true,
499            ..CompileConfig::default()
500        };
501
502        let func = backend
503            .compile_function("caller", &ops, &config)
504            .expect("relocatable import call compiles");
505
506        assert_eq!(func.relocations.len(), 1);
507        assert_eq!(
508            func.relocations[0].symbol, "func_0",
509            "#197: relocatable import must relocate against func_0 (→ field name), not Meld dispatch"
510        );
511    }
512
513    #[test]
514    fn test_compile_no_imports_no_relocations() {
515        let backend = ArmBackend::new();
516        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
517        let config = CompileConfig::default();
518
519        let func = backend.compile_function("add", &ops, &config).unwrap();
520        assert!(func.relocations.is_empty());
521    }
522
523    /// Regression test for #167: a call to an INTERNAL function
524    /// (index `>= num_imports`) must record a relocation against `func_{index}`.
525    /// Before the fix, only `__meld_*` (import) BLs were relocated, so
526    /// internal `BL func_N` was emitted as an unpatched `bl #0` branching
527    /// to a garbage address — making the object non-linkable. This test
528    /// would have caught that regression.
529    #[test]
530    fn test_compile_internal_call_produces_relocation_167() {
531        let backend = ArmBackend::new();
532        // num_imports = 1, so Call(2) is an INTERNAL call → `BL func_2`.
533        let ops = vec![WasmOp::Call(2)];
534        let config = CompileConfig {
535            num_imports: 1,
536            no_optimize: true,
537            ..CompileConfig::default()
538        };
539
540        let func = backend
541            .compile_function("caller", &ops, &config)
542            .expect("internal call compiles");
543
544        assert_eq!(
545            func.relocations.len(),
546            1,
547            "an internal call must emit exactly one relocation (#167)"
548        );
549        assert_eq!(
550            func.relocations[0].symbol, "func_2",
551            "internal call must relocate against the callee's func_{{index}} symbol (#167)"
552        );
553    }
554
555    // ─── Phase 1 safety-bounds plumbing for ARM ──────────────────────────
556
557    #[test]
558    fn arm_safety_bounds_mpu_emits_same_code_as_none() {
559        // Mpu mode must not introduce any inline check on ARM — the MPU
560        // handles faults via hardware. The encoded bytes for an i32.load
561        // should be identical between None and Mpu.
562        let backend = ArmBackend::new();
563        let ops = vec![
564            WasmOp::LocalGet(0),
565            WasmOp::I32Load {
566                offset: 0,
567                align: 2,
568            },
569        ];
570        let cfg_none = CompileConfig {
571            no_optimize: true,
572            ..Default::default()
573        };
574        let cfg_mpu = CompileConfig {
575            no_optimize: true,
576            safety_bounds: SafetyBounds::Mpu,
577            ..Default::default()
578        };
579        let n = backend.compile_function("ld", &ops, &cfg_none).unwrap();
580        let m = backend.compile_function("ld", &ops, &cfg_mpu).unwrap();
581        assert_eq!(
582            n.code, m.code,
583            "Mpu and None should produce identical ARM bytes (Mpu relies on hardware)"
584        );
585    }
586
587    #[test]
588    fn arm_legacy_bounds_check_still_emits_software_check() {
589        // Legacy CLI users with `--bounds-check` should keep getting the
590        // software path even though the new SafetyBounds field defaults to None.
591        let backend = ArmBackend::new();
592        let ops = vec![
593            WasmOp::LocalGet(0),
594            WasmOp::I32Load {
595                offset: 0,
596                align: 2,
597            },
598        ];
599        let cfg_legacy = CompileConfig {
600            no_optimize: true,
601            bounds_check: true,
602            ..Default::default()
603        };
604        let cfg_software = CompileConfig {
605            no_optimize: true,
606            safety_bounds: SafetyBounds::Software,
607            ..Default::default()
608        };
609        let l = backend.compile_function("ld", &ops, &cfg_legacy).unwrap();
610        let s = backend.compile_function("ld", &ops, &cfg_software).unwrap();
611        assert_eq!(
612            l.code, s.code,
613            "--bounds-check should produce the same bytes as --safety-bounds=software"
614        );
615    }
616
617    // ========================================================================
618    // ISA feature gate tests — ensure the compiler never emits unsupported
619    // instructions for a given target
620    // ========================================================================
621
622    #[test]
623    fn test_f32_rejected_on_cortex_m3_no_fpu() {
624        let backend = ArmBackend::new();
625        let ops = vec![WasmOp::F32Const(1.0), WasmOp::F32Const(2.0), WasmOp::F32Add];
626        let config = CompileConfig {
627            target: TargetSpec::cortex_m3(),
628            no_optimize: true,
629            ..CompileConfig::default()
630        };
631
632        let result = backend.compile_function("fadd", &ops, &config);
633        assert!(
634            result.is_err(),
635            "f32 operations should fail on Cortex-M3 (no FPU)"
636        );
637    }
638
639    #[test]
640    fn test_f32_accepted_on_cortex_m4f() {
641        let backend = ArmBackend::new();
642        let ops = vec![WasmOp::F32Const(1.0), WasmOp::F32Const(2.0), WasmOp::F32Add];
643        let config = CompileConfig {
644            target: TargetSpec::cortex_m4f(),
645            no_optimize: true,
646            ..CompileConfig::default()
647        };
648
649        let result = backend.compile_function("fadd", &ops, &config);
650        assert!(
651            result.is_ok(),
652            "f32 operations should succeed on Cortex-M4F, got: {:?}",
653            result.unwrap_err()
654        );
655    }
656
657    #[test]
658    fn test_i32_works_on_all_targets() {
659        let backend = ArmBackend::new();
660        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
661
662        // Cortex-M3 (no FPU)
663        let config_m3 = CompileConfig {
664            target: TargetSpec::cortex_m3(),
665            no_optimize: true,
666            ..CompileConfig::default()
667        };
668        assert!(
669            backend.compile_function("add", &ops, &config_m3).is_ok(),
670            "i32 ops should work on Cortex-M3"
671        );
672
673        // Cortex-M4F (single FPU)
674        let config_m4f = CompileConfig {
675            target: TargetSpec::cortex_m4f(),
676            no_optimize: true,
677            ..CompileConfig::default()
678        };
679        assert!(
680            backend.compile_function("add", &ops, &config_m4f).is_ok(),
681            "i32 ops should work on Cortex-M4F"
682        );
683
684        // Cortex-M7DP (double FPU)
685        let config_m7dp = CompileConfig {
686            target: TargetSpec::cortex_m7dp(),
687            no_optimize: true,
688            ..CompileConfig::default()
689        };
690        assert!(
691            backend.compile_function("add", &ops, &config_m7dp).is_ok(),
692            "i32 ops should work on Cortex-M7DP"
693        );
694    }
695
696    #[test]
697    fn test_f32_rejected_on_cortex_m4_no_fpu() {
698        // Cortex-M4 (without F suffix) has no FPU
699        let backend = ArmBackend::new();
700        let ops = vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Mul];
701        let config = CompileConfig {
702            target: TargetSpec::cortex_m4(),
703            no_optimize: true,
704            ..CompileConfig::default()
705        };
706
707        let result = backend.compile_function("fmul", &ops, &config);
708        assert!(
709            result.is_err(),
710            "f32 operations should fail on Cortex-M4 (no FPU)"
711        );
712    }
713
714    // ========================================================================
715    // Issue #120 — f32 ops in the optimized lowering path
716    //
717    // `OptimizerBridge::wasm_to_ir` has no handlers for f32/f64 ops, so a
718    // value-producing float op fell through to `Opcode::Nop`, leaving a
719    // downstream consumer with an unmapped vreg and tripping the PR #101
720    // defensive panic in `ir_to_arm`. Customer reproducer: `compiler_builtins
721    // float::div` and `gale_compute_ipi_mask` in the `falcon-rate-component`
722    // module.
723    //
724    // Fix: `optimize_full` declines float modules with a typed `Err`;
725    // `compile_wasm_to_arm` falls back to the non-optimized `select_with_stack`
726    // path, which handles f32 via VFP/FPU. These tests use the *default*
727    // (optimized) config — `no_optimize` is NOT set — which is the exact
728    // configuration that panicked pre-fix.
729    // ========================================================================
730
731    /// Pre-fix: this panicked with "vreg vN has no assigned ARM register and
732    /// no spill slot" inside `ir_to_arm`. Post-fix: the optimized path declines
733    /// the module and the backend falls back to direct selection, producing a
734    /// non-empty f32.div lowering on a Cortex-M4F.
735    #[test]
736    fn test_issue120_f32_div_compiles_via_optimized_default() {
737        let backend = ArmBackend::new();
738        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];
739        let config = CompileConfig {
740            target: TargetSpec::cortex_m4f(),
741            // no_optimize NOT set — this exercises the optimized path that
742            // panicked in issue #120, then the fallback to direct selection.
743            ..CompileConfig::default()
744        };
745
746        let result = backend.compile_function("fdiv", &ops, &config);
747        assert!(
748            result.is_ok(),
749            "f32.div must compile on Cortex-M4F via the optimized->direct \
750             fallback (issue #120), got: {:?}",
751            result.as_ref().err()
752        );
753        assert!(
754            !result.unwrap().code.is_empty(),
755            "f32.div must produce non-empty machine code"
756        );
757    }
758
759    /// A spread of f32 ops, all through the optimized (default) config, must
760    /// compile via the fallback on an FPU target without panicking.
761    #[test]
762    fn test_issue120_assorted_f32_ops_compile_via_optimized_default() {
763        let backend = ArmBackend::new();
764        let config = CompileConfig {
765            target: TargetSpec::cortex_m4f(),
766            ..CompileConfig::default()
767        };
768
769        let cases: Vec<(&str, Vec<WasmOp>)> = vec![
770            (
771                "fadd",
772                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Add],
773            ),
774            (
775                "fmul",
776                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Mul],
777            ),
778            (
779                "fsub",
780                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Sub],
781            ),
782        ];
783
784        for (name, ops) in cases {
785            let result = backend.compile_function(name, &ops, &config);
786            assert!(
787                result.is_ok(),
788                "{name} must compile via the optimized->direct fallback \
789                 (issue #120), got: {:?}",
790                result.as_ref().err()
791            );
792            assert!(
793                !result.unwrap().code.is_empty(),
794                "{name} must produce non-empty machine code"
795            );
796        }
797    }
798
799    /// The fallback must still honor the ISA feature gate: f32 on a no-FPU
800    /// target must fail cleanly (not panic) even on the optimized path.
801    #[test]
802    fn test_issue120_f32_div_rejected_on_no_fpu_via_optimized() {
803        let backend = ArmBackend::new();
804        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];
805        let config = CompileConfig {
806            target: TargetSpec::cortex_m3(),
807            ..CompileConfig::default()
808        };
809
810        let result = backend.compile_function("fdiv", &ops, &config);
811        assert!(
812            result.is_err(),
813            "f32.div must be rejected on Cortex-M3 (no FPU), not panic"
814        );
815    }
816
817    /// Issue #94: end-to-end byte-size check for the canonical u64-packed
818    /// FFI-return hi32 extract pattern. Compiles two near-identical
819    /// functions — one with the optimized shift-by-32, one with a generic
820    /// shift-by-7 — and asserts the optimized form is meaningfully smaller.
821    #[test]
822    fn test_issue94_hi32_extract_is_smaller_than_generic_shift() {
823        let backend = ArmBackend::new();
824        let config = CompileConfig {
825            target: TargetSpec::cortex_m4f(),
826            ..CompileConfig::default()
827        };
828
829        // Optimized path: `(local.get 0) >>> 32; wrap_i64`
830        let ops_hi32 = vec![
831            WasmOp::LocalGet(0), // i64 param in R0:R1
832            WasmOp::I64Const(32),
833            WasmOp::I64ShrU,
834            WasmOp::I32WrapI64,
835        ];
836        let func_hi32 = backend
837            .compile_function("hi32_extract", &ops_hi32, &config)
838            .unwrap();
839
840        // Generic path: `(local.get 0) >>> 7; wrap_i64` — same shape, but the
841        // shift amount is not a multiple of 32, so it falls through to the
842        // 38-byte runtime shift.
843        let ops_generic = vec![
844            WasmOp::LocalGet(0),
845            WasmOp::I64Const(7),
846            WasmOp::I64ShrU,
847            WasmOp::I32WrapI64,
848        ];
849        let func_generic = backend
850            .compile_function("generic_shr", &ops_generic, &config)
851            .unwrap();
852
853        let bytes_hi32 = func_hi32.code.len();
854        let bytes_generic = func_generic.code.len();
855        println!(
856            "\n[issue #94] hi32 extract: {} bytes (vs generic shift: {} bytes; saved {})",
857            bytes_hi32,
858            bytes_generic,
859            bytes_generic.saturating_sub(bytes_hi32)
860        );
861        let hex: String = func_hi32
862            .code
863            .iter()
864            .map(|b| format!("{:02x}", b))
865            .collect::<Vec<_>>()
866            .join(" ");
867        println!("[issue #94] hi32 bytes: {}", hex);
868        // We expect the optimized form to be at least 30 bytes smaller than
869        // the generic 64-bit shift sequence. (Empirically: 14 vs 50 bytes.)
870        assert!(
871            bytes_hi32 + 30 <= bytes_generic,
872            "issue #94: hi32 extract = {} bytes, generic shift = {} bytes; \
873             expected optimized form to be at least 30 bytes smaller",
874            bytes_hi32,
875            bytes_generic,
876        );
877    }
878}