// synth_backend/arm_backend.rs

//! ARM backend: wraps the instruction selector, optimizer, and encoder as a `Backend`.
//!
//! This is Synth's custom ARM compiler targeting Cortex-M (Thumb-2).
//! It is the only backend that supports per-rule formal verification (ASIL D path).
use crate::ArmEncoder;
use std::collections::HashSet;
use synth_core::backend::{
    Backend, BackendCapabilities, BackendError, CodeRelocation, CompilationResult, CompileConfig,
    CompiledFunction, SafetyBounds,
};
use synth_core::target::{IsaVariant, TargetSpec};
use synth_core::wasm_decoder::DecodedModule;
use synth_core::wasm_op::WasmOp;
use synth_synthesis::{
    ArmInstruction, ArmOp, BoundsCheckConfig, InstructionSelector, OptimizationConfig,
    OptimizerBridge, RuleDatabase, validate_instructions,
};
/// ARM Cortex-M backend using Synth's custom compiler pipeline.
///
/// The type carries no state: every value is interchangeable, so it is
/// `Copy` and can be constructed in `const` contexts.
#[derive(Debug, Clone, Copy, Default)]
pub struct ArmBackend;

impl ArmBackend {
    /// Creates the backend. Equivalent to [`ArmBackend::default`].
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
34impl Backend for ArmBackend {
35    fn name(&self) -> &str {
36        "arm"
37    }
38
39    fn capabilities(&self) -> BackendCapabilities {
40        BackendCapabilities {
41            produces_elf: false,
42            supports_rule_verification: true,
43            supports_binary_verification: true,
44            is_external: false,
45        }
46    }
47
48    fn supported_targets(&self) -> Vec<TargetSpec> {
49        vec![
50            TargetSpec::cortex_m3(),
51            TargetSpec::cortex_m4(),
52            TargetSpec::cortex_m4f(),
53            TargetSpec::cortex_m7(),
54            TargetSpec::cortex_m7dp(),
55        ]
56    }
57
58    fn compile_module(
59        &self,
60        module: &DecodedModule,
61        config: &CompileConfig,
62    ) -> Result<CompilationResult, BackendError> {
63        let exports: Vec<_> = module
64            .functions
65            .iter()
66            .filter(|f| f.export_name.is_some())
67            .collect();
68
69        if exports.is_empty() {
70            return Err(BackendError::CompilationFailed(
71                "no exported functions found".into(),
72            ));
73        }
74
75        let mut functions = Vec::new();
76        for func in &exports {
77            let name = func.export_name.clone().unwrap();
78            let compiled = self.compile_function(&name, &func.ops, config)?;
79            functions.push(compiled);
80        }
81
82        Ok(CompilationResult {
83            functions,
84            elf: None,
85            backend_name: self.name().to_string(),
86        })
87    }
88
89    fn compile_function(
90        &self,
91        name: &str,
92        ops: &[WasmOp],
93        config: &CompileConfig,
94    ) -> Result<CompiledFunction, BackendError> {
95        let (code, relocations) =
96            compile_wasm_to_arm(ops, config).map_err(BackendError::CompilationFailed)?;
97
98        Ok(CompiledFunction {
99            name: name.to_string(),
100            code,
101            wasm_ops: ops.to_vec(),
102            relocations,
103        })
104    }
105
106    fn is_available(&self) -> bool {
107        true // Always available — it's a library backend
108    }
109}
110
111/// Count the number of function parameters by analyzing LocalGet patterns
112fn count_params(wasm_ops: &[WasmOp]) -> u32 {
113    let mut first_access: std::collections::HashMap<u32, bool> = std::collections::HashMap::new();
114    for op in wasm_ops {
115        match op {
116            WasmOp::LocalGet(idx) => {
117                first_access.entry(*idx).or_insert(true);
118            }
119            WasmOp::LocalSet(idx) | WasmOp::LocalTee(idx) => {
120                first_access.entry(*idx).or_insert(false);
121            }
122            _ => {}
123        }
124    }
125
126    first_access
127        .iter()
128        .filter_map(
129            |(&idx, &is_read_first)| {
130                if is_read_first { Some(idx + 1) } else { None }
131            },
132        )
133        .max()
134        .unwrap_or(0)
135}
136
137/// Core compilation: WASM ops → ARM machine code bytes + relocations
138///
139/// Returns (code_bytes, relocations) where relocations record BL instructions
140/// that target external symbols (e.g., `__meld_dispatch_import` for import calls).
141fn compile_wasm_to_arm(
142    wasm_ops: &[WasmOp],
143    config: &CompileConfig,
144) -> Result<(Vec<u8>, Vec<CodeRelocation>), String> {
145    let num_params = count_params(wasm_ops);
146
147    let bounds_config = match config.effective_safety_bounds() {
148        SafetyBounds::None => BoundsCheckConfig::None,
149        SafetyBounds::Mpu => BoundsCheckConfig::Mpu,
150        SafetyBounds::Software => BoundsCheckConfig::Software,
151        SafetyBounds::Mask => BoundsCheckConfig::Masking,
152    };
153
154    // The non-optimized (direct) instruction-selection path. Handles f32 via
155    // VFP/FPU. Used directly when `--no-optimize` is set, and as the fallback
156    // when the optimized path declines a module (see issue #120 below).
157    let select_direct = || -> Result<Vec<ArmInstruction>, String> {
158        let db = RuleDatabase::with_standard_rules();
159        let mut selector =
160            InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config);
161        selector.set_target(config.target.fpu, &config.target.triple);
162        if config.num_imports > 0 {
163            selector.set_num_imports(config.num_imports);
164        }
165        // #195: plumb the callee argument-count tables so the direct selector can
166        // marshal call arguments into R0–R3 per AAPCS.
167        selector.set_func_arg_counts(
168            config.func_arg_counts.clone(),
169            config.type_arg_counts.clone(),
170        );
171        selector
172            .select_with_stack(wasm_ops, num_params)
173            .map_err(|e| format!("instruction selection failed: {}", e))
174    };
175
176    // Instruction selection: optimized or direct
177    let arm_instrs = if config.no_optimize {
178        select_direct()?
179    } else {
180        let opt_config = if config.loom_compat {
181            OptimizationConfig::loom_compat()
182        } else {
183            OptimizationConfig::all()
184        };
185
186        let mut bridge = OptimizerBridge::with_config(opt_config);
187        // #188: tell the bridge how many imports there are so it declines only
188        // LOCAL calls (and leaves import calls on the optimized path, keeping
189        // the #173 field-name relocation rewrite intact).
190        bridge.set_num_imports(config.num_imports);
191        // `ir_to_arm` now returns `Result` — an `Err` means the optimized path
192        // hit an unmapped vreg (issue-#93-class). Treat it identically to an
193        // `optimize_full` failure: fall back to the direct selector rather
194        // than propagating, so the function still compiles correctly.
195        match bridge
196            .optimize_full(wasm_ops)
197            .and_then(|(opt_ir, _cfg, _stats)| bridge.ir_to_arm(&opt_ir, num_params as usize))
198        {
199            Ok(arm_ops) => arm_ops
200                .into_iter()
201                .map(|op| ArmInstruction {
202                    op,
203                    source_line: None,
204                })
205                .collect(),
206            // Issue #120: the optimized path declines modules it cannot lower
207            // (notably scalar f32/f64 ops — the IR has no float opcodes). Fall
208            // back to the direct instruction selector, which handles f32 via
209            // VFP/FPU. This is honest degradation: the function still compiles
210            // correctly, just without IR-level optimization.
211            Err(_) => select_direct()?,
212        }
213    };
214
215    // ISA feature gate: validate that all generated instructions are supported
216    // by the target. This catches FPU instructions on no-FPU targets, double-precision
217    // instructions on single-precision targets, etc.
218    validate_instructions(&arm_instrs, config.target.fpu, &config.target.triple)
219        .map_err(|e| format!("ISA validation failed: {}", e))?;
220
221    // Encode to binary — use Thumb-2 for Cortex-M targets
222    let use_thumb2 = matches!(config.target.isa, IsaVariant::Thumb2 | IsaVariant::Thumb);
223
224    let encoder = if use_thumb2 {
225        ArmEncoder::new_thumb2_with_fpu(config.target.fpu)
226    } else {
227        ArmEncoder::new_arm32()
228    };
229
230    let mut code = Vec::new();
231    let mut relocations = Vec::new();
232
233    for instr in &arm_instrs {
234        // Record a relocation for every BL: the encoder emits `bl #0` and
235        // relies on a relocation to patch the target. This covers BOTH import
236        // dispatch stubs (`__meld_*`, undefined externals) AND internal calls
237        // (`func_N`, defined in this object). Previously only `__meld_*` was
238        // recorded, so internal `BL func_N` calls were left as unpatched
239        // `bl #0` placeholders branching to a garbage address (#167).
240        if let ArmOp::Bl { label } = &instr.op {
241            relocations.push(CodeRelocation {
242                offset: code.len() as u32,
243                symbol: label.clone(),
244            });
245        }
246
247        let encoded = encoder
248            .encode(&instr.op)
249            .map_err(|e| format!("ARM encoding failed: {}", e))?;
250        code.extend_from_slice(&encoded);
251    }
252
253    Ok((code, relocations))
254}
255
/// Unit tests for the ARM backend: trait metadata, parameter counting,
/// relocation recording, safety-bounds plumbing, the ISA feature gate, and
/// regressions for issues #94, #120 and #167.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_arm_backend_name() {
        let backend = ArmBackend::new();
        assert_eq!(backend.name(), "arm");
        assert!(backend.is_available());
    }

    #[test]
    fn test_arm_backend_capabilities() {
        let backend = ArmBackend::new();
        let caps = backend.capabilities();
        assert!(!caps.produces_elf);
        assert!(caps.supports_rule_verification);
        assert!(caps.supports_binary_verification);
        assert!(!caps.is_external);
    }

    #[test]
    fn test_compile_add_function() {
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
        let config = CompileConfig::default();

        let result = backend.compile_function("add", &ops, &config);
        assert!(result.is_ok());

        let func = result.unwrap();
        assert_eq!(func.name, "add");
        assert!(!func.code.is_empty());
        assert_eq!(func.wasm_ops, ops);
    }

    #[test]
    fn test_count_params() {
        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
        assert_eq!(count_params(&ops), 2);

        let no_params = vec![WasmOp::I32Const(5), WasmOp::I32Const(3), WasmOp::I32Add];
        assert_eq!(count_params(&no_params), 0);
    }

    #[test]
    fn test_arm_backend_register() {
        let mut registry = synth_core::BackendRegistry::new();
        registry.register(Box::new(ArmBackend::new()));
        assert!(registry.get("arm").is_some());
        assert_eq!(registry.available().len(), 1);
    }

    #[test]
    fn test_compile_import_call_produces_relocations() {
        let backend = ArmBackend::new();
        // Simulate a WASM module where func index 0 is an import.
        // Call(0) should generate MOV R0, #0; BL __meld_dispatch_import
        let ops = vec![WasmOp::Call(0)];
        let config = CompileConfig {
            num_imports: 1,
            no_optimize: true, // Direct instruction selection to preserve Call semantics
            ..CompileConfig::default()
        };

        let result = backend.compile_function("caller", &ops, &config);
        assert!(result.is_ok());

        let func = result.unwrap();
        assert!(!func.code.is_empty());
        assert_eq!(func.relocations.len(), 1);
        assert_eq!(func.relocations[0].symbol, "__meld_dispatch_import");
        // The BL is the second instruction (after MOV R0, #0), so offset should be > 0
        assert!(func.relocations[0].offset > 0);
    }

    #[test]
    fn test_compile_no_imports_no_relocations() {
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
        let config = CompileConfig::default();

        let func = backend.compile_function("add", &ops, &config).unwrap();
        assert!(func.relocations.is_empty());
    }

    /// Regression test for #167: a call to an INTERNAL function
    /// (index `>= num_imports`) must record a relocation against `func_{index}`.
    /// Before the fix, only `__meld_*` (import) BLs were relocated, so
    /// internal `BL func_N` was emitted as an unpatched `bl #0` branching
    /// to a garbage address — making the object non-linkable. This test
    /// would have caught that regression.
    #[test]
    fn test_compile_internal_call_produces_relocation_167() {
        let backend = ArmBackend::new();
        // num_imports = 1, so Call(2) is an INTERNAL call → `BL func_2`.
        let ops = vec![WasmOp::Call(2)];
        let config = CompileConfig {
            num_imports: 1,
            no_optimize: true,
            ..CompileConfig::default()
        };

        let func = backend
            .compile_function("caller", &ops, &config)
            .expect("internal call compiles");

        assert_eq!(
            func.relocations.len(),
            1,
            "an internal call must emit exactly one relocation (#167)"
        );
        assert_eq!(
            func.relocations[0].symbol, "func_2",
            "internal call must relocate against the callee's func_{{index}} symbol (#167)"
        );
    }

    // ─── Phase 1 safety-bounds plumbing for ARM ──────────────────────────

    #[test]
    fn arm_safety_bounds_mpu_emits_same_code_as_none() {
        // Mpu mode must not introduce any inline check on ARM — the MPU
        // handles faults via hardware. The encoded bytes for an i32.load
        // should be identical between None and Mpu.
        let backend = ArmBackend::new();
        let ops = vec![
            WasmOp::LocalGet(0),
            WasmOp::I32Load {
                offset: 0,
                align: 2,
            },
        ];
        let cfg_none = CompileConfig {
            no_optimize: true,
            ..Default::default()
        };
        let cfg_mpu = CompileConfig {
            no_optimize: true,
            safety_bounds: SafetyBounds::Mpu,
            ..Default::default()
        };
        let n = backend.compile_function("ld", &ops, &cfg_none).unwrap();
        let m = backend.compile_function("ld", &ops, &cfg_mpu).unwrap();
        assert_eq!(
            n.code, m.code,
            "Mpu and None should produce identical ARM bytes (Mpu relies on hardware)"
        );
    }

    #[test]
    fn arm_legacy_bounds_check_still_emits_software_check() {
        // Legacy CLI users with `--bounds-check` should keep getting the
        // software path even though the new SafetyBounds field defaults to None.
        let backend = ArmBackend::new();
        let ops = vec![
            WasmOp::LocalGet(0),
            WasmOp::I32Load {
                offset: 0,
                align: 2,
            },
        ];
        let cfg_legacy = CompileConfig {
            no_optimize: true,
            bounds_check: true,
            ..Default::default()
        };
        let cfg_software = CompileConfig {
            no_optimize: true,
            safety_bounds: SafetyBounds::Software,
            ..Default::default()
        };
        let l = backend.compile_function("ld", &ops, &cfg_legacy).unwrap();
        let s = backend.compile_function("ld", &ops, &cfg_software).unwrap();
        assert_eq!(
            l.code, s.code,
            "--bounds-check should produce the same bytes as --safety-bounds=software"
        );
    }

    // ========================================================================
    // ISA feature gate tests — ensure the compiler never emits unsupported
    // instructions for a given target
    // ========================================================================

    #[test]
    fn test_f32_rejected_on_cortex_m3_no_fpu() {
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::F32Const(1.0), WasmOp::F32Const(2.0), WasmOp::F32Add];
        let config = CompileConfig {
            target: TargetSpec::cortex_m3(),
            no_optimize: true,
            ..CompileConfig::default()
        };

        let result = backend.compile_function("fadd", &ops, &config);
        assert!(
            result.is_err(),
            "f32 operations should fail on Cortex-M3 (no FPU)"
        );
    }

    #[test]
    fn test_f32_accepted_on_cortex_m4f() {
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::F32Const(1.0), WasmOp::F32Const(2.0), WasmOp::F32Add];
        let config = CompileConfig {
            target: TargetSpec::cortex_m4f(),
            no_optimize: true,
            ..CompileConfig::default()
        };

        let result = backend.compile_function("fadd", &ops, &config);
        assert!(
            result.is_ok(),
            "f32 operations should succeed on Cortex-M4F, got: {:?}",
            result.as_ref().err()
        );
    }

    #[test]
    fn test_i32_works_on_all_targets() {
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];

        // Cortex-M3 (no FPU)
        let config_m3 = CompileConfig {
            target: TargetSpec::cortex_m3(),
            no_optimize: true,
            ..CompileConfig::default()
        };
        assert!(
            backend.compile_function("add", &ops, &config_m3).is_ok(),
            "i32 ops should work on Cortex-M3"
        );

        // Cortex-M4F (single FPU)
        let config_m4f = CompileConfig {
            target: TargetSpec::cortex_m4f(),
            no_optimize: true,
            ..CompileConfig::default()
        };
        assert!(
            backend.compile_function("add", &ops, &config_m4f).is_ok(),
            "i32 ops should work on Cortex-M4F"
        );

        // Cortex-M7DP (double FPU)
        let config_m7dp = CompileConfig {
            target: TargetSpec::cortex_m7dp(),
            no_optimize: true,
            ..CompileConfig::default()
        };
        assert!(
            backend.compile_function("add", &ops, &config_m7dp).is_ok(),
            "i32 ops should work on Cortex-M7DP"
        );
    }

    #[test]
    fn test_f32_rejected_on_cortex_m4_no_fpu() {
        // Cortex-M4 (without F suffix) has no FPU
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Mul];
        let config = CompileConfig {
            target: TargetSpec::cortex_m4(),
            no_optimize: true,
            ..CompileConfig::default()
        };

        let result = backend.compile_function("fmul", &ops, &config);
        assert!(
            result.is_err(),
            "f32 operations should fail on Cortex-M4 (no FPU)"
        );
    }

    // ========================================================================
    // Issue #120 — f32 ops in the optimized lowering path
    //
    // `OptimizerBridge::wasm_to_ir` has no handlers for f32/f64 ops, so a
    // value-producing float op fell through to `Opcode::Nop`, leaving a
    // downstream consumer with an unmapped vreg and tripping the PR #101
    // defensive panic in `ir_to_arm`. Customer reproducer: `compiler_builtins
    // float::div` and `gale_compute_ipi_mask` in the `falcon-rate-component`
    // module.
    //
    // Fix: `optimize_full` declines float modules with a typed `Err`;
    // `compile_wasm_to_arm` falls back to the non-optimized `select_with_stack`
    // path, which handles f32 via VFP/FPU. These tests use the *default*
    // (optimized) config — `no_optimize` is NOT set — which is the exact
    // configuration that panicked pre-fix.
    // ========================================================================

    /// Pre-fix: this panicked with "vreg vN has no assigned ARM register and
    /// no spill slot" inside `ir_to_arm`. Post-fix: the optimized path declines
    /// the module and the backend falls back to direct selection, producing a
    /// non-empty f32.div lowering on a Cortex-M4F.
    #[test]
    fn test_issue120_f32_div_compiles_via_optimized_default() {
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];
        let config = CompileConfig {
            target: TargetSpec::cortex_m4f(),
            // no_optimize NOT set — this exercises the optimized path that
            // panicked in issue #120, then the fallback to direct selection.
            ..CompileConfig::default()
        };

        let result = backend.compile_function("fdiv", &ops, &config);
        assert!(
            result.is_ok(),
            "f32.div must compile on Cortex-M4F via the optimized->direct \
             fallback (issue #120), got: {:?}",
            result.as_ref().err()
        );
        assert!(
            !result.unwrap().code.is_empty(),
            "f32.div must produce non-empty machine code"
        );
    }

    /// A spread of f32 ops, all through the optimized (default) config, must
    /// compile via the fallback on an FPU target without panicking.
    #[test]
    fn test_issue120_assorted_f32_ops_compile_via_optimized_default() {
        let backend = ArmBackend::new();
        let config = CompileConfig {
            target: TargetSpec::cortex_m4f(),
            ..CompileConfig::default()
        };

        let cases: Vec<(&str, Vec<WasmOp>)> = vec![
            (
                "fadd",
                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Add],
            ),
            (
                "fmul",
                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Mul],
            ),
            (
                "fsub",
                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Sub],
            ),
        ];

        for (name, ops) in cases {
            let result = backend.compile_function(name, &ops, &config);
            assert!(
                result.is_ok(),
                "{name} must compile via the optimized->direct fallback \
                 (issue #120), got: {:?}",
                result.as_ref().err()
            );
            assert!(
                !result.unwrap().code.is_empty(),
                "{name} must produce non-empty machine code"
            );
        }
    }

    /// The fallback must still honor the ISA feature gate: f32 on a no-FPU
    /// target must fail cleanly (not panic) even on the optimized path.
    #[test]
    fn test_issue120_f32_div_rejected_on_no_fpu_via_optimized() {
        let backend = ArmBackend::new();
        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];
        let config = CompileConfig {
            target: TargetSpec::cortex_m3(),
            ..CompileConfig::default()
        };

        let result = backend.compile_function("fdiv", &ops, &config);
        assert!(
            result.is_err(),
            "f32.div must be rejected on Cortex-M3 (no FPU), not panic"
        );
    }

    /// Issue #94: end-to-end byte-size check for the canonical u64-packed
    /// FFI-return hi32 extract pattern. Compiles two near-identical
    /// functions — one with the optimized shift-by-32, one with a generic
    /// shift-by-7 — and asserts the optimized form is meaningfully smaller.
    #[test]
    fn test_issue94_hi32_extract_is_smaller_than_generic_shift() {
        let backend = ArmBackend::new();
        let config = CompileConfig {
            target: TargetSpec::cortex_m4f(),
            ..CompileConfig::default()
        };

        // Optimized path: `(local.get 0) >>> 32; wrap_i64`
        let ops_hi32 = vec![
            WasmOp::LocalGet(0), // i64 param in R0:R1
            WasmOp::I64Const(32),
            WasmOp::I64ShrU,
            WasmOp::I32WrapI64,
        ];
        let func_hi32 = backend
            .compile_function("hi32_extract", &ops_hi32, &config)
            .unwrap();

        // Generic path: `(local.get 0) >>> 7; wrap_i64` — same shape, but the
        // shift amount is not a multiple of 32, so it falls through to the
        // 38-byte runtime shift.
        let ops_generic = vec![
            WasmOp::LocalGet(0),
            WasmOp::I64Const(7),
            WasmOp::I64ShrU,
            WasmOp::I32WrapI64,
        ];
        let func_generic = backend
            .compile_function("generic_shr", &ops_generic, &config)
            .unwrap();

        let bytes_hi32 = func_hi32.code.len();
        let bytes_generic = func_generic.code.len();
        println!(
            "\n[issue #94] hi32 extract: {} bytes (vs generic shift: {} bytes; saved {})",
            bytes_hi32,
            bytes_generic,
            bytes_generic.saturating_sub(bytes_hi32)
        );
        let hex: String = func_hi32
            .code
            .iter()
            .map(|b| format!("{:02x}", b))
            .collect::<Vec<_>>()
            .join(" ");
        println!("[issue #94] hi32 bytes: {}", hex);
        // We expect the optimized form to be at least 30 bytes smaller than
        // the generic 64-bit shift sequence. (Empirically: 14 vs 50 bytes.)
        assert!(
            bytes_hi32 + 30 <= bytes_generic,
            "issue #94: hi32 extract = {} bytes, generic shift = {} bytes; \
             expected optimized form to be at least 30 bytes smaller",
            bytes_hi32,
            bytes_generic,
        );
    }
}