synth_backend/arm_backend.rs
1//! ARM Backend — wraps the instruction selector + optimizer + encoder as a Backend
2//!
3//! This is Synth's custom ARM compiler targeting Cortex-M (Thumb-2).
4//! It's the only backend that supports per-rule formal verification (ASIL D path).
5
6use crate::ArmEncoder;
7use synth_core::backend::{
8 Backend, BackendCapabilities, BackendError, CodeRelocation, CompilationResult, CompileConfig,
9 CompiledFunction, LineMap, SafetyBounds,
10};
11use synth_core::target::{IsaVariant, TargetSpec};
12use synth_core::wasm_decoder::DecodedModule;
13use synth_core::wasm_op::WasmOp;
14use synth_synthesis::{
15 ArmInstruction, ArmOp, BoundsCheckConfig, InstructionSelector, OptimizationConfig,
16 OptimizerBridge, RuleDatabase, validate_instructions,
17};
18
/// Synth's in-process ARM Cortex-M backend.
///
/// A stateless handle: all per-compile state lives in the `CompileConfig`
/// passed to the `Backend` methods, so the type carries no fields.
pub struct ArmBackend;

impl ArmBackend {
    /// Creates the backend handle.
    pub fn new() -> Self {
        ArmBackend
    }
}

impl Default for ArmBackend {
    /// Same value as [`ArmBackend::new`].
    fn default() -> Self {
        ArmBackend
    }
}
33
34impl Backend for ArmBackend {
35 fn name(&self) -> &str {
36 "arm"
37 }
38
39 fn capabilities(&self) -> BackendCapabilities {
40 BackendCapabilities {
41 produces_elf: false,
42 supports_rule_verification: true,
43 supports_binary_verification: true,
44 is_external: false,
45 }
46 }
47
48 fn supported_targets(&self) -> Vec<TargetSpec> {
49 vec![
50 TargetSpec::cortex_m3(),
51 TargetSpec::cortex_m4(),
52 TargetSpec::cortex_m4f(),
53 TargetSpec::cortex_m7(),
54 TargetSpec::cortex_m7dp(),
55 ]
56 }
57
58 fn compile_module(
59 &self,
60 module: &DecodedModule,
61 config: &CompileConfig,
62 ) -> Result<CompilationResult, BackendError> {
63 let exports: Vec<_> = module
64 .functions
65 .iter()
66 .filter(|f| f.export_name.is_some())
67 .collect();
68
69 if exports.is_empty() {
70 return Err(BackendError::CompilationFailed(
71 "no exported functions found".into(),
72 ));
73 }
74
75 let mut functions = Vec::new();
76 for func in &exports {
77 let name = func.export_name.clone().unwrap();
78 // #359: copy THIS function's declared param widths into the config so
79 // `compile_function` (which carries no function index) can refuse a
80 // 64-bit param on the AAPCS stack-argument path. Cheap clone only when
81 // a signature table is present and this function has a width entry —
82 // otherwise reuse the shared config (every existing module unchanged).
83 let func_config = match config.func_params_i64.get(func.index as usize) {
84 Some(p) if !p.is_empty() => Some(CompileConfig {
85 current_func_params_i64: p.clone(),
86 ..config.clone()
87 }),
88 _ => None,
89 };
90 let cfg = func_config.as_ref().unwrap_or(config);
91 let compiled = self.compile_function(&name, &func.ops, cfg)?;
92 functions.push(compiled);
93 }
94
95 Ok(CompilationResult {
96 functions,
97 elf: None,
98 backend_name: self.name().to_string(),
99 })
100 }
101
102 fn compile_function(
103 &self,
104 name: &str,
105 ops: &[WasmOp],
106 config: &CompileConfig,
107 ) -> Result<CompiledFunction, BackendError> {
108 let (code, relocations, line_map) =
109 compile_wasm_to_arm(ops, config).map_err(BackendError::CompilationFailed)?;
110
111 Ok(CompiledFunction {
112 name: name.to_string(),
113 code,
114 wasm_ops: ops.to_vec(),
115 relocations,
116 line_map,
117 })
118 }
119
120 fn is_available(&self) -> bool {
121 true // Always available — it's a library backend
122 }
123}
124
125/// Count the number of function parameters by analyzing LocalGet patterns
126fn count_params(wasm_ops: &[WasmOp]) -> u32 {
127 let mut first_access: std::collections::HashMap<u32, bool> = std::collections::HashMap::new();
128 for op in wasm_ops {
129 match op {
130 WasmOp::LocalGet(idx) => {
131 first_access.entry(*idx).or_insert(true);
132 }
133 WasmOp::LocalSet(idx) | WasmOp::LocalTee(idx) => {
134 first_access.entry(*idx).or_insert(false);
135 }
136 _ => {}
137 }
138 }
139
140 first_access
141 .iter()
142 .filter_map(
143 |(&idx, &is_read_first)| {
144 if is_read_first { Some(idx + 1) } else { None }
145 },
146 )
147 .max()
148 .unwrap_or(0)
149}
150
/// Core compilation: WASM ops → ARM machine code bytes + relocations + line map.
///
/// Pipeline, in order:
/// 1. infer the parameter count with `count_params`;
/// 2. select instructions — the direct selector when `config.no_optimize` or
///    `config.relocatable` is set, otherwise the optimizer bridge with the
///    direct selector as fallback when the bridge returns an error;
/// 3. run the environment-gated post-selection passes (const-CSE, range
///    re-allocation + callee-saved-save shrinking, shadow-allocation logging);
/// 4. validate the stream against the target's ISA features;
/// 5. resolve local label branches (Thumb/Thumb-2 only);
/// 6. encode, recording relocations and the per-instruction line map;
/// 7. append and patch the `LdrSym` literal pool, if any.
///
/// Returns `(code_bytes, relocations, line_map)`. `relocations` holds one entry
/// per `Bl`, `MovwSym` and `MovtSym` instruction plus one `Abs32` entry per
/// pooled `LdrSym` word; `line_map` pairs each instruction's byte offset with
/// its `source_line`.
///
/// # Errors
/// Returns a message string when instruction selection, ISA validation, branch
/// resolution or encoding fails, when an `LdrSym` appears on a non-Thumb
/// target, or when a literal-pool word is more than 4095 bytes from its load.
fn compile_wasm_to_arm(
    wasm_ops: &[WasmOp],
    config: &CompileConfig,
) -> Result<(Vec<u8>, Vec<CodeRelocation>, LineMap), String> {
    let num_params = count_params(wasm_ops);

    // Translate the backend-neutral safety setting into the selector's own enum.
    let bounds_config = match config.effective_safety_bounds() {
        SafetyBounds::None => BoundsCheckConfig::None,
        SafetyBounds::Mpu => BoundsCheckConfig::Mpu,
        SafetyBounds::Software => BoundsCheckConfig::Software,
        SafetyBounds::Mask => BoundsCheckConfig::Masking,
    };

    // The non-optimized (direct) instruction-selection path. Handles f32 via
    // VFP/FPU. Used directly when `--no-optimize` is set, and as the fallback
    // when the optimized path declines a module (see issue #120 below).
    //
    // VCR-RA-001 step 3b-lite (#242): a FRESH selector per attempt, with
    // `spill_on_exhaustion` set only on the retry — the first pass is the
    // unmodified default, so every function that compiles today is selected by
    // exactly the code that compiled it yesterday (bit-identity is structural,
    // not behavioural).
    let select_direct_attempt = |spill_on_exhaustion: bool,
                                 param_backing_on_exhaustion: bool|
     -> Result<Vec<ArmInstruction>, synth_core::Error> {
        let db = RuleDatabase::with_standard_rules();
        let mut selector =
            InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config);
        selector.set_target(config.target.fpu, &config.target.triple);
        if config.num_imports > 0 {
            selector.set_num_imports(config.num_imports);
        }
        // #195: plumb the callee argument-count tables so the direct selector can
        // marshal call arguments into R0–R3 per AAPCS.
        selector.set_func_arg_counts(
            config.func_arg_counts.clone(),
            config.type_arg_counts.clone(),
        );
        // #197: in relocatable host-link mode, emit direct `func_N` BLs for
        // imports (rewritten to the wasm field name by build_relocatable_elf)
        // instead of `__meld_dispatch_import`.
        selector.set_relocatable(config.relocatable);
        // #237: native-pointer ABI — wasm statics become __synth_wasm_data-relative.
        selector.set_native_pointer_abi(config.native_pointer_abi, config.linear_memory_bytes);
        // #311: i64 call results are register PAIRS — tag them.
        selector.set_result_types(config.func_ret_i64.clone(), config.type_ret_i64.clone());
        // #359: declared param widths of THIS function, so the AAPCS stack-arg
        // path can refuse 64-bit params (Ok-or-Err). Empty ⇒ assume i32.
        selector.set_params_i64(config.current_func_params_i64.clone());
        // Stack-pointer promotion is meaningful only under the native-pointer ABI;
        // gating here keeps every non-native compile (all frozen fixtures) on the
        // legacy R9 globals-table path, bit-identical.
        if config.native_pointer_abi
            && let Some((sp_idx, sp_init)) = config.stack_pointer_global
        {
            selector.set_native_pointer_stack(sp_idx, sp_init);
        }
        selector.set_spill_on_exhaustion(spill_on_exhaustion);
        selector.set_param_backing_on_exhaustion(param_backing_on_exhaustion);
        selector.select_with_stack(wasm_ops, num_params)
    };
    // Direct selection with up to two retries, chosen by matching the error
    // TEXT of the previous attempt against the two constants below.
    let select_direct = || -> Result<Vec<ArmInstruction>, String> {
        // The two recoverable exhaustion classes. NOT retried: the i64
        // spill-slot-pool Err ("spill-slot pool exhausted") — the honest
        // remaining bound of the 3b-lite allocator.
        const SINGLE_EXHAUSTION: &str = "all allocatable registers are live on the stack";
        const PAIR_EXHAUSTION: &str = "no consecutive pair of free registers for i64";
        let mut attempt = select_direct_attempt(false, false);
        // VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion
        // hard-fail is recoverable — retry with spill-on-exhaustion, which
        // reserves the spill area and spills the deepest stack value when the
        // pool is full. Only functions that FAILED the first pass ever reach
        // this, so existing output is untouched by construction.
        if let Err(e) = &attempt
            && e.to_string().contains(SINGLE_EXHAUSTION)
        {
            attempt = select_direct_attempt(true, false);
        }
        // VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR
        // exhaustion is recoverable too — but not by stack spilling (the pair
        // allocator already spills stack values, #171): the blockers are the
        // pinned param home registers. The final retry frame-backs the params
        // (#204 machinery) so they stop pinning R0-R3, with spill-on-exhaustion
        // kept on for the single-register pressure the reloads add. Reached
        // only by functions that failed every earlier pass.
        if let Err(e) = &attempt
            && e.to_string().contains(PAIR_EXHAUSTION)
        {
            attempt = select_direct_attempt(true, true);
        }
        attempt.map_err(|e| format!("instruction selection failed: {}", e))
    };

    // Instruction selection: optimized or direct.
    //
    // #197: `--relocatable` (host-link ET_REL) forces the direct selector. The
    // optimized path materializes an absolute linmem base (0x20000100) and does
    // not preserve caller-saved registers across calls — both wrong for a
    // host-linked object, where the linmem base arrives via `fp` at runtime and
    // callees follow AAPCS. `select_with_stack` (now i64-spill capable after
    // #171) handles fp-relative memory + caller-saved preservation correctly.
    let arm_instrs = if config.no_optimize || config.relocatable {
        select_direct()?
    } else {
        let opt_config = if config.loom_compat {
            OptimizationConfig::loom_compat()
        } else {
            OptimizationConfig::all()
        };

        let mut bridge = OptimizerBridge::with_config(opt_config);
        // #188: tell the bridge how many imports there are so it declines only
        // LOCAL calls (and leaves import calls on the optimized path, keeping
        // the #173 field-name relocation rewrite intact).
        bridge.set_num_imports(config.num_imports);
        // `ir_to_arm` now returns `Result` — an `Err` means the optimized path
        // hit an unmapped vreg (issue-#93-class). Treat it identically to an
        // `optimize_full` failure: fall back to the direct selector rather
        // than propagating, so the function still compiles correctly.
        match bridge
            .optimize_full(wasm_ops)
            .and_then(|(opt_ir, _cfg, _stats)| bridge.ir_to_arm(&opt_ir, num_params as usize))
        {
            // The bridge yields bare ops; wrap them with no source line attached.
            Ok(arm_ops) => arm_ops
                .into_iter()
                .map(|op| ArmInstruction {
                    op,
                    source_line: None,
                })
                .collect(),
            // Issue #120: the optimized path declines modules it cannot lower
            // (notably scalar f32/f64 ops — the IR has no float opcodes). Fall
            // back to the direct instruction selector, which handles f32 via
            // VFP/FPU. This is honest degradation: the function still compiles
            // correctly, just without IR-level optimization.
            Err(_) => select_direct()?,
        }
    };

    // #257/#277: `mul`+`add`→`mla` fusion is intentionally NOT wired here.
    // The transform is correct and ready (`synth_synthesis::liveness::fuse_mul_add`,
    // fully tested), but it is **register-allocation-coupled**: over the current
    // greedy single-pass selector, folding `mul rM,..; add rD,rM,rX` → `mla`
    // extends the live ranges of the mul inputs to the mla point, and the added
    // pressure (extra moves/spills) costs more than the single-cycle MLA saves —
    // gale measured a +2 cyc on-target REGRESSION (flat_flight 255→257, G474RE)
    // even though it removes 2 instructions and the seam stays 0x07FDF307. So the
    // fusion stays unwired until the spill-aware allocator (VCR-RA-001) chooses
    // registers, at which point it becomes net-positive (per #272's plan and the
    // wiring design note). Lesson (#277): a register-pressure-affecting transform
    // needs an on-target/allocator-aware gate, not a byte-count gate, before it
    // can default on.

    // VCR-RA-001 const-CSE / rematerialization-avoidance (#209), the first
    // allocator-analysis-driven CODEGEN change. Drops `movw` re-materializations
    // of a constant already resident in another register and retargets the reads
    // — every rewrite proven by the liveness analysis, and it ONLY removes
    // materializations (pressure never rises), so unlike the mla fusion (#277) it
    // cannot regress on-target. Runs on the selected stream before branch
    // resolution (it removes instructions, shifting byte offsets). Behind
    // `SYNTH_CONST_CSE=1` while it is validated against the differential oracle +
    // gale's five on-target baselines; off by default keeps every fixture
    // bit-identical.
    //
    // NOTE(review): the gate only tests that the variable is set to some valid
    // Unicode value (`is_ok()`), so `SYNTH_CONST_CSE=0` ALSO enables the pass —
    // unlike `SYNTH_RANGE_REALLOC` below, which compares against "0".
    // Only the first element of the returned tuple (the rewritten stream) is kept.
    let arm_instrs = if std::env::var("SYNTH_CONST_CSE").is_ok() {
        synth_synthesis::liveness::apply_const_cse(&arm_instrs).0
    } else {
        arm_instrs
    };

    // VCR-RA-001 RANGE RE-ALLOCATION (#209/#242, wiring step 3a) — the first
    // CONSEQUENTIAL allocator pass: re-colour each maximal straight-line
    // segment over the R0-R8 pool with value ranges as the allocation unit
    // (segment inputs + per-register live-outs pinned to their original
    // registers, reserved R9-R12/SP identity-assigned — each segment is
    // independently sound, no cross-segment liveness assumed). Renames
    // registers only: never adds, removes, or reorders instructions, so
    // labels/branch offsets are unaffected.
    //
    // DEFAULT-ON since v0.11.36: gale cleared the gate on-target (G474RE,
    // #209 2026-06-10) — flag-on output byte-identical to flag-off on
    // flat_flight/controller/control_step, fires on the filter family with
    // zero cycle delta and a small size win, all selfchecks green on silicon.
    // Opt out with `SYNTH_RANGE_REALLOC=0`; per-function stats with
    // `SYNTH_REALLOC_STATS=1`.
    //
    // The companion dead callee-saved-save elimination (gale's "next
    // consequential lever", same issue comment) then shrinks the prologue
    // `push {r4-r8,lr}` / epilogue `pop {r4-r8,pc}` to the callee-saved
    // registers the re-allocated body still touches (leaf-only,
    // SP-untouched, even-count-padded — see shrink_callee_saved_saves):
    // ~12 cycles of pure save/restore overhead removed on small leaves.
    //
    // Enabled when the variable is unset/unreadable or set to anything but "0".
    let realloc_on = std::env::var("SYNTH_RANGE_REALLOC").map_or(true, |v| v != "0");
    let arm_instrs = if realloc_on {
        use synth_synthesis::rules::Reg;
        const POOL: [Reg; 9] = [
            Reg::R0,
            Reg::R1,
            Reg::R2,
            Reg::R3,
            Reg::R4,
            Reg::R5,
            Reg::R6,
            Reg::R7,
            Reg::R8,
        ];
        let (out, stats) = synth_synthesis::liveness::reallocate_function(&arm_instrs, &POOL);
        // NOTE(review): presence-only gate — any value, including "0", prints stats.
        if std::env::var("SYNTH_REALLOC_STATS").is_ok() {
            eprintln!(
                "[range-realloc] {} segments: {} reallocated, {} declined ({} validator-rejected), {} need spill (step 4)",
                stats.segments,
                stats.reallocated,
                stats.declined,
                stats.validator_rejects,
                stats.needs_spill
            );
        }
        // Keep the re-allocated stream as-is when the shrink pass yields no
        // replacement.
        synth_synthesis::liveness::shrink_callee_saved_saves(&out).unwrap_or(out)
    } else {
        arm_instrs
    };

    // VCR-RA-001 SHADOW ALLOCATION (#209/#242): run the register allocator on
    // the selected stream and LOG what it finds — without changing a single
    // emitted byte. This is the measure-only bridge between the built analysis
    // layer and the eventual virtual-register wiring: it shows, per real
    // function, whether the allocator can colour it within the R0–R8 pool and
    // how much const-CSE / rematerialization headroom exists (#209). Enable with
    // `SYNTH_SHADOW_ALLOC=1`; off by default and side-effect-free either way.
    //
    // NOTE(review): presence-only gate as well — any value enables the logging.
    // The stream inspected here is the one left by the passes above; this block
    // only reads `arm_instrs` and writes to stderr.
    if std::env::var("SYNTH_SHADOW_ALLOC").is_ok() {
        use synth_synthesis::liveness::{
            AllocationOutcome, allocate_function, function_peak_pressure,
        };
        // R9 globals / R10 mem-size / R11 mem-base / R12 IP-scratch are reserved;
        // pin them above the 0..9 allocatable pool so the colourer keeps R0–R8.
        let precolored = std::collections::BTreeMap::from([
            (synth_synthesis::rules::Reg::R9, 9usize),
            (synth_synthesis::rules::Reg::R10, 10),
            (synth_synthesis::rules::Reg::R11, 11),
            (synth_synthesis::rules::Reg::R12, 12),
        ]);
        // True VALUE pressure (one node per value, not per reused physical reg):
        // a NeedsSpill with peak ≤ 9 is a SPURIOUS physical-register spill — the
        // function fits once virtually allocated.
        let peak = function_peak_pressure(&arm_instrs);
        match allocate_function(&arm_instrs, 9, &precolored) {
            AllocationOutcome::Allocated {
                remat_opportunities,
                coloring,
            } => eprintln!(
                "[shadow-alloc] OK: {} pregs coloured within R0-R8 pool, peak value-pressure {}, {} const-CSE/remat opportunities",
                coloring.len(),
                peak,
                remat_opportunities
            ),
            AllocationOutcome::NeedsSpill(s) => eprintln!(
                "[shadow-alloc] physical-graph would spill {:?}, but peak value-pressure is {} (≤9 ⇒ spurious; fits once virtually allocated)",
                s, peak
            ),
            AllocationOutcome::Declined => {
                eprintln!(
                    "[shadow-alloc] declined (unmodeled construct — calls/i64/fp/offset-branch)"
                )
            }
        }
    }

    // ISA feature gate: validate that all generated instructions are supported
    // by the target. This catches FPU instructions on no-FPU targets, double-precision
    // instructions on single-precision targets, etc.
    validate_instructions(&arm_instrs, config.target.fpu, &config.target.triple)
        .map_err(|e| format!("ISA validation failed: {}", e))?;

    // Encode to binary — use Thumb-2 for Cortex-M targets
    // (both `IsaVariant::Thumb2` and `IsaVariant::Thumb` select the Thumb-2 encoder).
    let use_thumb2 = matches!(config.target.isa, IsaVariant::Thumb2 | IsaVariant::Thumb);

    let encoder = if use_thumb2 {
        ArmEncoder::new_thumb2_with_fpu(config.target.fpu)
    } else {
        ArmEncoder::new_arm32()
    };

    // #202: resolve local label branches (Bcc/B/Bhs/Blo) to byte-accurate
    // offsets before encoding. `select_with_stack` emits them as label
    // placeholders and never resolves them — without this they encode as
    // `bne.n #0` and land mid-instruction whenever a 32-bit Thumb-2 instruction
    // sits between the branch and its target (UsageFault on real hardware).
    // Only meaningful for Thumb-2 (the offset units are halfword/PC+4).
    let arm_instrs = if use_thumb2 {
        resolve_label_branches(arm_instrs, &encoder)?
    } else {
        arm_instrs
    };

    let mut code = Vec::new();
    let mut relocations = Vec::new();

    // #345: literal-pool address loads. Each `LdrSym` was encoded as a placeholder
    // `LDR.W rd,[pc,#0]`; record where its instruction sits and what it loads so
    // we can append a pooled word (carrying the symbol address via R_ARM_ABS32)
    // and patch the PC-relative offset once the pool position is known.
    struct PendingLiteral {
        // Byte offset of the placeholder LDR within `code`.
        ldr_offset: u32,
        // Symbol whose address the pooled word will carry.
        symbol: String,
        // Value written into the pooled word before relocation.
        addend: i32,
    }
    let mut pending_literals: Vec<PendingLiteral> = Vec::new();

    // VCR-DBG-001: per-instruction source map for DWARF `.debug_line`. Captured
    // here because `code.len()` immediately before `encode()` is the final
    // machine offset of the instruction within this function's `.text` — nothing
    // after the loop shifts earlier instructions (the literal pool is appended at
    // the end; the LDR patch below is in-place/length-preserving). Purely
    // additive: it does not touch `code`, so `.text` is byte-identical.
    let mut line_map: LineMap = Vec::new();

    for instr in &arm_instrs {
        // Record a relocation for every BL: the encoder emits `bl #0` and
        // relies on a relocation to patch the target. This covers BOTH import
        // dispatch stubs (`__meld_*`, undefined externals) AND internal calls
        // (`func_N`, defined in this object). Previously only `__meld_*` was
        // recorded, so internal `BL func_N` calls were left as unpatched
        // `bl #0` placeholders branching to a garbage address (#167).
        if let ArmOp::Bl { label } = &instr.op {
            relocations.push(CodeRelocation {
                offset: code.len() as u32,
                symbol: label.clone(),
                kind: synth_core::backend::RelocKind::ThmCall,
            });
        }
        // #237: symbol-relative MOVW/MOVT (the `--native-pointer-abi` static-data
        // addressing). The encoder writes the addend in place; record the matching
        // R_ARM_MOVW_ABS_NC / R_ARM_MOVT_ABS so the linker adds the symbol address.
        if let ArmOp::MovwSym { symbol, .. } = &instr.op {
            relocations.push(CodeRelocation {
                offset: code.len() as u32,
                symbol: symbol.clone(),
                kind: synth_core::backend::RelocKind::MovwAbs,
            });
        }
        if let ArmOp::MovtSym { symbol, .. } = &instr.op {
            relocations.push(CodeRelocation {
                offset: code.len() as u32,
                symbol: symbol.clone(),
                kind: synth_core::backend::RelocKind::MovtAbs,
            });
        }
        // #345: defer the literal-pool word + reloc + offset patch to the
        // post-loop pass (the pool address is not yet known).
        if let ArmOp::LdrSym { symbol, addend, .. } = &instr.op {
            pending_literals.push(PendingLiteral {
                ldr_offset: code.len() as u32,
                symbol: symbol.clone(),
                addend: *addend,
            });
        }

        // The machine offset of this instruction is the current code length,
        // captured before the bytes are appended.
        line_map.push((code.len() as u32, instr.source_line));

        let encoded = encoder
            .encode(&instr.op)
            .map_err(|e| format!("ARM encoding failed: {}", e))?;
        code.extend_from_slice(&encoded);
    }

    // #345: place the literal pool at the end of this function's `.text`. Gated on
    // there being at least one `LdrSym` — functions without one are byte-identical
    // to before (no trailing padding, so downstream `func_offsets` are unchanged
    // and the frozen differential fixtures stay bit-for-bit equal).
    if !pending_literals.is_empty() {
        if !use_thumb2 {
            return Err("LdrSym literal-pool addressing requires Thumb-2".to_string());
        }
        // 4-byte align the pool start (Thumb-2 word loads require it, and
        // `Align(PC,4)` in the LDR-literal semantics assumes a word-aligned pool).
        while code.len() % 4 != 0 {
            code.push(0x00);
        }
        // One distinct pooled word per LdrSym (no dedup: different sites carry
        // different addends, and the REL addend lives in the word).
        for lit in &pending_literals {
            let word_offset = code.len() as u32;

            // REL semantics: the linker computes `S + A`, where A is the in-place
            // value of the relocated word. Initialize the word to the addend so
            // the final loaded address is `symbol + addend`.
            code.extend_from_slice(&(lit.addend as u32).to_le_bytes());
            relocations.push(CodeRelocation {
                offset: word_offset,
                symbol: lit.symbol.clone(),
                kind: synth_core::backend::RelocKind::Abs32,
            });

            // Patch the placeholder `LDR.W rd,[pc,#imm12]`. Thumb-2 LDR (literal):
            // address = Align(PC,4) + imm12, with PC = ldr_offset + 4. The pool is
            // always after the LDR, so U=1 (already set in hw1 = 0xF8DF).
            let pc = lit.ldr_offset + 4;
            let aligned_pc = pc & !3u32;
            let imm12 = word_offset - aligned_pc;
            if imm12 > 0xFFF {
                // Wide LDR-literal range is ±4 KB; these function bodies are far
                // smaller, but fail cleanly rather than miscompile if exceeded.
                return Err(format!(
                    "LdrSym literal pool out of range (#345): imm12={} > 4095 \
                     for symbol {}",
                    imm12, lit.symbol
                ));
            }
            // Rewrite the second halfword of the LDR in place (little-endian).
            let hw2_off = (lit.ldr_offset + 2) as usize;
            let mut hw2 = u16::from_le_bytes([code[hw2_off], code[hw2_off + 1]]);
            hw2 = (hw2 & 0xF000) | (imm12 as u16); // keep Rt, set imm12
            let hw2_bytes = hw2.to_le_bytes();
            code[hw2_off] = hw2_bytes[0];
            code[hw2_off + 1] = hw2_bytes[1];
        }
    }

    Ok((code, relocations, line_map))
}
575
576/// Resolve local label branches to byte-accurate offsets (#202).
577///
578/// `select_with_stack` emits conditional/unconditional branches as label
579/// placeholders (`Bcc`/`B`/`Bhs`/`Blo` + `Label`) and never resolves them; the
580/// encoder then emits a `0xD000`/`0xE000` placeholder with offset 0. Before #197
581/// this path only ran for `--no-optimize`/declined functions, so the latent bug
582/// stayed hidden — routing relocatable code through it surfaced branches that
583/// land mid-instruction (a Cortex-M UsageFault) whenever a 32-bit Thumb-2
584/// instruction sits between the branch and its target.
585///
586/// This pass encodes each instruction to learn its real byte length (so 16- vs
587/// 32-bit forms and multi-instruction expansions are exact), maps each `Label`
588/// to its byte position, and rewrites every label branch to the displacement
589/// the encoder consumes: `(target - branch - 4) / 2` halfwords. A bounded
590/// fixed-point handles an offset growing a branch from 16- to 32-bit (which
591/// shifts later positions). `BCondOffset`/`BOffset` already produced inline by
592/// the optimized path carry no label and are left untouched.
593fn resolve_label_branches(
594 arm_instrs: Vec<ArmInstruction>,
595 encoder: &ArmEncoder,
596) -> Result<Vec<ArmInstruction>, String> {
597 use std::collections::HashMap;
598 use synth_synthesis::Condition;
599
600 enum BKind {
601 Cond(Condition),
602 Uncond,
603 }
604 // Record each label branch ONCE — indices are stable across iterations.
605 let mut branches: Vec<(usize, BKind, String)> = Vec::new();
606 for (i, instr) in arm_instrs.iter().enumerate() {
607 match &instr.op {
608 ArmOp::Bcc { cond, label } => branches.push((i, BKind::Cond(*cond), label.clone())),
609 ArmOp::Bhs { label } => branches.push((i, BKind::Cond(Condition::HS), label.clone())),
610 ArmOp::Blo { label } => branches.push((i, BKind::Cond(Condition::LO), label.clone())),
611 ArmOp::B { label } => branches.push((i, BKind::Uncond, label.clone())),
612 _ => {}
613 }
614 }
615 if branches.is_empty() {
616 return Ok(arm_instrs);
617 }
618
619 let mut resolved = arm_instrs;
620 // Sizes only grow (16→32-bit), so this converges quickly; cap for safety.
621 for _ in 0..16 {
622 // 1. Byte position of each instruction (Label encodes to 0 bytes).
623 let mut positions = Vec::with_capacity(resolved.len());
624 let mut pos: i64 = 0;
625 for instr in &resolved {
626 positions.push(pos);
627 pos += encoder
628 .encode(&instr.op)
629 .map_err(|e| format!("branch-resolve size probe failed: {}", e))?
630 .len() as i64;
631 }
632 // 2. Label name -> byte position (owned keys so the borrow ends here).
633 let mut labels: HashMap<String, i64> = HashMap::new();
634 for (i, instr) in resolved.iter().enumerate() {
635 if let ArmOp::Label { name } = &instr.op {
636 labels.insert(name.clone(), positions[i]);
637 }
638 }
639 // 3. Rewrite each branch to its byte-accurate offset.
640 let mut changed = false;
641 for (idx, kind, label) in &branches {
642 // A label not defined locally is an EXTERNAL target (e.g.
643 // `Trap_Handler` resolved by a relocation / the vector table). Leave
644 // such branches as their placeholder for the existing relocation
645 // path — only local control-flow labels are byte-resolved here.
646 let Some(&target) = labels.get(label) else {
647 continue;
648 };
649 // Encoder consumes the field as (target - branch - 4) / 2 halfwords.
650 // Positions are always even, so this division is exact.
651 let halfword_offset = ((target - positions[*idx] - 4) / 2) as i32;
652 let new_op = match kind {
653 BKind::Cond(c) => ArmOp::BCondOffset {
654 cond: *c,
655 offset: halfword_offset,
656 },
657 BKind::Uncond => ArmOp::BOffset {
658 offset: halfword_offset,
659 },
660 };
661 if resolved[*idx].op != new_op {
662 resolved[*idx].op = new_op;
663 changed = true;
664 }
665 }
666 if !changed {
667 break;
668 }
669 }
670 Ok(resolved)
671}
672
673#[cfg(test)]
674mod tests {
675 use super::*;
676
/// The backend registers under the name "arm" and reports itself available.
#[test]
fn test_arm_backend_name() {
    let arm = ArmBackend::new();
    assert!(arm.is_available());
    assert_eq!(arm.name(), "arm");
}
683
/// Pins all four capability flags declared by `ArmBackend::capabilities`,
/// including `supports_binary_verification`, which was previously unchecked.
#[test]
fn test_arm_backend_capabilities() {
    let backend = ArmBackend::new();
    let caps = backend.capabilities();
    assert!(!caps.produces_elf);
    assert!(caps.supports_rule_verification);
    assert!(caps.supports_binary_verification);
    assert!(!caps.is_external);
}
692
/// A two-parameter i32 add compiles to non-empty code and echoes its name and
/// input ops back in the result.
#[test]
fn test_compile_add_function() {
    let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
    let config = CompileConfig::default();

    // `unwrap` fails the test if compilation returns an error.
    let func = ArmBackend::new()
        .compile_function("add", &ops, &config)
        .unwrap();

    assert_eq!(func.name, "add");
    assert!(!func.code.is_empty());
    assert_eq!(func.wasm_ops, ops);
}
707
/// VCR-DBG-001: the per-instruction source map must start at offset 0, carry
/// strictly increasing in-bounds machine offsets, and be reproduced — together
/// with the code bytes — by a second compile of the same input.
#[test]
fn test_line_map_is_wellformed_dbg001() {
    let ops = vec![
        WasmOp::LocalGet(0),
        WasmOp::LocalGet(1),
        WasmOp::I32Add,
        WasmOp::End,
    ];
    let config = CompileConfig::default();
    let backend = ArmBackend::new();
    let func = backend.compile_function("add", &ops, &config).unwrap();
    let map = &func.line_map;

    // Non-empty, and the first instruction starts at machine offset 0.
    assert!(
        !map.is_empty(),
        "a non-trivial function captures a source map"
    );
    assert_eq!(map[0].0, 0, "first instruction at offset 0");

    // Each consecutive pair of entries: offsets strictly increase, by at least
    // 2 bytes.
    for (prev, next) in map.iter().zip(map.iter().skip(1)) {
        assert!(next.0 > prev.0, "instruction offsets strictly increase");
        assert!(
            next.0 - prev.0 >= 2,
            "each ARM/Thumb instruction is >= 2 bytes"
        );
    }

    // The last (largest) offset must still point inside the emitted bytes.
    let last = map.last().unwrap().0 as usize;
    assert!(
        last < func.code.len(),
        "every mapped offset lies inside .text"
    );

    // Recompiling yields the same bytes and the same map.
    let again = backend.compile_function("add", &ops, &config).unwrap();
    assert_eq!(
        again.code, func.code,
        "compilation deterministic; map is additive"
    );
    assert_eq!(again.line_map, func.line_map);
}
754
/// `count_params` infers parameters from locals that are READ before being
/// written. Covers the read-only cases plus the write-first exclusion, sparse
/// indices and empty input.
#[test]
fn test_count_params() {
    let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
    assert_eq!(count_params(&ops), 2);

    let no_params = vec![WasmOp::I32Const(5), WasmOp::I32Const(3), WasmOp::I32Add];
    assert_eq!(count_params(&no_params), 0);

    // Empty body: nothing is read, so no parameters.
    assert_eq!(count_params(&[]), 0);

    // A local written before its first read is not a parameter.
    let set_first = vec![
        WasmOp::I32Const(7),
        WasmOp::LocalSet(0),
        WasmOp::LocalGet(0),
    ];
    assert_eq!(count_params(&set_first), 0);

    // `LocalTee` counts as a write too: local 1 is excluded, local 0 remains.
    let tee_first = vec![
        WasmOp::LocalGet(0),
        WasmOp::LocalTee(1),
        WasmOp::LocalGet(1),
        WasmOp::I32Add,
    ];
    assert_eq!(count_params(&tee_first), 1);

    // The count is the highest read-first index + 1, even if lower indices
    // are never touched.
    let sparse = vec![WasmOp::LocalGet(2)];
    assert_eq!(count_params(&sparse), 3);
}
763
/// Registering the backend makes it retrievable by name and counted among the
/// available backends.
#[test]
fn test_arm_backend_register() {
    let mut registry = synth_core::BackendRegistry::new();
    let backend = Box::new(ArmBackend::new());
    registry.register(backend);

    assert!(registry.get("arm").is_some());
    assert_eq!(registry.available().len(), 1);
}
771
/// An import call on the direct-selection path records exactly one relocation
/// against `__meld_dispatch_import`, at a non-zero code offset.
#[test]
fn test_compile_import_call_produces_relocations() {
    // Simulate a WASM module where func index 0 is an import.
    // Call(0) should generate MOV R0, #0; BL __meld_dispatch_import
    let config = CompileConfig {
        num_imports: 1,
        no_optimize: true, // Direct instruction selection to preserve Call semantics
        ..CompileConfig::default()
    };
    let ops = vec![WasmOp::Call(0)];

    // `unwrap` fails the test if compilation returns an error.
    let func = ArmBackend::new()
        .compile_function("caller", &ops, &config)
        .unwrap();

    assert!(!func.code.is_empty());
    assert_eq!(func.relocations.len(), 1);
    let reloc = &func.relocations[0];
    assert_eq!(reloc.symbol, "__meld_dispatch_import");
    // The BL is the second instruction (after MOV R0, #0), so offset should be > 0
    assert!(reloc.offset > 0);
}
794
/// Regression test for #197.
///
/// When `relocatable` is set, a call to an import has to be relocated against
/// the direct `func_N` symbol — which `build_relocatable_elf` later rewrites
/// to the wasm field name — and NOT against `__meld_dispatch_import`. This
/// covers the ABI half of the #197 fix: without it a host linker has no way to
/// resolve the call to the real kernel symbol (e.g. `k_spin_lock`).
#[test]
fn test_compile_relocatable_import_uses_direct_func_symbol_197() {
    let config = CompileConfig {
        relocatable: true,
        num_imports: 1,
        ..CompileConfig::default()
    };
    // Function index 0 is an import because `num_imports` is 1.
    let ops = vec![WasmOp::Call(0)];

    let compiled = ArmBackend::new()
        .compile_function("caller", &ops, &config)
        .expect("relocatable import call compiles");

    let relocs = &compiled.relocations;
    assert_eq!(relocs.len(), 1);
    assert_eq!(
        relocs[0].symbol, "func_0",
        "#197: relocatable import must relocate against func_0 (→ field name), not Meld dispatch"
    );
}
820
/// A body that contains no call at all must compile to a function whose
/// relocation list is empty.
#[test]
fn test_compile_no_imports_no_relocations() {
    let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];

    // Default configuration throughout; nothing about imports is customised.
    let func = ArmBackend::new()
        .compile_function("add", &ops, &CompileConfig::default())
        .unwrap();

    assert!(func.relocations.is_empty());
}
830
/// Regression test for #167.
///
/// Calling an INTERNAL function (index `>= num_imports`) has to record a
/// relocation against `func_{index}`. Prior to the fix only `__meld_*`
/// (import) BLs were relocated, which left an internal `BL func_N` as an
/// unpatched `bl #0` that branched to a garbage address and made the object
/// non-linkable. This test would have caught that regression.
#[test]
fn test_compile_internal_call_produces_relocation_167() {
    let config = CompileConfig {
        no_optimize: true,
        num_imports: 1,
        ..CompileConfig::default()
    };
    // Index 2 is past the single import, so this is an INTERNAL call → `BL func_2`.
    let ops = vec![WasmOp::Call(2)];

    let compiled = ArmBackend::new()
        .compile_function("caller", &ops, &config)
        .expect("internal call compiles");

    let relocs = &compiled.relocations;
    assert_eq!(
        relocs.len(),
        1,
        "an internal call must emit exactly one relocation (#167)"
    );
    assert_eq!(
        relocs[0].symbol, "func_2",
        "internal call must relocate against the callee's func_{{index}} symbol (#167)"
    );
}
862
863 // ─── Phase 1 safety-bounds plumbing for ARM ──────────────────────────
864
/// `SafetyBounds::Mpu` must leave the encoded bytes of an `i32.load` exactly
/// as they are with the default safety-bounds setting.
#[test]
fn arm_safety_bounds_mpu_emits_same_code_as_none() {
    // On ARM the MPU handles faults in hardware, so Mpu mode must not add an
    // inline check; the bytes have to match the None configuration.
    let load_ops = vec![
        WasmOp::LocalGet(0),
        WasmOp::I32Load {
            offset: 0,
            align: 2,
        },
    ];
    let backend = ArmBackend::new();

    // Baseline: safety bounds left at the default.
    let without_bounds = CompileConfig {
        no_optimize: true,
        ..Default::default()
    };
    let code_none = backend
        .compile_function("ld", &load_ops, &without_bounds)
        .unwrap()
        .code;

    // Same configuration, but with MPU-backed bounds requested.
    let with_mpu = CompileConfig {
        safety_bounds: SafetyBounds::Mpu,
        no_optimize: true,
        ..Default::default()
    };
    let code_mpu = backend
        .compile_function("ld", &load_ops, &with_mpu)
        .unwrap()
        .code;

    assert_eq!(
        code_none, code_mpu,
        "Mpu and None should produce identical ARM bytes (Mpu relies on hardware)"
    );
}
894
/// The legacy `bounds_check: true` flag must compile an `i32.load` to the same
/// bytes as `safety_bounds: SafetyBounds::Software`.
///
/// NOTE(review): this pins legacy == Software only. It does not compare either
/// against an unchecked build, so on its own it would also pass if neither
/// configuration emitted a check.
#[test]
fn arm_legacy_bounds_check_still_emits_software_check() {
    // Users of the legacy `--bounds-check` CLI flag should keep getting the
    // software path even though the new SafetyBounds field defaults to None.
    let load_ops = vec![
        WasmOp::LocalGet(0),
        WasmOp::I32Load {
            offset: 0,
            align: 2,
        },
    ];
    let backend = ArmBackend::new();

    // Old-style switch.
    let legacy_flag = CompileConfig {
        bounds_check: true,
        no_optimize: true,
        ..Default::default()
    };
    let code_legacy = backend
        .compile_function("ld", &load_ops, &legacy_flag)
        .unwrap()
        .code;

    // New-style switch.
    let software_mode = CompileConfig {
        safety_bounds: SafetyBounds::Software,
        no_optimize: true,
        ..Default::default()
    };
    let code_software = backend
        .compile_function("ld", &load_ops, &software_mode)
        .unwrap()
        .code;

    assert_eq!(
        code_legacy, code_software,
        "--bounds-check should produce the same bytes as --safety-bounds=software"
    );
}
924
925 // ========================================================================
926 // ISA feature gate tests — ensure the compiler never emits unsupported
927 // instructions for a given target
928 // ========================================================================
929
/// Compiling an `f32.add` for the Cortex-M3 target must return an error.
#[test]
fn test_f32_rejected_on_cortex_m3_no_fpu() {
    let config = CompileConfig {
        no_optimize: true,
        target: TargetSpec::cortex_m3(),
        ..CompileConfig::default()
    };
    let ops = vec![WasmOp::F32Const(1.0), WasmOp::F32Const(2.0), WasmOp::F32Add];

    let outcome = ArmBackend::new().compile_function("fadd", &ops, &config);

    assert!(
        outcome.is_err(),
        "f32 operations should fail on Cortex-M3 (no FPU)"
    );
}
946
/// Compiling an `f32.add` for the Cortex-M4F target must succeed.
#[test]
fn test_f32_accepted_on_cortex_m4f() {
    let config = CompileConfig {
        no_optimize: true,
        target: TargetSpec::cortex_m4f(),
        ..CompileConfig::default()
    };
    let ops = vec![WasmOp::F32Const(1.0), WasmOp::F32Const(2.0), WasmOp::F32Add];

    let result = ArmBackend::new().compile_function("fadd", &ops, &config);

    // Borrow the error for the failure message rather than consuming `result`.
    if let Err(err) = &result {
        panic!(
            "f32 operations should succeed on Cortex-M4F, got: {:?}",
            err
        );
    }
}
964
/// A plain `i32.add` must compile for every target the backend advertises.
///
/// The table below lists each `TargetSpec` constructor returned by
/// `ArmBackend::supported_targets()`. An up-front length check fails the test
/// if a target is added to (or removed from) the backend without the table
/// being updated, so "all targets" stays true over time.
#[test]
fn test_i32_works_on_all_targets() {
    let backend = ArmBackend::new();
    let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];

    // (label used in failure messages, target under test)
    let targets = vec![
        ("Cortex-M3", TargetSpec::cortex_m3()),
        ("Cortex-M4", TargetSpec::cortex_m4()),
        ("Cortex-M4F", TargetSpec::cortex_m4f()),
        ("Cortex-M7", TargetSpec::cortex_m7()),
        ("Cortex-M7DP", TargetSpec::cortex_m7dp()),
    ];
    assert_eq!(
        targets.len(),
        backend.supported_targets().len(),
        "target table is out of sync with ArmBackend::supported_targets()"
    );

    for (label, target) in targets {
        let config = CompileConfig {
            target,
            no_optimize: true,
            ..CompileConfig::default()
        };

        let result = backend.compile_function("add", &ops, &config);
        assert!(
            result.is_ok(),
            "i32 ops should work on {label}, got: {:?}",
            result.as_ref().err()
        );
    }
}
1003
/// Compiling an `f32.mul` for the plain Cortex-M4 target must return an error.
#[test]
fn test_f32_rejected_on_cortex_m4_no_fpu() {
    // Plain Cortex-M4 — i.e. without the F suffix — has no FPU.
    let config = CompileConfig {
        no_optimize: true,
        target: TargetSpec::cortex_m4(),
        ..CompileConfig::default()
    };
    let ops = vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Mul];

    let outcome = ArmBackend::new().compile_function("fmul", &ops, &config);

    assert!(
        outcome.is_err(),
        "f32 operations should fail on Cortex-M4 (no FPU)"
    );
}
1021
1022 // ========================================================================
1023 // Issue #120 — f32 ops in the optimized lowering path
1024 //
1025 // `OptimizerBridge::wasm_to_ir` has no handlers for f32/f64 ops, so a
1026 // value-producing float op fell through to `Opcode::Nop`, leaving a
1027 // downstream consumer with an unmapped vreg and tripping the PR #101
1028 // defensive panic in `ir_to_arm`. Customer reproducer: `compiler_builtins
1029 // float::div` and `gale_compute_ipi_mask` in the `falcon-rate-component`
1030 // module.
1031 //
1032 // Fix: `optimize_full` declines float modules with a typed `Err`;
1033 // `compile_wasm_to_arm` falls back to the non-optimized `select_with_stack`
1034 // path, which handles f32 via VFP/FPU. These tests use the *default*
1035 // (optimized) config — `no_optimize` is NOT set — which is the exact
1036 // configuration that panicked pre-fix.
1037 // ========================================================================
1038
/// Issue #120 reproducer for `f32.div` on a Cortex-M4F.
///
/// Before the fix this panicked inside `ir_to_arm` with "vreg vN has no
/// assigned ARM register and no spill slot". After the fix the optimized path
/// declines the module, the backend falls back to direct selection, and a
/// non-empty `f32.div` lowering is produced.
#[test]
fn test_issue120_f32_div_compiles_via_optimized_default() {
    // `no_optimize` is deliberately NOT set: the point is to go through the
    // optimized path that panicked in issue #120 and then hit the fallback to
    // direct selection.
    let config = CompileConfig {
        target: TargetSpec::cortex_m4f(),
        ..CompileConfig::default()
    };
    let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];

    let result = ArmBackend::new().compile_function("fdiv", &ops, &config);
    assert!(
        result.is_ok(),
        "f32.div must compile on Cortex-M4F via the optimized->direct \
         fallback (issue #120), got: {:?}",
        result.as_ref().err()
    );

    let compiled = result.unwrap();
    assert!(
        !compiled.code.is_empty(),
        "f32.div must produce non-empty machine code"
    );
}
1066
/// Runs several binary f32 ops (add, mul, sub) through the default, optimized
/// configuration on an FPU target. Each must compile via the fallback without
/// panicking and must produce non-empty code.
#[test]
fn test_issue120_assorted_f32_ops_compile_via_optimized_default() {
    let config = CompileConfig {
        target: TargetSpec::cortex_m4f(),
        ..CompileConfig::default()
    };
    let backend = ArmBackend::new();

    // Every case has the same shape: read locals 0 and 1, then apply the op.
    for (name, float_op) in [
        ("fadd", WasmOp::F32Add),
        ("fmul", WasmOp::F32Mul),
        ("fsub", WasmOp::F32Sub),
    ] {
        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), float_op];

        let result = backend.compile_function(name, &ops, &config);
        assert!(
            result.is_ok(),
            "{name} must compile via the optimized->direct fallback \
             (issue #120), got: {:?}",
            result.as_ref().err()
        );

        let compiled = result.unwrap();
        assert!(
            !compiled.code.is_empty(),
            "{name} must produce non-empty machine code"
        );
    }
}
1106
/// The ISA feature gate still applies on the fallback path: `f32.div` for a
/// no-FPU target has to come back as a clean error (not a panic), even when
/// compilation starts on the optimized path.
#[test]
fn test_issue120_f32_div_rejected_on_no_fpu_via_optimized() {
    // `no_optimize` is left at its default here.
    let config = CompileConfig {
        target: TargetSpec::cortex_m3(),
        ..CompileConfig::default()
    };
    let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];

    let outcome = ArmBackend::new().compile_function("fdiv", &ops, &config);

    assert!(
        outcome.is_err(),
        "f32.div must be rejected on Cortex-M3 (no FPU), not panic"
    );
}
1124
/// Issue #94: end-to-end code-size check for the canonical hi32 extract of a
/// u64-packed FFI return value.
///
/// Two almost identical functions are compiled — one shifting right by 32
/// (the optimized case), one shifting right by 7 (the generic case) — and the
/// optimized one must come out meaningfully smaller.
#[test]
fn test_issue94_hi32_extract_is_smaller_than_generic_shift() {
    let config = CompileConfig {
        target: TargetSpec::cortex_m4f(),
        ..CompileConfig::default()
    };
    let backend = ArmBackend::new();

    // Optimized path: `(local.get 0) >>> 32; wrap_i64`
    let ops_hi32 = vec![
        WasmOp::LocalGet(0), // i64 param in R0:R1
        WasmOp::I64Const(32),
        WasmOp::I64ShrU,
        WasmOp::I32WrapI64,
    ];
    // Generic path: `(local.get 0) >>> 7; wrap_i64` — same shape, but 7 is not
    // a multiple of 32, so this falls through to the 38-byte runtime shift.
    let ops_generic = vec![
        WasmOp::LocalGet(0),
        WasmOp::I64Const(7),
        WasmOp::I64ShrU,
        WasmOp::I32WrapI64,
    ];

    let func_hi32 = backend
        .compile_function("hi32_extract", &ops_hi32, &config)
        .unwrap();
    let func_generic = backend
        .compile_function("generic_shr", &ops_generic, &config)
        .unwrap();

    let bytes_hi32 = func_hi32.code.len();
    let bytes_generic = func_generic.code.len();
    let saved = bytes_generic.saturating_sub(bytes_hi32);
    println!(
        "\n[issue #94] hi32 extract: {} bytes (vs generic shift: {} bytes; saved {})",
        bytes_hi32, bytes_generic, saved
    );

    // Space-separated lowercase hex dump of the optimized function.
    let hex_bytes: Vec<String> = func_hi32
        .code
        .iter()
        .map(|byte| format!("{byte:02x}"))
        .collect();
    println!("[issue #94] hi32 bytes: {}", hex_bytes.join(" "));

    // The optimized form is expected to be at least 30 bytes smaller than the
    // generic 64-bit shift sequence. (Empirically: 14 vs 50 bytes.)
    assert!(
        bytes_hi32 + 30 <= bytes_generic,
        "issue #94: hi32 extract = {} bytes, generic shift = {} bytes; \
         expected optimized form to be at least 30 bytes smaller",
        bytes_hi32,
        bytes_generic,
    );
}
1186}