relon-codegen-llvm 0.1.0-rc2

LLVM-backed AOT evaluator for Relon (Phase A bootstrap)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
//! Minimal runtime state for the LLVM AOT backend's buffer-protocol
//! entries. **Phase B.**
//!
//! The buffer-protocol entry signature mirrors the cranelift-native
//! backend's `EntryShape::BufferProtocol`:
//!
//! ```text
//! fn run_main(state: *const SandboxState,
//!             in_ptr: i32, in_len: i32,
//!             out_ptr: i32, out_cap: i32,
//!             caps: i64) -> i32;
//! ```
//!
//! `LoadField` / `StoreField` ops resolve to absolute host addresses
//! through the formula `arena_base + buf_ptr + offset`, where
//! `arena_base` lives at a stable offset on the state. The LLVM
//! emitter loads it through a `ptrtoint`/`inttoptr` round-trip.
//!
//! We **do not** reuse `relon_codegen_cranelift::SandboxState` here on
//! purpose:
//!
//! - It would require pulling cranelift-native as a hard dependency of
//!   the LLVM crate just to share an opaque struct layout. The LLVM
//!   backend is meant to stand on its own.
//! - The LLVM backend keeps its sandbox state local: arena bounds,
//!   capability trap codes, host-fn dispatch, and the step-budget fuel
//!   live in this C-layout `ArenaState` instead of depending on the
//!   cranelift crate's `SandboxState`.
//! - Keeping the layout local to this crate makes the offsets we
//!   embed in emitted LLVM IR self-contained — if the cranelift
//!   crate ever rearranges `SandboxState` it cannot accidentally
//!   miscompile our IR.
//!
//! Phase C (when sandbox traps + closures land) is the right time to
//! revisit the dep direction; for Phase B this stays self-contained.

use std::cell::UnsafeCell;
use std::collections::HashMap;
use std::sync::Arc;

use relon_eval_api::{NativeArgs, NativeFnCaps, RelonFunction, RuntimeError, Value};
use relon_parser::TokenRange;

/// Per-call arena state handed to the LLVM JIT-compiled entry. The
/// emitter reads `arena_base` (at offset 0 on a 64-bit host) and
/// `arena_len` (offset 8) to resolve every buffer-protocol load /
/// store. The fields after those two carry the bump-allocation
/// cursors, the native-dispatch trap code, the host-fn registry
/// pointer and the step budget; each has a matching
/// `ARENA_STATE_OFFSET_*` constant.
///
/// `#[repr(C)]` because the LLVM emitter hard-codes the field
/// offsets through `inttoptr(arena_base_ptr + N)` style address
/// arithmetic.
///
/// `UnsafeCell` on the live fields because the JIT thread mutates
/// them through a raw pointer; Rust's borrow checker cannot see the
/// emitted machine code. The per-call ownership model (one
/// `ArenaState` per `run_main` dispatch) means no aliasing race
/// can occur — the LLVM evaluator allocates a fresh state on the
/// stack before each call.
///
/// ## Phase 0b: native-call dispatch
///
/// `host_fns` + `trap_code` mirror the cranelift backend's
/// `SandboxState` so the LLVM JIT path can dispatch a source-lowered
/// `Op::CallNative` through the host-fn registry the same way (see
/// [`relon_llvm_call_native`]). `host_fns` is a raw pointer (not an
/// `Arc` slot) because the registry is owned by the evaluator and
/// outlives every per-call state. The JIT call site only passes the
/// state pointer; the helper reads `host_fns` from the state itself.
/// `0` (null) means "no registry installed" — a `CallNative` then
/// records [`NativeTrap::HostFnMissing`] in `trap_code`.
#[repr(C)]
pub struct ArenaState {
    /// Base pointer of the arena bytes the host owns. The emitted
    /// LLVM IR reads this through `load i64, ptr %state` (offset 0),
    /// then `inttoptr` to a byte pointer + i64-extended `buf_ptr` +
    /// `field_offset`. The pointer is `usize`-wide so the cast
    /// matches the host's pointer width.
    pub arena_base: UnsafeCell<usize>,
    /// Length of the arena in bytes. The LLVM emitter uses this for
    /// arena-relative bounds guards before forming host pointers.
    pub arena_len: UnsafeCell<u32>,
    /// Phase E.1: tail cursor used by pointer-indirect StoreField
    /// (`String` / `ListInt` / `ListFloat` / `ListBool`) to bump-
    /// allocate records inside the output buffer's tail region.
    /// Counts buffer-relative bytes from `out_ptr`. Reset to 0 at the
    /// start of every dispatch.
    pub tail_cursor: UnsafeCell<u32>,
    /// Phase E.1: scratch bump cursor used by stdlib bodies (`concat`,
    /// `substring`, ...) and `Op::StrConcatN` to allocate temporary
    /// records inside the arena's scratch region. Counts bytes from
    /// `scratch_base`. Reset to 0 per dispatch.
    pub scratch_cursor: UnsafeCell<u32>,
    /// Phase E.1: arena-relative byte offset at which the scratch
    /// region starts (= `out_ptr + out_cap`). The bump path reads
    /// `scratch_base + scratch_cursor` as the i32 pointer returned to
    /// the stdlib body.
    pub scratch_base: UnsafeCell<u32>,
    /// Phase 0b: trap code recorded by [`relon_llvm_call_native`] on a
    /// failed dispatch (host-fn missing / host-fn error / unsupported
    /// arg shape). `0` = no trap. The `Op::CallNative` lowering loads
    /// this right after the helper returns and routes a non-zero value
    /// to an `llvm.trap`. Mirrors `SandboxState::trap_code`.
    pub trap_code: UnsafeCell<u64>,
    /// Phase 0b: raw pointer to the host-fn registry installed by the
    /// evaluator before dispatch (see `install_host_fns`). Null when
    /// no registry was supplied. Stored as a plain pointer-width
    /// integer: the JIT never dereferences it; only the Rust helper
    /// [`relon_llvm_call_native`] reads this word from the state and
    /// re-derives `&HostFnRegistry` from it.
    pub host_fns: UnsafeCell<usize>,
    /// Remaining loop/entry budget for the current dispatch. `0`
    /// means "unlimited"; positive values are decremented by the LLVM
    /// emitter at the entry prologue and loop headers; negative values
    /// trap `ResourceExhausted`.
    pub step_budget: UnsafeCell<i64>,
}

/// Byte offset of [`ArenaState::arena_base`] inside the `#[repr(C)]`
/// layout. Used by the LLVM emitter to materialise the load.
pub const ARENA_STATE_OFFSET_BASE: u32 = 0;

/// Byte offset of [`ArenaState::arena_len`]. The LLVM emitter reads it
/// before arena-relative host-pointer formation. Equals the size of
/// the `usize`-wide `arena_base` word that precedes it.
pub const ARENA_STATE_OFFSET_LEN: u32 = std::mem::size_of::<usize>() as u32;

/// Byte offset of [`ArenaState::tail_cursor`]. The pointer-indirect
/// StoreField path loads and stores this u32 to bump-allocate the
/// output buffer's tail region.
pub const ARENA_STATE_OFFSET_TAIL_CURSOR: u32 = ARENA_STATE_OFFSET_LEN + 4;

/// Byte offset of [`ArenaState::scratch_cursor`]. Loaded / stored by
/// the `Op::AllocScratch` / `Op::AllocScratchDyn` lowering.
pub const ARENA_STATE_OFFSET_SCRATCH_CURSOR: u32 = ARENA_STATE_OFFSET_TAIL_CURSOR + 4;

/// Byte offset of [`ArenaState::scratch_base`]. Loaded by the scratch
/// allocator to compute the arena-relative offset of a freshly-
/// reserved scratch block (`scratch_base + scratch_cursor`).
pub const ARENA_STATE_OFFSET_SCRATCH_BASE: u32 = ARENA_STATE_OFFSET_SCRATCH_CURSOR + 4;

/// Byte offset of [`ArenaState::trap_code`]. The four u32 fields
/// (`arena_len`, `tail_cursor`, `scratch_cursor`, `scratch_base`)
/// total 16 bytes past `arena_base`; the `u64` `trap_code` follows on
/// its natural 8-byte boundary. The literal `24` is that sum for an
/// 8-byte `arena_base` (8 + 4 * 4), i.e. it is written for a 64-bit
/// host rather than derived from `size_of::<usize>()`. The
/// `Op::CallNative` lowering reads / writes this offset; the
/// `arena_state_offsets_match_repr_c_layout` test in this file's test
/// module pins the layout.
pub const ARENA_STATE_OFFSET_TRAP_CODE: u32 = 24;

/// Byte offset of [`ArenaState::host_fns`]. The `usize`-wide registry
/// pointer follows the 8-byte `trap_code` on its natural boundary.
/// Only the Rust helper [`relon_llvm_call_native`] dereferences this
/// field (via `state.host_fns.get()`), so the emitter never
/// materialises the offset — it exists for the layout assertion +
/// documentation, hence the `dead_code` allowance.
#[allow(dead_code)]
pub const ARENA_STATE_OFFSET_HOST_FNS: u32 = ARENA_STATE_OFFSET_TRAP_CODE + 8;

/// Byte offset of [`ArenaState::step_budget`]. Appended after the
/// existing host-fn word so the earlier ABI offsets stay stable.
pub const ARENA_STATE_OFFSET_STEP_BUDGET: u32 =
    ARENA_STATE_OFFSET_HOST_FNS + std::mem::size_of::<usize>() as u32;

/// Trap codes the LLVM backend stores in [`ArenaState::trap_code`]
/// (Phase 0b native dispatch and the typed-trap paths that share the
/// same cell). The numbering follows the cranelift backend's
/// `TrapKind` for the subset the LLVM path can raise; `0` is never a
/// variant because it means "no trap". Variants are listed in
/// ascending code order.
#[repr(u64)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NativeTrap {
    /// `Op::Div` / `Op::Mod` with a zero divisor. Same code as
    /// cranelift's `TrapKind::DivisionByZero` (= 1); lifted to
    /// `RuntimeError::DivisionByZero`.
    DivisionByZero = 1,
    /// A pointer dereference left the arena. Same code as cranelift's
    /// `TrapKind::BoundsViolation` (= 2); lifted to
    /// `RuntimeError::IndexOutOfBounds`.
    BoundsViolation = 2,
    /// The `Op::CheckCap` gate refused a gated native call because the
    /// required bit was clear in the granted `caps` bitmask. Same code
    /// as cranelift's `TrapKind::CapabilityDenied` (= 3); lifted to
    /// `RuntimeError::CapabilityDenied`.
    CapabilityDenied = 3,
    /// The per-call resource budget ran out. Today LLVM raises this
    /// from the deterministic step-budget fuel; a future wall-clock
    /// deadline can reuse the same code.
    ResourceExhausted = 4,
    /// Nothing is registered at the requested `import_idx`, or no
    /// registry was installed at all. Lifted to
    /// `RuntimeError::Unsupported`. Shares cranelift's
    /// `TrapKind::Unreachable` code (= 5) so both backends expose the
    /// same host-observable outcome class.
    HostFnMissing = 5,
    /// A checked Int reduction overflowed i64 (the per-iteration guard
    /// in `list_int_sum`). Same code as cranelift's
    /// `TrapKind::NumericOverflow` (= 6); lifted to
    /// `RuntimeError::NumericOverflow`, the typed error the tree-walk
    /// oracle's checked `+` raises. It travels through
    /// `state.trap_code` + the negative sentinel (not `llvm.trap`) so
    /// the host sees a typed error instead of a SIGILL.
    NumericOverflow = 6,
    /// The host fn failed, or produced a value outside the phase-0b
    /// scalar return envelope (`Int` / `Bool` / `Unit`). Lifted to
    /// `RuntimeError::Unsupported`, exactly like `HostFnMissing`; the
    /// separate code exists only to make post-mortems readable.
    HostFnError = 7,
    /// A strict-mode `match` had no `_` catch-all and no arm matched at
    /// runtime. Lifted to
    /// `RuntimeError::TypeMismatch { expected: "a matching arm", .. }`,
    /// byte-aligned (modulo range) with the tree-walk oracle and the
    /// cranelift `TrapKind::NoMatch`. `llvm.trap` is unusable for the
    /// LLVM `Op::Trap` path (it is a `ud2` SIGILL the host cannot
    /// decode into a typed error), so the no-match trap stores this
    /// code in `state.trap_code` and returns the negative sentinel,
    /// which `run_buffer_main` already lifts via
    /// `runtime_error_from_code`.
    NoMatch = 8,
}

impl NativeTrap {
    /// Lift a trap code recorded in [`ArenaState::trap_code`] into the
    /// matching [`RuntimeError`]. Unknown / `0` codes are treated as
    /// `Unsupported` (defensive — the JIT only ever stores the codes
    /// above). Mirrors cranelift's `TrapKind::to_runtime_error` for the
    /// subset the LLVM dynamic-dispatch path raises.
    pub fn runtime_error_from_code(code: u64) -> RuntimeError {
        match code {
            1 => RuntimeError::DivisionByZero(TokenRange::default()),
            2 => RuntimeError::IndexOutOfBounds {
                range: TokenRange::default(),
            },
            3 => RuntimeError::CapabilityDenied {
                cap_bit: None,
                reason: "llvm-aot: host-fn call denied by capability gate".to_string(),
                range: TokenRange::default(),
            },
            // Checked-reduction overflow — same typed error class as the
            // tree-walk oracle's checked `+` and cranelift's
            // `TrapKind::NumericOverflow::to_runtime_error`.
            6 => RuntimeError::NumericOverflow(TokenRange::default()),
            8 => RuntimeError::TypeMismatch {
                // Byte-aligned with the tree-walk oracle's `Expr::Match`
                // no-match path and the cranelift `TrapKind::NoMatch`
                // mapping. `found` cannot reproduce the oracle's
                // value-dependent `format!("value {}", val)` from a static
                // trap; it states the structural cause instead.
                expected: "a matching arm".to_string(),
                found: "no matching arm".to_string(),
                range: TokenRange::default(),
            },
            4 => RuntimeError::StepLimitExceeded {
                limit: None,
                range: TokenRange::default(),
            },
            _ => RuntimeError::Unsupported {
                reason: "llvm-aot: native-fn dispatch failed (host fn missing / errored / \
                         returned a non-scalar value)"
                    .to_string(),
            },
        }
    }
}

impl ArenaState {
    /// Construct a state that points at `arena[0..]` for a single
    /// dispatch. The caller owns the backing storage; this struct
    /// only borrows it through a raw pointer for the JIT's
    /// lifetime.
    ///
    /// `scratch_base` is the arena-relative offset where temporary
    /// allocations (string concat, ...) live; pass `arena.len()` to
    /// disable the scratch path. The cursors, the trap code, the
    /// host-fn registry pointer and the step budget all start at 0, so
    /// the JIT bump path starts fresh on every dispatch, no registry is
    /// installed and the budget is unlimited.
    ///
    /// `arena_len` is a `u32`; an arena longer than `u32::MAX` bytes is
    /// recorded as `u32::MAX` (saturating) rather than wrapping, so the
    /// recorded length never exceeds the real one and never collapses
    /// to a small value.
    ///
    /// # Safety
    ///
    /// The caller must keep `arena` live and exclusively owned by the
    /// `run_main` invocation that consumes this state. The emitted
    /// JIT code reads and writes through `arena_base` without
    /// touching the Rust borrow checker.
    pub fn new(arena: &mut [u8], scratch_base: u32) -> Self {
        // A plain `as u32` would keep only the low 32 bits of the
        // length (an exactly-4-GiB arena would be recorded as empty).
        let arena_len = u32::try_from(arena.len()).unwrap_or(u32::MAX);
        Self {
            arena_base: UnsafeCell::new(arena.as_mut_ptr() as usize),
            arena_len: UnsafeCell::new(arena_len),
            tail_cursor: UnsafeCell::new(0),
            scratch_cursor: UnsafeCell::new(0),
            scratch_base: UnsafeCell::new(scratch_base),
            trap_code: UnsafeCell::new(0),
            host_fns: UnsafeCell::new(0),
            step_budget: UnsafeCell::new(0),
        }
    }

    /// Set the remaining step budget for this dispatch. `0` disables
    /// budget checks; negative values are already exhausted.
    pub fn set_step_budget(&self, budget: i64) {
        // SAFETY: per-call ownership — the state is configured before
        // it is handed to the JIT, so nothing else accesses the cell.
        unsafe {
            *self.step_budget.get() = budget;
        }
    }

    /// Point the state at a host-fn registry for the duration of one
    /// dispatch. Pass a null pointer (or skip the call) to leave the
    /// registry unset — a `CallNative` then traps `HostFnMissing`.
    ///
    /// # Safety
    ///
    /// `registry` must outlive the JIT dispatch that consumes this
    /// state, and must be a valid `*const HostFnRegistry` (or null).
    /// The per-call ownership model keeps the `UnsafeCell` unaliased.
    pub unsafe fn install_host_fns(&self, registry: *const HostFnRegistry) {
        // SAFETY: only the pointer's address is stored here; the
        // caller's contract (above) covers its later dereference, and
        // per-call ownership keeps the cell unaliased.
        unsafe {
            *self.host_fns.get() = registry as usize;
        }
    }

    /// Read the trap code recorded by the JIT-side `Op::CallNative`
    /// dispatch. `0` means no native-dispatch trap fired.
    pub fn trap_code(&self) -> u64 {
        // SAFETY: the dispatch has returned, so the cell is unaliased.
        unsafe { *self.trap_code.get() }
    }

    /// Read the current tail-cursor value. Used by the evaluator
    /// after a dispatch returns to know how much was written into the
    /// tail region (for `String` return-value decoding).
    #[allow(dead_code)]
    pub fn tail_cursor(&self) -> u32 {
        // SAFETY: caller owns the state exclusively for a single
        // dispatch — no aliasing read can happen.
        unsafe { *self.tail_cursor.get() }
    }
}

/// Phase 0b host-fn registry: `import_idx -> Arc<dyn RelonFunction>`.
///
/// Mirrors the `host_fns` half of the cranelift backend's
/// `CapabilityVtable`. The LLVM evaluator owns one of these (built
/// with [`Self::new`] and populated through [`Self::register`]) and
/// points each per-call [`ArenaState`] at it through
/// [`ArenaState::install_host_fns`]; a source-lowered
/// `Op::CallNative` then resolves the `import_idx`-keyed callable via
/// [`relon_llvm_call_native`].
///
/// Keying off `import_idx` (the IR-side private namespace) keeps it
/// distinct from the capability-bit namespace the `Op::CheckCap`
/// gate consumes — exactly the cranelift split.
///
/// `Clone` copies the map and bumps each callable's `Arc` refcount;
/// the callables themselves are shared, not duplicated.
#[derive(Default, Clone)]
pub struct HostFnRegistry {
    // Callables keyed by the IR-side `import_idx`.
    host_fns: HashMap<u32, Arc<dyn RelonFunction>>,
}

impl std::fmt::Debug for HostFnRegistry {
    /// Print only how many host fns are registered; the callables are
    /// trait objects and are not formatted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let registered = self.host_fns.len();
        let mut builder = f.debug_struct("HostFnRegistry");
        builder.field("host_fn_count", &registered);
        builder.finish()
    }
}

impl HostFnRegistry {
    /// Build an empty registry.
    pub fn new() -> Self {
        Self {
            host_fns: HashMap::new(),
        }
    }

    /// Register a callable at `import_idx`. Overwrites any prior entry.
    pub fn register(&mut self, import_idx: u32, func: Arc<dyn RelonFunction>) {
        self.host_fns.insert(import_idx, func);
    }

    /// Resolve the callable registered at `import_idx`.
    pub fn resolve(&self, import_idx: u32) -> Option<&Arc<dyn RelonFunction>> {
        self.host_fns.get(&import_idx)
    }

    /// Number of registered host fns.
    pub fn len(&self) -> usize {
        self.host_fns.len()
    }

    /// `true` when no host fns are registered.
    pub fn is_empty(&self) -> bool {
        self.host_fns.is_empty()
    }
}

/// The [`NativeFnCaps`] handed to every LLVM-dispatched host fn. It
/// exposes no callback surface: like the cranelift backend's
/// `CraneliftNativeFnCaps`, there is no closure-callback / iterator
/// support yet, so a host fn that tries to re-enter Relon logic
/// receives a typed `Unsupported` error rather than a segfault. One
/// shared `Arc` of it is cached, so each dispatch only pays a refcount
/// bump.
struct LlvmNativeFnCaps;

impl NativeFnCaps for LlvmNativeFnCaps {
    /// Always refuses: calling back into Relon is not supported on
    /// this backend. All arguments are ignored.
    fn call_relon(
        &self,
        _func: &Value,
        _args: Vec<Value>,
        _range: TokenRange,
    ) -> Result<Value, RuntimeError> {
        let reason = String::from("llvm-aot host fn: call_relon callback unsupported");
        Err(RuntimeError::Unsupported { reason })
    }
}

/// Hand out the process-wide [`LlvmNativeFnCaps`] instance. It is
/// built on first use and every call returns a new `Arc` handle to the
/// same allocation.
fn llvm_native_caps() -> Arc<dyn NativeFnCaps> {
    static CAPS: std::sync::OnceLock<Arc<dyn NativeFnCaps>> = std::sync::OnceLock::new();
    let shared = CAPS.get_or_init(|| {
        let caps: Arc<dyn NativeFnCaps> = Arc::new(LlvmNativeFnCaps);
        caps
    });
    Arc::clone(shared)
}

/// Stable symbol name the LLVM module declares the native-dispatch
/// helper under. The evaluator maps it onto
/// [`relon_llvm_call_native`]'s address (obtainable as a `usize` from
/// [`relon_llvm_call_native_addr`]) via `engine.add_global_mapping`
/// before resolving the entry pointer. Mirrors the cranelift backend's
/// `RelonCallNative` vtable slot — same `(state, import_idx, args_ptr,
/// arg_count) -> i64` shape, resolved by symbol here instead of through
/// a data-vtable slot.
///
/// The string is identical to the Rust function's own name.
pub const RELON_LLVM_CALL_NATIVE_SYMBOL: &str = "relon_llvm_call_native";

/// Dynamic host-fn dispatch helper for a source-lowered
/// `Op::CallNative`. The JIT-emitted call site passes the per-call
/// `ArenaState` pointer, the IR `import_idx`, a pointer to `arg_count`
/// contiguous i64 args (spilled into an `alloca` by the lowering), and
/// the arg count. The helper:
///
/// 1. loads the `host_fns` registry pointer from the state;
/// 2. resolves the `Arc<dyn RelonFunction>` registered at `import_idx`;
/// 3. packs the i64 args as `Value::Int`s into `NativeArgs`;
/// 4. invokes the callable and returns the i64-encoded scalar result
///    (`Int` as-is, `Bool` as `0` / `1`, an option-none value as `0`).
///
/// Failures do **not** unwind across this `extern "C"` boundary (that
/// would be UB on a `panic=unwind` build): the helper records a
/// [`NativeTrap`] code in `state.trap_code` and returns `0`.
///
/// - no registry installed, or nothing registered at `import_idx`:
///   [`NativeTrap::HostFnMissing`];
/// - null `args_ptr` with a non-zero `arg_count`, a host fn that
///   returns `Err`, returns a non-scalar value, or panics:
///   [`NativeTrap::HostFnError`]. A panic is caught here so it cannot
///   propagate into JIT frames.
///
/// The JIT call site loads `trap_code` right after the call and routes
/// a non-zero value to an `llvm.trap`, so the host sees a typed
/// `RuntimeError` the same way every other LLVM trap surfaces. Mirrors
/// the cranelift backend's `SandboxState::call_native`.
///
/// Scope: scalar `Int` args in, `Int` / `Bool` / `Unit` result out.
///
/// # Safety
///
/// `state` must point at a live, aligned [`ArenaState`]; `args_ptr`
/// must point at `arg_count` contiguous `i64`s. The JIT prologue passes
/// the same `state` pointer it received and a stack slot it just
/// populated, so both invariants hold for every production caller.
pub unsafe extern "C" fn relon_llvm_call_native(
    state: *const ArenaState,
    import_idx: u32,
    args_ptr: *const i64,
    arg_count: u32,
) -> i64 {
    // SAFETY: caller guarantees a live, aligned ArenaState.
    let state = unsafe { &*state };
    let record_trap = |code: NativeTrap| {
        // SAFETY: per-call ownership; the JIT call has not returned yet
        // but no other thread can see this state.
        unsafe {
            *state.trap_code.get() = code as u64;
        }
    };

    // SAFETY: per-call ownership — the JIT thread is the only reader.
    let registry_ptr = unsafe { *state.host_fns.get() } as *const HostFnRegistry;
    if registry_ptr.is_null() {
        record_trap(NativeTrap::HostFnMissing);
        return 0;
    }
    // SAFETY: the evaluator installs a registry that outlives the
    // dispatch (it lives on the evaluator, behind an Arc).
    let registry = unsafe { &*registry_ptr };
    let Some(func) = registry.resolve(import_idx).cloned() else {
        record_trap(NativeTrap::HostFnMissing);
        return 0;
    };

    let args_slice: &[i64] = if arg_count == 0 {
        &[]
    } else if args_ptr.is_null() {
        // `from_raw_parts` requires a non-null pointer; a null pointer
        // with a non-zero count is a malformed call, so trap instead
        // of invoking UB.
        record_trap(NativeTrap::HostFnError);
        return 0;
    } else {
        // SAFETY: non-null, and the caller guarantees `arg_count`
        // contiguous i64s behind it.
        unsafe { std::slice::from_raw_parts(args_ptr, arg_count as usize) }
    };
    let packed: Vec<Value> = args_slice.iter().copied().map(Value::Int).collect();
    let native_args = NativeArgs::from_positional(packed, llvm_native_caps());

    // The callable is arbitrary host code: contain a panic here so it
    // never unwinds through the `extern "C"` frame into JIT code.
    // `AssertUnwindSafe` is fine because nothing captured by the
    // closure is observed again after a panic.
    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        func.call(native_args, TokenRange::default())
    }));
    match outcome {
        Ok(Ok(Value::Int(v))) => v,
        Ok(Ok(Value::Bool(b))) => i64::from(b),
        Ok(Ok(v)) if v.is_option_none() => 0,
        Ok(Ok(_)) | Ok(Err(_)) | Err(_) => {
            record_trap(NativeTrap::HostFnError);
            0
        }
    }
}

/// The address of [`relon_llvm_call_native`] expressed as a `usize`,
/// ready to pass to `engine.add_global_mapping`. The function item is
/// first turned into a thin `*const ()` and only then into an integer,
/// which keeps the `fn-as-usize` lint quiet (same approach as
/// `relon_llvm_str_contains_arena_addr`).
#[inline]
pub fn relon_llvm_call_native_addr() -> usize {
    let entry = relon_llvm_call_native as *const ();
    entry as usize
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Pins every `ARENA_STATE_OFFSET_*` constant to the real field
    /// offset of the `#[repr(C)]` struct on the host running the test.
    #[test]
    fn arena_state_offsets_match_repr_c_layout() {
        let mut buf = [0u8; 16];
        let state = ArenaState::new(&mut buf, 16);
        let base = &state as *const _ as usize;
        assert_eq!(
            (state.arena_base.get() as usize) - base,
            ARENA_STATE_OFFSET_BASE as usize
        );
        assert_eq!(
            (state.arena_len.get() as usize) - base,
            ARENA_STATE_OFFSET_LEN as usize
        );
        assert_eq!(
            (state.tail_cursor.get() as usize) - base,
            ARENA_STATE_OFFSET_TAIL_CURSOR as usize
        );
        assert_eq!(
            (state.scratch_cursor.get() as usize) - base,
            ARENA_STATE_OFFSET_SCRATCH_CURSOR as usize
        );
        assert_eq!(
            (state.scratch_base.get() as usize) - base,
            ARENA_STATE_OFFSET_SCRATCH_BASE as usize
        );
        assert_eq!(
            (state.trap_code.get() as usize) - base,
            ARENA_STATE_OFFSET_TRAP_CODE as usize
        );
        assert_eq!(
            (state.host_fns.get() as usize) - base,
            ARENA_STATE_OFFSET_HOST_FNS as usize
        );
        assert_eq!(
            (state.step_budget.get() as usize) - base,
            ARENA_STATE_OFFSET_STEP_BUDGET as usize
        );
    }

    /// Test host fn: returns its first positional `Int` plus one, and
    /// errors when that argument is missing or not an `Int`.
    struct AddOne;
    impl RelonFunction for AddOne {
        fn call(&self, args: NativeArgs, _r: TokenRange) -> Result<Value, RuntimeError> {
            match args.positional.first() {
                Some(Value::Int(x)) => Ok(Value::Int(x + 1)),
                _ => Err(RuntimeError::Unsupported {
                    reason: "AddOne expects Int".into(),
                }),
            }
        }
    }

    #[test]
    fn call_native_helper_dispatches_registered_fn() {
        let mut reg = HostFnRegistry::new();
        reg.register(0, Arc::new(AddOne));
        let mut buf = [0u8; 16];
        let state = ArenaState::new(&mut buf, 16);
        // SAFETY: `reg` outlives the call below.
        unsafe { state.install_host_fns(&reg as *const _) };
        let args = [41i64];
        // SAFETY: `state` is live and `args` holds exactly one i64.
        let r = unsafe { relon_llvm_call_native(&state as *const _, 0, args.as_ptr(), 1) };
        assert_eq!(r, 42);
        assert_eq!(state.trap_code(), 0);
    }

    #[test]
    fn call_native_helper_traps_when_unregistered() {
        let reg = HostFnRegistry::new();
        let mut buf = [0u8; 16];
        let state = ArenaState::new(&mut buf, 16);
        // SAFETY: `reg` outlives the call below.
        unsafe { state.install_host_fns(&reg as *const _) };
        // SAFETY: `state` is live; zero args, so the pointer is unused.
        let r = unsafe { relon_llvm_call_native(&state as *const _, 7, std::ptr::null(), 0) };
        assert_eq!(r, 0);
        assert_eq!(state.trap_code(), NativeTrap::HostFnMissing as u64);
    }

    #[test]
    fn call_native_helper_traps_when_no_registry() {
        let mut buf = [0u8; 16];
        let state = ArenaState::new(&mut buf, 16);
        // No install_host_fns — registry pointer stays null.
        // SAFETY: `state` is live; zero args, so the pointer is unused.
        let r = unsafe { relon_llvm_call_native(&state as *const _, 0, std::ptr::null(), 0) };
        assert_eq!(r, 0);
        assert_eq!(state.trap_code(), NativeTrap::HostFnMissing as u64);
    }

    /// A host fn that returns `Err` must surface as `HostFnError`, not
    /// as a successful zero result.
    #[test]
    fn call_native_helper_records_host_fn_error() {
        let mut reg = HostFnRegistry::new();
        reg.register(0, Arc::new(AddOne));
        let mut buf = [0u8; 16];
        let state = ArenaState::new(&mut buf, 16);
        // SAFETY: `reg` outlives the call below.
        unsafe { state.install_host_fns(&reg as *const _) };
        // `AddOne` rejects an empty argument list.
        // SAFETY: `state` is live; zero args, so the pointer is unused.
        let r = unsafe { relon_llvm_call_native(&state as *const _, 0, std::ptr::null(), 0) };
        assert_eq!(r, 0);
        assert_eq!(state.trap_code(), NativeTrap::HostFnError as u64);
    }

    #[test]
    fn registry_register_overwrites_and_reports_len() {
        let mut reg = HostFnRegistry::new();
        assert!(reg.is_empty());
        assert!(reg.resolve(3).is_none());
        reg.register(3, Arc::new(AddOne));
        reg.register(3, Arc::new(AddOne));
        assert_eq!(reg.len(), 1);
        assert!(reg.resolve(3).is_some());
        assert!(!reg.is_empty());
    }

    #[test]
    fn native_trap_bounds_code_lifts_to_index_out_of_bounds() {
        assert!(matches!(
            NativeTrap::runtime_error_from_code(NativeTrap::DivisionByZero as u64),
            RuntimeError::DivisionByZero(_)
        ));
        assert!(matches!(
            NativeTrap::runtime_error_from_code(NativeTrap::BoundsViolation as u64),
            RuntimeError::IndexOutOfBounds { .. }
        ));
        assert!(matches!(
            NativeTrap::runtime_error_from_code(NativeTrap::ResourceExhausted as u64),
            RuntimeError::StepLimitExceeded { .. }
        ));
    }

    /// Covers the codes the test above does not: the remaining typed
    /// lifts and the `Unsupported` fallback.
    #[test]
    fn native_trap_remaining_codes_lift_to_typed_errors() {
        assert!(matches!(
            NativeTrap::runtime_error_from_code(NativeTrap::CapabilityDenied as u64),
            RuntimeError::CapabilityDenied { .. }
        ));
        assert!(matches!(
            NativeTrap::runtime_error_from_code(NativeTrap::NumericOverflow as u64),
            RuntimeError::NumericOverflow(_)
        ));
        assert!(matches!(
            NativeTrap::runtime_error_from_code(NativeTrap::NoMatch as u64),
            RuntimeError::TypeMismatch { .. }
        ));
        for code in [
            0,
            NativeTrap::HostFnMissing as u64,
            NativeTrap::HostFnError as u64,
            u64::MAX,
        ] {
            assert!(matches!(
                NativeTrap::runtime_error_from_code(code),
                RuntimeError::Unsupported { .. }
            ));
        }
    }
}