Skip to main content

luna_core/jit/
aot_meta.rs

1//! v1.3 Phase AOT Stage 7 sub-piece 4 — wire format for AOT trace
2//! metadata.
3//!
4//! # Why a luna-core module
5//!
6//! The format is shared by two distinct crates:
7//!
8//! - `luna-aot` (compile-time): serializes a runtime `CompiledTrace`
9//!   into bytes embedded in the AOT object's `luna_trace_blob`
10//!   section.
11//! - `luna-runtime-helpers` (deploy-time): walks the
12//!   `luna_trace_meta` index section, deserializes each entry's blob
13//!   into the **minimal fields** needed to construct a fresh
14//!   `CompiledTrace` for the deploy `Vm`'s dispatcher.
15//!
16//! Putting the wire format under `luna-core` keeps both crates pinned
17//! to the same constants without giving either a dep on the other.
18//!
19//! # 0-dep contract
20//!
21//! Hand-rolled `u8` packing — no `bincode`, no `serde`. Format is
22//! stable across the v1.3 line (header carries [`AOT_META_MAGIC`] +
23//! [`AOT_META_VERSION`]; a mismatch on the deploy side is a hard
24//! reject, not silent fallback).
25//!
26//! # Wire format versions
27//!
28//! - **v1** — sub-piece-4 minimal cut. Fields: `head_pc`, `n_ops`,
29//!   `window_size`, `dispatchable`, `entry_tags`, `exit_tags`,
30//!   `global_tag_res_kind`. Only "simple" traces (no inline side-
31//!   exits, no per-cont_pc tag exits) installable.
32//! - **v2** — Stage 7 trace-coverage follow-up. Appends a trailing
33//!   `per_exit_tags` array (`(cont_pc, [ExitTag])` per entry) so
34//!   traces with typed-register side-exit guards (GetUpval-heavy
35//!   closures, type-specialized GetField loops) are AOT-installable.
36//!   v2 readers MUST accept v1 blobs as if `per_exit_tags`
37//!   were empty (the trailing block becomes optional via the
38//!   `bytes.len() > cur` predicate at decode time).
39//! - **v3** — Inline cmp@d>0 side-exit scaffolding. Appends a second
40//!   trailing block after `per_exit_tags`: a list of
41//!   [`PerExitInlineEntry`] records carrying the `cont_pc`,
42//!   `head_resume_pc`, the per-slot exit-tag snapshot covering the
43//!   trace's full `window_size`, and the `FrameMaterializeInfo`
44//!   chain bytes. v3 readers accept v1 and v2 blobs as if the
45//!   inline block were empty.
46//!
47//!   **Important — install-time invariant**: emitting the v3 inline
48//!   block into a meta blob is necessary but **not sufficient** to
49//!   safely AOT-install a trace whose `per_exit_inline` is non-
50//!   empty. The trace mcode itself today bakes a raw
51//!   `Rc::as_ptr(&chain_rc)` value as an `iconst` immediate at lower
52//!   time (see `luna-jit/src/jit_backend/trace.rs` near the cmp@d>0
53//!   side-exit emit sites). Under JIT the immediate is a live heap
54//!   pointer; under AOT it would be the warmup VM's heap address,
55//!   invalid in the deploy binary. To actually unlock the install
56//!   path the lowerer must:
57//!     1. Emit a writable per-site slot (`__luna_aot_inline_chain_
58//!        slot_<key>`) and load through it instead of the raw
59//!        `iconst`.
60//!     2. Register a new `luna_inline_chain_idx` bracketed section
61//!        analogous to `luna_strkey_idx` so the deploy resolver can
62//!        match installed entries to slot addresses.
63//!     3. The deploy walker (after rebuilding `Rc<[FrameMaterializeInfo
64//!        ]>` from v3 blob bytes) writes the live address of the
65//!        rebuilt chain into each slot before the first dispatch.
66//!
67//!   Until those three pieces land, the AOT harvester
68//!   (`luna-aot::embed`) MUST keep its
69//!   `per_exit_inline.is_empty()` filter so non-empty-inline
70//!   traces never produce a v3 blob with inline data — they stay
71//!   JIT-only. The wire format below is forward-ready so the
72//!   lowerer / resolver work doesn't need a fresh version bump.
73//!
74//! # Field summary
75//!
76//! `CompiledTrace` carries 30+ fields including `RefCell<HashMap>`,
77//! `Box<Cell<*const u8>>`, `Rc<[InlineSideExit]>` — most are side-
78//! trace bookkeeping irrelevant for AOT (the deploy `Vm` never side-
79//! traces an AOT-installed trace, so all those start empty). The
80//! AOT meta format serializes only the **dispatch-load-bearing**
81//! fields:
82//!
83//! - `head_pc`, `n_ops`, `window_size`, `dispatchable`
84//! - `entry_tags: Rc<[u8]>` — per-slot entry-tag specialization
85//! - `exit_tags: Rc<[ExitTag]>` — per-slot exit-tag restore (clean tail)
86//! - `global_tag_res_kind` — fast-path classification
87//! - `per_exit_tags` *(v2+)* — per-cont_pc slot-shape entries the
88//!   dispatcher uses to restore vm.stack on a typed-register
89//!   side-exit
90//! - `per_exit_inline` *(v3+)* — per-site inline cmp@d>0 side-exit
91//!   records: cont_pc + head_resume_pc + per-slot exit_tags + the
92//!   `FrameMaterializeInfo` chain bytes. Forward-compat scaffold;
93//!   the harvester does not yet emit non-empty entries because the
94//!   trace mcode side needs a relocatable chain-slot scheme first
95//!   (see the v3 paragraph above).
96//!
97//! `body_writes`, side-trace ptrs etc. default to empty / null.
98
99use crate::jit::trace_types::{ExitTag, FrameMaterializeInfo, TagResKind};
100
/// Four-byte signature that opens every AOT meta blob.
///
/// [`decode_meta_blob`] reads the first little-endian `u32` of a blob
/// and compares it with this value before looking at anything else; a
/// mismatch is returned as an error so bytes that are not a meta blob
/// are never interpreted as one.
pub const AOT_META_MAGIC: u32 = 0xAA77_0001;
106
/// Current wire-format revision stamped into every blob header.
///
/// - **v1** — minimal cut (sub-piece 4): fixed header plus the
///   `entry_tags` / `exit_tags` arrays.
/// - **v2** — adds a trailing `per_exit_tags` block so typed-register
///   side-exits (GetUpval-heavy traces) install at deploy time.
/// - **v3** — adds a second trailing block carrying `per_exit_inline`
///   data so depth>0 inlined cmp side-exits can be rebuilt at install
///   time. The trace mcode side still needs a relocatable-slot scheme
///   before non-empty inline entries can actually be emitted; the
///   module docs describe the staging plan.
///
/// **Layout contract**: a v3 writer keeps the v1/v2 fixed-prefix header
/// layout, then always writes the v2 tail followed by the v3 tail (each
/// with `count = 0` when empty). On the read side, [`decode_meta_blob`]
/// treats a tail as empty whenever the blob ends before it — that is
/// its `bytes.len() > cur` test at each tail boundary. An older reader
/// handed a newer blob would mis-parse the extra tail as garbage, which
/// is why this constant is bumped with every layout extension: the
/// version check makes such a reader hard-reject instead of silently
/// mis-installing.
pub const AOT_META_VERSION: u32 = 3;
127
/// Fixed-width prefix found at offset 0 of every meta blob.
///
/// Every integer is encoded little-endian and the fields appear on the
/// wire in declaration order, for a total of 28 bytes. Two
/// variable-length arrays follow immediately: `entry_tags_len` bytes of
/// entry tags, then `exit_tags_len` bytes of packed exit tags.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct AotTraceMetaHeader {
    /// Must equal [`AOT_META_MAGIC`]; the deploy side hard-rejects
    /// anything else.
    pub magic: u32,
    /// Must equal [`AOT_META_VERSION`]; the deploy side hard-rejects
    /// anything else.
    pub version: u32,
    /// `head_pc` of the trace — the PC the dispatcher matches on.
    pub head_pc: u32,
    /// `n_ops` of the trace. Diagnostic only on the deploy side.
    pub n_ops: u32,
    /// `window_size` of the trace; sizes the dispatcher's `reg_state`
    /// buffer.
    pub window_size: u32,
    /// The trace's `dispatchable` flag stored as a byte (0 / 1).
    pub dispatchable: u8,
    /// The trace's `global_tag_res_kind`, packed as
    /// `0 = AllUntouched`, `1 = AllInt`, `2 = Mixed`.
    pub tag_res_kind: u8,
    /// Number of `entry_tags` bytes that directly follow the header.
    /// A `u16` suffices: the trace's `max_stack` is bounded by Lua's
    /// `MAXREGS` (255) and even worst-case inlining stays under 4K.
    pub entry_tags_len: u16,
    /// Number of `exit_tags` bytes that follow the `entry_tags` array.
    pub exit_tags_len: u32,
}
159
160impl AotTraceMetaHeader {
161    /// Byte size of the fixed prefix. Used to compute payload offset.
162    pub const SIZE: usize = 28;
163}
164
165/// Pack an `ExitTag` into its on-disk `u8` representation. Mirrors the
166/// `#[repr(u8)]` discriminant so the wire format is the same byte the
167/// compiler would lay out — but we go through the explicit match so a
168/// future reorder of [`ExitTag`]'s variants doesn't silently change
169/// the format.
170pub fn pack_exit_tag(t: ExitTag) -> u8 {
171    match t {
172        ExitTag::Untouched => 0,
173        ExitTag::Int => 1,
174        ExitTag::Float => 2,
175        ExitTag::Table => 3,
176        ExitTag::Closure => 4,
177        ExitTag::Nil => 5,
178        ExitTag::Str => 6,
179    }
180}
181
182/// Inverse of [`pack_exit_tag`]. Returns `None` on an unknown byte
183/// (treated as a corruption signal by the deploy walker).
184pub fn unpack_exit_tag(b: u8) -> Option<ExitTag> {
185    match b {
186        0 => Some(ExitTag::Untouched),
187        1 => Some(ExitTag::Int),
188        2 => Some(ExitTag::Float),
189        3 => Some(ExitTag::Table),
190        4 => Some(ExitTag::Closure),
191        5 => Some(ExitTag::Nil),
192        6 => Some(ExitTag::Str),
193        _ => None,
194    }
195}
196
197/// Pack a [`TagResKind`] into its wire byte.
198pub fn pack_tag_res_kind(k: TagResKind) -> u8 {
199    match k {
200        TagResKind::AllUntouched => 0,
201        TagResKind::AllInt => 1,
202        TagResKind::Mixed => 2,
203    }
204}
205
206/// Inverse of [`pack_tag_res_kind`]. Returns `None` on an unknown byte.
207pub fn unpack_tag_res_kind(b: u8) -> Option<TagResKind> {
208    match b {
209        0 => Some(TagResKind::AllUntouched),
210        1 => Some(TagResKind::AllInt),
211        2 => Some(TagResKind::Mixed),
212        _ => None,
213    }
214}
215
/// A single per-`cont_pc` side-exit record as stored in the v2 tail of
/// a meta blob.
///
/// It has the same shape as one element of
/// `CompiledTrace::per_exit_tags` (`(u32, Rc<[ExitTag]>)`), except that
/// the tag slice has already been run through [`pack_exit_tag`].
#[derive(Clone, Debug)]
pub struct PerExitTagsEntry {
    /// PC at which the interpreter resumes once the side-exit fires.
    /// Matches the `iconst` the IR bakes into the side-exit return.
    pub cont_pc: u32,
    /// Snapshot of each slot's `ExitTag` at the side-exit, one byte per
    /// slot as produced by [`pack_exit_tag`]. Its length is the trace's
    /// caller-window `max_stack` (always ≤ `window_size`).
    pub tags_packed: Vec<u8>,
}
230
/// One per-site inline cmp@d>0 side-exit entry serialized into the v3
/// tail of a meta blob. Mirrors `CompiledTrace::per_exit_inline`'s
/// [`crate::jit::trace_types::InlineSideExit`] shape minus the
/// runtime-only `side_trace_ptr` cell (always defaults null on AOT
/// install).
///
/// The `chain_bytes` field carries the [`FrameMaterializeInfo`] chain
/// in wire form: each record is three little-endian 32-bit values
/// (`base_offset`, `pc`, `nresults`), i.e. exactly 12 bytes per record
/// regardless of target, so the wire layout is stable.
#[derive(Clone, Debug)]
pub struct PerExitInlineEntry {
    /// Pc the interpreter resumes at after the inline side-exit
    /// fires. Mirrors `InlineSideExit::cont_pc`.
    pub cont_pc: u32,
    /// Pc to write on the trace head frame when the side-exit fires
    /// (the outermost self-rec Call's `pc + 1`). Mirrors
    /// `InlineSideExit::head_resume_pc`.
    pub head_resume_pc: u32,
    /// Per-slot `ExitTag` snapshot at the side-exit moment, packed
    /// via [`pack_exit_tag`]. Length equals the trace's full
    /// `window_size` (caller + every inlined frame's register
    /// window) — `per_exit_tags`'s arrays cover only `max_stack`,
    /// inline arrays cover the full window.
    pub tags_packed: Vec<u8>,
    /// `FrameMaterializeInfo` records in wire form (count * 12 bytes).
    /// Outermost = depth 1 first, innermost = depth N last; the
    /// innermost frame's `pc` is already overwritten to the side-
    /// exit PC at AOT-compile time (matches the JIT-side
    /// `snapshot.last_mut().pc = side_exit_pc` step). Length divisible
    /// by 12 is a wire-format invariant; the decoder rejects otherwise.
    pub chain_bytes: Vec<u8>,
}
262
263impl PerExitInlineEntry {
264    /// Byte size of one `FrameMaterializeInfo` on the wire. Asserted
265    /// at compile time via [`FRAME_MATERIALIZE_INFO_WIRE_SIZE_CHECK`]
266    /// against the live struct so layout drift fails the build.
267    pub const FRAME_MATERIALIZE_INFO_SIZE: usize = 12;
268
269    /// Construct a `PerExitInlineEntry` from a live
270    /// [`crate::jit::trace_types::InlineSideExit`]. The chain is
271    /// serialized via a raw-byte copy — `FrameMaterializeInfo` is
272    /// `repr(C)` + all-`Copy` fields with no padding, so the byte
273    /// pattern matches the on-disk wire layout verbatim.
274    ///
275    /// # Safety
276    ///
277    /// `FrameMaterializeInfo` is `#[repr(C)]` with three 32-bit
278    /// fields and no padding; the byte-level transmute below is
279    /// sound per the wire-size assertion at module top.
280    pub fn from_inline_side_exit(src: &crate::jit::trace_types::InlineSideExit) -> Self {
281        let tags_packed: Vec<u8> = src.exit_tags.iter().copied().map(pack_exit_tag).collect();
282        let n = src.chain.len();
283        let mut chain_bytes = Vec::with_capacity(n * Self::FRAME_MATERIALIZE_INFO_SIZE);
284        for fm in src.chain.iter() {
285            chain_bytes.extend_from_slice(&fm.base_offset.to_le_bytes());
286            chain_bytes.extend_from_slice(&fm.pc.to_le_bytes());
287            chain_bytes.extend_from_slice(&fm.nresults.to_le_bytes());
288        }
289        PerExitInlineEntry {
290            cont_pc: src.cont_pc,
291            head_resume_pc: src.head_resume_pc,
292            tags_packed,
293            chain_bytes,
294        }
295    }
296
297    /// Reconstruct a `Vec<FrameMaterializeInfo>` from this entry's
298    /// `chain_bytes`. Returns `None` if `chain_bytes.len()` is not a
299    /// multiple of [`Self::FRAME_MATERIALIZE_INFO_SIZE`] (corruption
300    /// signal — the deploy walker should skip the entry).
301    pub fn rebuild_chain(&self) -> Option<Vec<FrameMaterializeInfo>> {
302        let unit = Self::FRAME_MATERIALIZE_INFO_SIZE;
303        if !self.chain_bytes.len().is_multiple_of(unit) {
304            return None;
305        }
306        let n = self.chain_bytes.len() / unit;
307        let mut out = Vec::with_capacity(n);
308        for i in 0..n {
309            let off = i * unit;
310            let base_offset =
311                u32::from_le_bytes(self.chain_bytes[off..off + 4].try_into().unwrap());
312            let pc = u32::from_le_bytes(self.chain_bytes[off + 4..off + 8].try_into().unwrap());
313            let nresults =
314                i32::from_le_bytes(self.chain_bytes[off + 8..off + 12].try_into().unwrap());
315            out.push(FrameMaterializeInfo {
316                base_offset,
317                pc,
318                nresults,
319            });
320        }
321        Some(out)
322    }
323}
324
325/// Static assertion that [`FrameMaterializeInfo`]'s in-memory size
326/// matches the wire-format constant. A regression here (a fourth
327/// field, a different layout attribute) silently misaligns the
328/// `chain_bytes` (re)serialization; the build break makes the drift
329/// loud at the source instead of mysterious at deploy time.
330pub const FRAME_MATERIALIZE_INFO_WIRE_SIZE_CHECK: () = assert!(
331    core::mem::size_of::<FrameMaterializeInfo>() == PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE,
332    "FrameMaterializeInfo wire size drifted — update PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE \
333     and any deploy-side rebuilders together"
334);
335
336/// Serialize a header + the two tag arrays + the v2 `per_exit_tags`
337/// tail + the v3 `per_exit_inline` tail into a fresh `Vec<u8>`. Pass
338/// empty slices to emit a "simple" trace (each tail then carries a
339/// single `count = 0` u32 — still v3 layout, just empty).
340///
341/// The produced bytes are what `luna-aot` embeds into the
342/// `luna_trace_blob` section per-trace; the deploy walker reads from
343/// the same wire shape via [`decode_meta_blob`].
344pub fn encode_meta_blob(
345    header: &AotTraceMetaHeader,
346    entry_tags: &[u8],
347    exit_tags_packed: &[u8],
348    per_exit_tags: &[PerExitTagsEntry],
349    per_exit_inline: &[PerExitInlineEntry],
350) -> Vec<u8> {
351    assert_eq!(entry_tags.len(), header.entry_tags_len as usize);
352    assert_eq!(exit_tags_packed.len(), header.exit_tags_len as usize);
353    assert_eq!(header.version, AOT_META_VERSION);
354    let v2_tail_bytes: usize = 4 + per_exit_tags
355        .iter()
356        .map(|e| 4 + 4 + e.tags_packed.len())
357        .sum::<usize>();
358    let v3_tail_bytes: usize = 4 + per_exit_inline
359        .iter()
360        .map(|e| 4 + 4 + 4 + e.tags_packed.len() + 4 + e.chain_bytes.len())
361        .sum::<usize>();
362    let mut out = Vec::with_capacity(
363        AotTraceMetaHeader::SIZE
364            + entry_tags.len()
365            + exit_tags_packed.len()
366            + v2_tail_bytes
367            + v3_tail_bytes,
368    );
369    out.extend_from_slice(&header.magic.to_le_bytes());
370    out.extend_from_slice(&header.version.to_le_bytes());
371    out.extend_from_slice(&header.head_pc.to_le_bytes());
372    out.extend_from_slice(&header.n_ops.to_le_bytes());
373    out.extend_from_slice(&header.window_size.to_le_bytes());
374    out.push(header.dispatchable);
375    out.push(header.tag_res_kind);
376    out.extend_from_slice(&header.entry_tags_len.to_le_bytes());
377    out.extend_from_slice(&header.exit_tags_len.to_le_bytes());
378    out.extend_from_slice(entry_tags);
379    out.extend_from_slice(exit_tags_packed);
380    // v2 tail: u32 count, then per entry [cont_pc:u32, tags_len:u32, tags:[u8; tags_len]].
381    out.extend_from_slice(&(per_exit_tags.len() as u32).to_le_bytes());
382    for ent in per_exit_tags {
383        out.extend_from_slice(&ent.cont_pc.to_le_bytes());
384        out.extend_from_slice(&(ent.tags_packed.len() as u32).to_le_bytes());
385        out.extend_from_slice(&ent.tags_packed);
386    }
387    // v3 tail: u32 count, then per entry
388    //   [cont_pc:u32, head_resume_pc:u32, tags_len:u32, tags:[u8; tags_len],
389    //    chain_bytes_len:u32, chain_bytes:[u8; chain_bytes_len]].
390    // chain_bytes_len is always a multiple of 12 (FrameMaterializeInfo
391    // wire size); the decoder rejects otherwise as a corruption signal.
392    out.extend_from_slice(&(per_exit_inline.len() as u32).to_le_bytes());
393    for ent in per_exit_inline {
394        out.extend_from_slice(&ent.cont_pc.to_le_bytes());
395        out.extend_from_slice(&ent.head_resume_pc.to_le_bytes());
396        out.extend_from_slice(&(ent.tags_packed.len() as u32).to_le_bytes());
397        out.extend_from_slice(&ent.tags_packed);
398        out.extend_from_slice(&(ent.chain_bytes.len() as u32).to_le_bytes());
399        out.extend_from_slice(&ent.chain_bytes);
400    }
401    out
402}
403
404/// Decoded shape returned by [`decode_meta_blob`].
405#[derive(Debug)]
406pub struct DecodedMeta {
407    /// The fixed-prefix header.
408    pub header: AotTraceMetaHeader,
409    /// `entry_tags` payload (length = `header.entry_tags_len`).
410    pub entry_tags: Vec<u8>,
411    /// `exit_tags` payload (length = `header.exit_tags_len`), still in
412    /// packed `u8` form. Caller maps each through [`unpack_exit_tag`].
413    pub exit_tags: Vec<u8>,
414    /// v2 tail — per-cont_pc tag arrays. Empty for v1 blobs and for
415    /// v2+ traces with no typed-register side-exits.
416    pub per_exit_tags: Vec<PerExitTagsEntry>,
417    /// v3 tail — per-site inline cmp@d>0 side-exit metadata.
418    /// Empty for v1 / v2 blobs and for v3 traces with no inlined
419    /// side-exits. Today the AOT harvester filters out traces with
420    /// non-empty `per_exit_inline` regardless (see module docs);
421    /// the field exists so the wire format is forward-ready for the
422    /// relocatable-chain-slot lowerer work that flips the filter.
423    pub per_exit_inline: Vec<PerExitInlineEntry>,
424}
425
426/// Deserialize a blob produced by [`encode_meta_blob`]. Returns
427/// `Err(reason)` on magic / version / length mismatch — the deploy
428/// walker should skip the entry and log the reason rather than
429/// installing a broken trace.
430pub fn decode_meta_blob(bytes: &[u8]) -> Result<DecodedMeta, &'static str> {
431    if bytes.len() < AotTraceMetaHeader::SIZE {
432        return Err("blob shorter than header");
433    }
434    let magic = u32::from_le_bytes(bytes[0..4].try_into().unwrap());
435    if magic != AOT_META_MAGIC {
436        return Err("AOT_META_MAGIC mismatch");
437    }
438    let version = u32::from_le_bytes(bytes[4..8].try_into().unwrap());
439    if version != AOT_META_VERSION {
440        return Err("AOT_META_VERSION mismatch");
441    }
442    let head_pc = u32::from_le_bytes(bytes[8..12].try_into().unwrap());
443    let n_ops = u32::from_le_bytes(bytes[12..16].try_into().unwrap());
444    let window_size = u32::from_le_bytes(bytes[16..20].try_into().unwrap());
445    let dispatchable = bytes[20];
446    let tag_res_kind = bytes[21];
447    let entry_tags_len = u16::from_le_bytes(bytes[22..24].try_into().unwrap());
448    let exit_tags_len = u32::from_le_bytes(bytes[24..28].try_into().unwrap());
449    let header = AotTraceMetaHeader {
450        magic,
451        version,
452        head_pc,
453        n_ops,
454        window_size,
455        dispatchable,
456        tag_res_kind,
457        entry_tags_len,
458        exit_tags_len,
459    };
460    let total_payload = entry_tags_len as usize + exit_tags_len as usize;
461    if bytes.len() < AotTraceMetaHeader::SIZE + total_payload {
462        return Err("blob shorter than declared payload");
463    }
464    let entry_start = AotTraceMetaHeader::SIZE;
465    let entry_end = entry_start + entry_tags_len as usize;
466    let exit_end = entry_end + exit_tags_len as usize;
467    let entry_tags = bytes[entry_start..entry_end].to_vec();
468    let exit_tags = bytes[entry_end..exit_end].to_vec();
469    // v2 tail: optional per_exit_tags block. Absent (= empty) when
470    // the blob ends exactly at `exit_end` — covers v1-shaped
471    // producers (which never wrote a tail) and v2+ producers
472    // serializing a trace with zero typed-register side-exits.
473    let mut per_exit_tags: Vec<PerExitTagsEntry> = Vec::new();
474    let mut cur = exit_end;
475    if bytes.len() > cur {
476        if bytes.len() < cur + 4 {
477            return Err("v2 tail truncated at count");
478        }
479        let count = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
480        cur += 4;
481        per_exit_tags.reserve(count);
482        for _ in 0..count {
483            if bytes.len() < cur + 8 {
484                return Err("v2 tail truncated at entry header");
485            }
486            let cont_pc = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap());
487            cur += 4;
488            let tags_len = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
489            cur += 4;
490            if bytes.len() < cur + tags_len {
491                return Err("v2 tail truncated at entry tags");
492            }
493            let tags_packed = bytes[cur..cur + tags_len].to_vec();
494            cur += tags_len;
495            per_exit_tags.push(PerExitTagsEntry {
496                cont_pc,
497                tags_packed,
498            });
499        }
500    }
501    // v3 tail: optional per_exit_inline block. Absent when the blob
502    // ends at the v2 tail boundary — covers v1 / v2 producers and
503    // v3 producers serializing a trace with zero inline cmp@d>0
504    // side-exits. Tail layout per entry:
505    //   cont_pc:u32, head_resume_pc:u32,
506    //   tags_len:u32, tags:[u8; tags_len],
507    //   chain_bytes_len:u32, chain_bytes:[u8; chain_bytes_len]
508    // chain_bytes_len validated as a multiple of 12
509    // (FrameMaterializeInfo wire size) — non-multiple = corruption,
510    // we Err so the deploy walker skips the entry cleanly.
511    let mut per_exit_inline: Vec<PerExitInlineEntry> = Vec::new();
512    if bytes.len() > cur {
513        if bytes.len() < cur + 4 {
514            return Err("v3 tail truncated at count");
515        }
516        let count = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
517        cur += 4;
518        per_exit_inline.reserve(count);
519        for _ in 0..count {
520            if bytes.len() < cur + 12 {
521                return Err("v3 tail truncated at entry header");
522            }
523            let cont_pc = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap());
524            cur += 4;
525            let head_resume_pc = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap());
526            cur += 4;
527            let tags_len = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
528            cur += 4;
529            if bytes.len() < cur + tags_len {
530                return Err("v3 tail truncated at entry tags");
531            }
532            let tags_packed = bytes[cur..cur + tags_len].to_vec();
533            cur += tags_len;
534            if bytes.len() < cur + 4 {
535                return Err("v3 tail truncated at chain header");
536            }
537            let chain_bytes_len =
538                u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
539            cur += 4;
540            if bytes.len() < cur + chain_bytes_len {
541                return Err("v3 tail truncated at chain bytes");
542            }
543            if !chain_bytes_len.is_multiple_of(PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE) {
544                return Err("v3 tail chain_bytes_len not a multiple of FrameMaterializeInfo size");
545            }
546            let chain_bytes = bytes[cur..cur + chain_bytes_len].to_vec();
547            cur += chain_bytes_len;
548            per_exit_inline.push(PerExitInlineEntry {
549                cont_pc,
550                head_resume_pc,
551                tags_packed,
552                chain_bytes,
553            });
554        }
555    }
556    Ok(DecodedMeta {
557        header,
558        entry_tags,
559        exit_tags,
560        per_exit_tags,
561        per_exit_inline,
562    })
563}
564
/// One record of the deploy-side `luna_trace_meta` index section.
///
/// Each record is 48 bytes. `fn_ptr` and `meta_ptr` are filled in by
/// the static linker through relocations that resolve to the trace's
/// `.text` body and to its `luna_trace_blob` payload respectively.
///
/// The deploy walker finds the section bounds through the
/// linker-synthetic `__start_luna_trace_meta` /
/// `__stop_luna_trace_meta` symbols (ELF) or
/// `section$start$__DATA$luna_trace_meta` (Mach-O), the same way
/// sub-piece 3 brackets `luna_strkey_idx`.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct AotTraceIndexEntry {
    /// `Proto::stable_hash()` of the owning proto; lets the deploy side
    /// match the AOT-time proto identity against its loaded proto tree.
    pub proto_hash: [u8; 16],
    /// `head_pc` of the trace. Combined with `proto_hash` to detect
    /// duplicate installs and to log which trace fired.
    pub head_pc: u32,
    /// Filler that brings the following 64-bit address fields to an
    /// 8-byte boundary.
    pub _pad: u32,
    /// Address of the AOT-emitted trace function
    /// (`extern "C" fn(*mut i64) -> i64`), resolved by the linker
    /// against the `luna_aot_trace_<idx>` symbol the lowerer exports.
    /// Held as `u64` so the record layout is the same on 32- and
    /// 64-bit targets — wasm32 and other 32-bit targets cast through
    /// this field. AOT-binary deploy is always 64-bit (cross-compiling
    /// to 32-bit targets is disabled at the linker step), so the upper
    /// 32 bits are zero in practice.
    pub fn_ptr: u64,
    /// Address of this trace's meta blob inside `luna_trace_blob`.
    /// `u64` for the same width-stability reason as `fn_ptr`.
    pub meta_ptr: u64,
    /// Byte length of the meta blob; the deploy walker hard-rejects
    /// entries whose declared payload exceeds it.
    pub meta_len: u32,
    /// Trailing filler that rounds the record up to a multiple of
    /// 8 bytes (48 in total).
    pub _pad2: u32,
}
604
605impl AotTraceIndexEntry {
606    /// Byte size of one index entry. Compile-time assertion lives
607    /// next to the type via [`AOT_TRACE_INDEX_ENTRY_SIZE_CHECK`].
608    pub const SIZE: usize = 48;
609}
610
611/// Static assertion that `AotTraceIndexEntry` is exactly 48 bytes on
612/// the host build. Both crates that consume this format (`luna-aot`,
613/// `luna-runtime-helpers`) inherit the assertion via the type, so a
614/// padding regression fails compilation before the wire format
615/// silently misaligns.
616pub const AOT_TRACE_INDEX_ENTRY_SIZE_CHECK: () = assert!(
617    core::mem::size_of::<AotTraceIndexEntry>() == AotTraceIndexEntry::SIZE,
618    "AotTraceIndexEntry must be 48 bytes — alignment / padding regressed"
619);
620
621#[cfg(test)]
622mod tests {
623    use super::*;
624
625    #[test]
626    fn header_round_trip() {
627        let header = AotTraceMetaHeader {
628            magic: AOT_META_MAGIC,
629            version: AOT_META_VERSION,
630            head_pc: 42,
631            n_ops: 7,
632            window_size: 4,
633            dispatchable: 1,
634            tag_res_kind: pack_tag_res_kind(TagResKind::AllInt),
635            entry_tags_len: 2,
636            exit_tags_len: 3,
637        };
638        let entry_tags = vec![1u8, 2u8];
639        let exit_tags = vec![
640            pack_exit_tag(ExitTag::Int),
641            pack_exit_tag(ExitTag::Untouched),
642            pack_exit_tag(ExitTag::Float),
643        ];
644        let blob = encode_meta_blob(&header, &entry_tags, &exit_tags, &[], &[]);
645        // SIZE + entry_tags + exit_tags + v2-tail-count(4) + v3-tail-count(4)
646        assert_eq!(blob.len(), AotTraceMetaHeader::SIZE + 2 + 3 + 4 + 4);
647        let decoded = decode_meta_blob(&blob).expect("decode");
648        assert!(decoded.per_exit_tags.is_empty());
649        assert!(decoded.per_exit_inline.is_empty());
650        assert_eq!(decoded.header.head_pc, 42);
651        assert_eq!(decoded.header.window_size, 4);
652        assert_eq!(decoded.header.dispatchable, 1);
653        assert_eq!(decoded.entry_tags, entry_tags);
654        assert_eq!(decoded.exit_tags, exit_tags);
655        assert_eq!(
656            unpack_tag_res_kind(decoded.header.tag_res_kind),
657            Some(TagResKind::AllInt)
658        );
659        for (raw, expected) in
660            decoded
661                .exit_tags
662                .iter()
663                .zip([ExitTag::Int, ExitTag::Untouched, ExitTag::Float])
664        {
665            assert_eq!(unpack_exit_tag(*raw), Some(expected));
666        }
667    }
668
669    #[test]
670    fn decode_rejects_magic_mismatch() {
671        let mut blob = vec![0u8; AotTraceMetaHeader::SIZE];
672        // Magic stays zero.
673        let err = decode_meta_blob(&blob).unwrap_err();
674        assert!(err.contains("MAGIC"));
675        // Now valid magic + wrong version.
676        blob[..4].copy_from_slice(&AOT_META_MAGIC.to_le_bytes());
677        let err = decode_meta_blob(&blob).unwrap_err();
678        assert!(err.contains("VERSION"));
679    }
680
681    #[test]
682    fn v2_per_exit_tags_round_trip() {
683        // Two entries — one shorter than the other so the tail walker
684        // exercises variable-length parsing per entry.
685        let header = AotTraceMetaHeader {
686            magic: AOT_META_MAGIC,
687            version: AOT_META_VERSION,
688            head_pc: 7,
689            n_ops: 12,
690            window_size: 5,
691            dispatchable: 1,
692            tag_res_kind: pack_tag_res_kind(TagResKind::Mixed),
693            entry_tags_len: 0,
694            exit_tags_len: 0,
695        };
696        let entries = vec![
697            PerExitTagsEntry {
698                cont_pc: 3,
699                tags_packed: vec![
700                    pack_exit_tag(ExitTag::Int),
701                    pack_exit_tag(ExitTag::Untouched),
702                ],
703            },
704            PerExitTagsEntry {
705                cont_pc: 11,
706                tags_packed: vec![
707                    pack_exit_tag(ExitTag::Closure),
708                    pack_exit_tag(ExitTag::Table),
709                    pack_exit_tag(ExitTag::Float),
710                ],
711            },
712        ];
713        let blob = encode_meta_blob(&header, &[], &[], &entries, &[]);
714        let decoded = decode_meta_blob(&blob).expect("decode v2");
715        assert_eq!(decoded.per_exit_tags.len(), 2);
716        assert_eq!(decoded.per_exit_tags[0].cont_pc, 3);
717        assert_eq!(decoded.per_exit_tags[0].tags_packed.len(), 2);
718        assert_eq!(decoded.per_exit_tags[1].cont_pc, 11);
719        assert_eq!(decoded.per_exit_tags[1].tags_packed.len(), 3);
720        assert!(decoded.per_exit_inline.is_empty());
721    }
722
723    #[test]
724    fn v3_per_exit_inline_round_trip() {
725        // Two inline-side-exit entries with different chain depths so
726        // the tail walker exercises variable-length per-entry parsing.
727        let header = AotTraceMetaHeader {
728            magic: AOT_META_MAGIC,
729            version: AOT_META_VERSION,
730            head_pc: 0,
731            n_ops: 0,
732            window_size: 8,
733            dispatchable: 1,
734            tag_res_kind: pack_tag_res_kind(TagResKind::Mixed),
735            entry_tags_len: 0,
736            exit_tags_len: 0,
737        };
738        // Hand-roll the inline entries (the live-CompiledTrace
739        // converter is exercised by the round-trip-from-live test
740        // below).
741        let inline = vec![
742            PerExitInlineEntry {
743                cont_pc: 5,
744                head_resume_pc: 9,
745                tags_packed: vec![pack_exit_tag(ExitTag::Int), pack_exit_tag(ExitTag::Int)],
746                // 1 FrameMaterializeInfo = 12 bytes:
747                //   base_offset = 3, pc = 4, nresults = 1
748                chain_bytes: {
749                    let mut v = Vec::new();
750                    v.extend_from_slice(&3u32.to_le_bytes());
751                    v.extend_from_slice(&4u32.to_le_bytes());
752                    v.extend_from_slice(&1i32.to_le_bytes());
753                    v
754                },
755            },
756            PerExitInlineEntry {
757                cont_pc: 17,
758                head_resume_pc: 21,
759                tags_packed: vec![
760                    pack_exit_tag(ExitTag::Closure),
761                    pack_exit_tag(ExitTag::Untouched),
762                    pack_exit_tag(ExitTag::Float),
763                ],
764                // 2 frames = 24 bytes.
765                chain_bytes: {
766                    let mut v = Vec::new();
767                    for (off, pc, nr) in [(2u32, 7u32, 1i32), (5u32, 11u32, 2i32)] {
768                        v.extend_from_slice(&off.to_le_bytes());
769                        v.extend_from_slice(&pc.to_le_bytes());
770                        v.extend_from_slice(&nr.to_le_bytes());
771                    }
772                    v
773                },
774            },
775        ];
776        let blob = encode_meta_blob(&header, &[], &[], &[], &inline);
777        let decoded = decode_meta_blob(&blob).expect("decode v3");
778        assert_eq!(decoded.per_exit_inline.len(), 2);
779        assert_eq!(decoded.per_exit_inline[0].cont_pc, 5);
780        assert_eq!(decoded.per_exit_inline[0].head_resume_pc, 9);
781        assert_eq!(decoded.per_exit_inline[0].tags_packed.len(), 2);
782        let chain0 = decoded.per_exit_inline[0]
783            .rebuild_chain()
784            .expect("rebuild chain[0]");
785        assert_eq!(chain0.len(), 1);
786        assert_eq!(chain0[0].base_offset, 3);
787        assert_eq!(chain0[0].pc, 4);
788        assert_eq!(chain0[0].nresults, 1);
789        let chain1 = decoded.per_exit_inline[1]
790            .rebuild_chain()
791            .expect("rebuild chain[1]");
792        assert_eq!(chain1.len(), 2);
793        assert_eq!(chain1[0].base_offset, 2);
794        assert_eq!(chain1[1].pc, 11);
795        assert_eq!(chain1[1].nresults, 2);
796    }
797
798    #[test]
799    fn v3_per_exit_inline_round_trip_from_live() {
800        // Exercise PerExitInlineEntry::from_inline_side_exit against
801        // a hand-built InlineSideExit, then encode + decode +
802        // rebuild and check field equality. Catches drift if either
803        // the live-struct shape or the wire layout changes without
804        // updating the other.
805        use crate::jit::trace_types::{ExitTag, FrameMaterializeInfo, InlineSideExit};
806        let chain = vec![FrameMaterializeInfo {
807            base_offset: 1,
808            pc: 2,
809            nresults: 3,
810        }];
811        let live = InlineSideExit {
812            cont_pc: 42,
813            head_resume_pc: 50,
814            exit_tags: crate::jit::send_compat::TArc::from(
815                vec![ExitTag::Int, ExitTag::Float, ExitTag::Untouched].into_boxed_slice(),
816            ),
817            chain: crate::jit::send_compat::TArc::from(chain.into_boxed_slice()),
818            side_trace_ptr: Box::new(crate::jit::send_compat::TCellPtr::null()),
819        };
820        let entry = PerExitInlineEntry::from_inline_side_exit(&live);
821        assert_eq!(entry.cont_pc, 42);
822        assert_eq!(entry.head_resume_pc, 50);
823        assert_eq!(entry.tags_packed.len(), 3);
824        assert_eq!(
825            entry.chain_bytes.len(),
826            PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE
827        );
828        let header = AotTraceMetaHeader {
829            magic: AOT_META_MAGIC,
830            version: AOT_META_VERSION,
831            head_pc: 0,
832            n_ops: 0,
833            window_size: 4,
834            dispatchable: 1,
835            tag_res_kind: pack_tag_res_kind(TagResKind::Mixed),
836            entry_tags_len: 0,
837            exit_tags_len: 0,
838        };
839        let blob = encode_meta_blob(&header, &[], &[], &[], &[entry]);
840        let decoded = decode_meta_blob(&blob).expect("decode v3 from live");
841        assert_eq!(decoded.per_exit_inline.len(), 1);
842        let rebuilt = decoded.per_exit_inline[0].rebuild_chain().expect("rebuild");
843        assert_eq!(rebuilt.len(), 1);
844        assert_eq!(rebuilt[0].base_offset, 1);
845        assert_eq!(rebuilt[0].pc, 2);
846        assert_eq!(rebuilt[0].nresults, 3);
847    }
848
849    #[test]
850    fn decode_rejects_v3_chain_bytes_misaligned() {
851        // Hand-emit a v3 blob whose chain_bytes_len is not a multiple
852        // of 12 — the decoder MUST refuse (returning Err) instead of
853        // silently truncating, so the deploy walker has a clean skip
854        // signal.
855        let header = AotTraceMetaHeader {
856            magic: AOT_META_MAGIC,
857            version: AOT_META_VERSION,
858            head_pc: 0,
859            n_ops: 0,
860            window_size: 0,
861            dispatchable: 0,
862            tag_res_kind: 0,
863            entry_tags_len: 0,
864            exit_tags_len: 0,
865        };
866        let mut blob = Vec::new();
867        blob.extend_from_slice(&header.magic.to_le_bytes());
868        blob.extend_from_slice(&header.version.to_le_bytes());
869        blob.extend_from_slice(&header.head_pc.to_le_bytes());
870        blob.extend_from_slice(&header.n_ops.to_le_bytes());
871        blob.extend_from_slice(&header.window_size.to_le_bytes());
872        blob.push(header.dispatchable);
873        blob.push(header.tag_res_kind);
874        blob.extend_from_slice(&header.entry_tags_len.to_le_bytes());
875        blob.extend_from_slice(&header.exit_tags_len.to_le_bytes());
876        // v2 tail: count=0.
877        blob.extend_from_slice(&0u32.to_le_bytes());
878        // v3 tail: count=1, cont_pc=0, head_resume_pc=0, tags_len=0,
879        // chain_bytes_len=7 (not a multiple of 12), chain_bytes=7 zeros.
880        blob.extend_from_slice(&1u32.to_le_bytes());
881        blob.extend_from_slice(&0u32.to_le_bytes());
882        blob.extend_from_slice(&0u32.to_le_bytes());
883        blob.extend_from_slice(&0u32.to_le_bytes());
884        blob.extend_from_slice(&7u32.to_le_bytes());
885        blob.extend(std::iter::repeat_n(0u8, 7));
886        let err = decode_meta_blob(&blob).unwrap_err();
887        assert!(
888            err.contains("FrameMaterializeInfo"),
889            "expected misalignment err, got {err:?}"
890        );
891    }
892
893    #[test]
894    fn decode_tolerates_v1_blob_shape() {
895        // Emulate a v1-shaped blob: header + tags, NO trailing v2/v3
896        // tails. The v3 decoder should accept as empty everywhere.
897        let header = AotTraceMetaHeader {
898            magic: AOT_META_MAGIC,
899            version: AOT_META_VERSION,
900            head_pc: 0,
901            n_ops: 0,
902            window_size: 0,
903            dispatchable: 0,
904            tag_res_kind: 0,
905            entry_tags_len: 1,
906            exit_tags_len: 0,
907        };
908        let mut blob = Vec::new();
909        blob.extend_from_slice(&header.magic.to_le_bytes());
910        blob.extend_from_slice(&header.version.to_le_bytes());
911        blob.extend_from_slice(&header.head_pc.to_le_bytes());
912        blob.extend_from_slice(&header.n_ops.to_le_bytes());
913        blob.extend_from_slice(&header.window_size.to_le_bytes());
914        blob.push(header.dispatchable);
915        blob.push(header.tag_res_kind);
916        blob.extend_from_slice(&header.entry_tags_len.to_le_bytes());
917        blob.extend_from_slice(&header.exit_tags_len.to_le_bytes());
918        blob.push(0); // entry_tags[0]
919        // No v2/v3 tails.
920        let decoded = decode_meta_blob(&blob).expect("decode v1-shaped");
921        assert!(decoded.per_exit_tags.is_empty());
922        assert!(decoded.per_exit_inline.is_empty());
923    }
924
925    #[test]
926    fn decode_tolerates_v2_blob_shape() {
927        // Emulate a v2-shaped blob: header + tags + v2 tail only,
928        // NO trailing v3 count u32. The v3 decoder should accept it
929        // as an empty per_exit_inline.
930        let header = AotTraceMetaHeader {
931            magic: AOT_META_MAGIC,
932            version: AOT_META_VERSION,
933            head_pc: 0,
934            n_ops: 0,
935            window_size: 0,
936            dispatchable: 0,
937            tag_res_kind: 0,
938            entry_tags_len: 0,
939            exit_tags_len: 0,
940        };
941        let mut blob = Vec::new();
942        blob.extend_from_slice(&header.magic.to_le_bytes());
943        blob.extend_from_slice(&header.version.to_le_bytes());
944        blob.extend_from_slice(&header.head_pc.to_le_bytes());
945        blob.extend_from_slice(&header.n_ops.to_le_bytes());
946        blob.extend_from_slice(&header.window_size.to_le_bytes());
947        blob.push(header.dispatchable);
948        blob.push(header.tag_res_kind);
949        blob.extend_from_slice(&header.entry_tags_len.to_le_bytes());
950        blob.extend_from_slice(&header.exit_tags_len.to_le_bytes());
951        // v2 tail: count=1, one entry (cont_pc=3, tags_len=1, tags=[Int])
952        blob.extend_from_slice(&1u32.to_le_bytes());
953        blob.extend_from_slice(&3u32.to_le_bytes());
954        blob.extend_from_slice(&1u32.to_le_bytes());
955        blob.push(pack_exit_tag(ExitTag::Int));
956        // No v3 tail.
957        let decoded = decode_meta_blob(&blob).expect("decode v2-shaped");
958        assert_eq!(decoded.per_exit_tags.len(), 1);
959        assert!(decoded.per_exit_inline.is_empty());
960    }
961
962    #[test]
963    fn decode_rejects_truncated() {
964        // Header is fine, but exit_tags_len declares 10 bytes that
965        // aren't there.
966        let header = AotTraceMetaHeader {
967            magic: AOT_META_MAGIC,
968            version: AOT_META_VERSION,
969            head_pc: 0,
970            n_ops: 0,
971            window_size: 0,
972            dispatchable: 0,
973            tag_res_kind: 0,
974            entry_tags_len: 0,
975            exit_tags_len: 10,
976        };
977        let blob = {
978            let mut b = Vec::new();
979            b.extend_from_slice(&header.magic.to_le_bytes());
980            b.extend_from_slice(&header.version.to_le_bytes());
981            b.extend_from_slice(&header.head_pc.to_le_bytes());
982            b.extend_from_slice(&header.n_ops.to_le_bytes());
983            b.extend_from_slice(&header.window_size.to_le_bytes());
984            b.push(header.dispatchable);
985            b.push(header.tag_res_kind);
986            b.extend_from_slice(&header.entry_tags_len.to_le_bytes());
987            b.extend_from_slice(&header.exit_tags_len.to_le_bytes());
988            b
989        };
990        // Only header, no payload — should fail truncation check.
991        let err = decode_meta_blob(&blob).unwrap_err();
992        assert!(err.contains("payload"));
993    }
994}