luna_core/jit/aot_meta.rs
1//! v1.3 Phase AOT Stage 7 sub-piece 4 — wire format for AOT trace
2//! metadata.
3//!
4//! # Why a luna-core module
5//!
6//! The format is shared by two distinct crates:
7//!
8//! - `luna-aot` (compile-time): serializes a runtime `CompiledTrace`
9//! into bytes embedded in the AOT object's `luna_trace_blob`
10//! section.
11//! - `luna-runtime-helpers` (deploy-time): walks the
12//! `luna_trace_meta` index section, deserializes each entry's blob
13//! into the **minimal fields** needed to construct a fresh
14//! `CompiledTrace` for the deploy `Vm`'s dispatcher.
15//!
16//! Putting the wire format under `luna-core` keeps both crates pinned
17//! to the same constants without giving either a dep on the other.
18//!
19//! # 0-dep contract
20//!
21//! Hand-rolled `u8` packing — no `bincode`, no `serde`. Format is
22//! stable across the v1.3 line (header carries [`AOT_META_MAGIC`] +
23//! [`AOT_META_VERSION`]; a mismatch on the deploy side is a hard
24//! reject, not silent fallback).
25//!
26//! # Wire format versions
27//!
28//! - **v1** — sub-piece-4 minimal cut. Fields: `head_pc`, `n_ops`,
29//! `window_size`, `dispatchable`, `entry_tags`, `exit_tags`,
30//! `global_tag_res_kind`. Only "simple" traces (no inline side-
31//! exits, no per-cont_pc tag exits) installable.
32//! - **v2** — Stage 7 trace-coverage follow-up. Appends a trailing
33//! `per_exit_tags` array (`(cont_pc, [ExitTag])` per entry) so
34//! traces with typed-register side-exit guards (GetUpval-heavy
35//! closures, type-specialized GetField loops) are AOT-installable.
//!   v2 readers MUST accept v1 blobs as if `per_exit_tags`
//!   were empty (the trailing block is optional: it is parsed only
//!   when bytes remain past the exit-tag payload — the
//!   `bytes.len() > cur` predicate at decode time).
39//! - **v3** — Inline cmp@d>0 side-exit scaffolding. Appends a second
40//! trailing block after `per_exit_tags`: a list of
41//! [`PerExitInlineEntry`] records carrying the `cont_pc`,
42//! `head_resume_pc`, the per-slot exit-tag snapshot covering the
43//! trace's full `window_size`, and the `FrameMaterializeInfo`
44//! chain bytes. v3 readers accept v1 and v2 blobs as if the
45//! inline block were empty.
46//!
47//! **Important — install-time invariant**: emitting the v3 inline
48//! block into a meta blob is necessary but **not sufficient** to
49//! safely AOT-install a trace whose `per_exit_inline` is non-
50//! empty. The trace mcode itself today bakes a raw
51//! `Rc::as_ptr(&chain_rc)` value as an `iconst` immediate at lower
52//! time (see `luna-jit/src/jit_backend/trace.rs` near the cmp@d>0
53//! side-exit emit sites). Under JIT the immediate is a live heap
54//! pointer; under AOT it would be the warmup VM's heap address,
55//! invalid in the deploy binary. To actually unlock the install
56//! path the lowerer must:
57//! 1. Emit a writable per-site slot (`__luna_aot_inline_chain_
58//! slot_<key>`) and load through it instead of the raw
59//! `iconst`.
60//! 2. Register a new `luna_inline_chain_idx` bracketed section
61//! analogous to `luna_strkey_idx` so the deploy resolver can
62//! match installed entries to slot addresses.
63//! 3. The deploy walker (after rebuilding `Rc<[FrameMaterializeInfo
64//! ]>` from v3 blob bytes) writes the live address of the
65//! rebuilt chain into each slot before the first dispatch.
66//!
67//! Until those three pieces land, the AOT harvester
68//! (`luna-aot::embed`) MUST keep its
69//! `per_exit_inline.is_empty()` filter so non-empty-inline
70//! traces never produce a v3 blob with inline data — they stay
71//! JIT-only. The wire format below is forward-ready so the
72//! lowerer / resolver work doesn't need a fresh version bump.
73//!
74//! # Field summary
75//!
76//! `CompiledTrace` carries 30+ fields including `RefCell<HashMap>`,
77//! `Box<Cell<*const u8>>`, `Rc<[InlineSideExit]>` — most are side-
78//! trace bookkeeping irrelevant for AOT (the deploy `Vm` never side-
79//! traces an AOT-installed trace, so all those start empty). The
80//! AOT meta format serializes only the **dispatch-load-bearing**
81//! fields:
82//!
83//! - `head_pc`, `n_ops`, `window_size`, `dispatchable`
84//! - `entry_tags: Rc<[u8]>` — per-slot entry-tag specialization
85//! - `exit_tags: Rc<[ExitTag]>` — per-slot exit-tag restore (clean tail)
86//! - `global_tag_res_kind` — fast-path classification
87//! - `per_exit_tags` *(v2+)* — per-cont_pc slot-shape entries the
88//! dispatcher uses to restore vm.stack on a typed-register
89//! side-exit
90//! - `per_exit_inline` *(v3+)* — per-site inline cmp@d>0 side-exit
91//! records: cont_pc + head_resume_pc + per-slot exit_tags + the
92//! `FrameMaterializeInfo` chain bytes. Forward-compat scaffold;
93//! the harvester does not yet emit non-empty entries because the
94//! trace mcode side needs a relocatable chain-slot scheme first
95//! (see the v3 paragraph above).
96//!
97//! `body_writes`, side-trace ptrs etc. default to empty / null.
98
99use crate::jit::trace_types::{ExitTag, FrameMaterializeInfo, TagResKind};
100
/// Magic number that opens every AOT meta blob, stored as the first
/// little-endian `u32`.
///
/// [`decode_meta_blob`] compares the leading word against this value
/// before it looks at any other field and returns an error on mismatch,
/// so a stray or corrupted blob is reported (and the entry skipped)
/// instead of being deserialized as arbitrary data.
pub const AOT_META_MAGIC: u32 = 0xAA77_0001;
106
/// Current wire-format revision, written into every blob header.
///
/// Revision history:
/// - **v1** — minimal cut (sub-piece 4): header plus the two tag arrays.
/// - **v2** — appends a trailing `per_exit_tags` block so traces with
///   typed-register side-exits (GetUpval-heavy traces) install at deploy
///   time.
/// - **v3** — appends a second trailing block carrying `per_exit_inline`
///   data so depth>0 inlined cmp side-exits can be rebuilt at install
///   time. The trace mcode side still needs a relocatable-slot scheme
///   before non-empty inline entries can actually be emitted; the module
///   docs go into the staging plan.
///
/// **Forward compatibility contract**: a v3 writer emits the same
/// fixed-prefix header layout as v1/v2, then the v2 tail, then the v3
/// tail. Both tails are always present; an empty one is a lone
/// `count = 0`. A v3 reader MUST accept a v1-shaped blob (header + tags,
/// no tails) and a v2-shaped blob (header + tags + v2 tail only) as if
/// the missing tails were empty — [`decode_meta_blob`] implements this
/// with its `bytes.len() > cur` test at each tail boundary. A v2 reader
/// handed a v3 blob would mis-parse the v3 tail as garbage, which is why
/// the version is bumped: older readers hard-reject instead of silently
/// mis-installing.
pub const AOT_META_VERSION: u32 = 3;
127
/// Fixed-size prefix found at the start of every meta blob.
///
/// Every integer is little-endian on the wire and the prefix occupies
/// 28 bytes in total. Two variable-length arrays follow it back-to-back:
/// `entry_tags_len` bytes of entry tags, then `exit_tags_len` bytes of
/// packed exit tags.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct AotTraceMetaHeader {
    /// Must equal [`AOT_META_MAGIC`]; the deploy side hard-rejects a mismatch.
    pub magic: u32,
    /// Must equal [`AOT_META_VERSION`]; the deploy side hard-rejects a mismatch.
    pub version: u32,
    /// The trace's `head_pc`, i.e. the PC the dispatcher matches on.
    pub head_pc: u32,
    /// The trace's `n_ops`. Only used for diagnostics on the deploy side.
    pub n_ops: u32,
    /// The trace's `window_size`; sizes the dispatcher's `reg_state` buffer.
    pub window_size: u32,
    /// The trace's `dispatchable` flag encoded as a byte (0 or 1).
    pub dispatchable: u8,
    /// The trace's `global_tag_res_kind` in packed form:
    /// `0 = AllUntouched`, `1 = AllInt`, `2 = Mixed`.
    pub tag_res_kind: u8,
    /// Number of `entry_tags` bytes that follow the header.
    /// `u16` is enough: a trace's `max_stack` is bounded by Lua's
    /// `MAXREGS` (255) and even worst-case inlining caps under 4K.
    pub entry_tags_len: u16,
    /// Number of `exit_tags` bytes that follow the `entry_tags` array.
    pub exit_tags_len: u32,
}
159
160impl AotTraceMetaHeader {
161 /// Byte size of the fixed prefix. Used to compute payload offset.
162 pub const SIZE: usize = 28;
163}
164
165/// Pack an `ExitTag` into its on-disk `u8` representation. Mirrors the
166/// `#[repr(u8)]` discriminant so the wire format is the same byte the
167/// compiler would lay out — but we go through the explicit match so a
168/// future reorder of [`ExitTag`]'s variants doesn't silently change
169/// the format.
170pub fn pack_exit_tag(t: ExitTag) -> u8 {
171 match t {
172 ExitTag::Untouched => 0,
173 ExitTag::Int => 1,
174 ExitTag::Float => 2,
175 ExitTag::Table => 3,
176 ExitTag::Closure => 4,
177 ExitTag::Nil => 5,
178 ExitTag::Str => 6,
179 }
180}
181
182/// Inverse of [`pack_exit_tag`]. Returns `None` on an unknown byte
183/// (treated as a corruption signal by the deploy walker).
184pub fn unpack_exit_tag(b: u8) -> Option<ExitTag> {
185 match b {
186 0 => Some(ExitTag::Untouched),
187 1 => Some(ExitTag::Int),
188 2 => Some(ExitTag::Float),
189 3 => Some(ExitTag::Table),
190 4 => Some(ExitTag::Closure),
191 5 => Some(ExitTag::Nil),
192 6 => Some(ExitTag::Str),
193 _ => None,
194 }
195}
196
197/// Pack a [`TagResKind`] into its wire byte.
198pub fn pack_tag_res_kind(k: TagResKind) -> u8 {
199 match k {
200 TagResKind::AllUntouched => 0,
201 TagResKind::AllInt => 1,
202 TagResKind::Mixed => 2,
203 }
204}
205
206/// Inverse of [`pack_tag_res_kind`]. Returns `None` on an unknown byte.
207pub fn unpack_tag_res_kind(b: u8) -> Option<TagResKind> {
208 match b {
209 0 => Some(TagResKind::AllUntouched),
210 1 => Some(TagResKind::AllInt),
211 2 => Some(TagResKind::Mixed),
212 _ => None,
213 }
214}
215
/// A single per-`cont_pc` side-exit record, as stored in the v2 tail of a
/// meta blob.
///
/// It has the same shape as one element of
/// `CompiledTrace::per_exit_tags` (`(u32, Rc<[ExitTag]>)`), except that
/// the `ExitTag` slice is held in its packed byte form, i.e. already run
/// through [`pack_exit_tag`].
#[derive(Clone, Debug)]
pub struct PerExitTagsEntry {
    /// PC at which the interpreter resumes once the side-exit fires.
    /// Matches the `iconst` the IR bakes into the side-exit return.
    pub cont_pc: u32,
    /// Snapshot of every slot's `ExitTag` at the moment of the side-exit,
    /// one [`pack_exit_tag`] byte per slot. The length is the trace's
    /// caller-window `max_stack` (always ≤ `window_size`).
    pub tags_packed: Vec<u8>,
}
230
/// A single per-site inline cmp@d>0 side-exit record, as stored in the v3
/// tail of a meta blob.
///
/// It mirrors the [`crate::jit::trace_types::InlineSideExit`] elements of
/// `CompiledTrace::per_exit_inline`, leaving out the runtime-only
/// `side_trace_ptr` cell (which always defaults to null on AOT install).
/// The frame chain is kept as raw bytes: every [`FrameMaterializeInfo`]
/// record is three 32-bit fields, i.e. exactly 12 bytes on the wire, so
/// the layout is stable across supported targets.
#[derive(Clone, Debug)]
pub struct PerExitInlineEntry {
    /// PC at which the interpreter resumes once the inline side-exit
    /// fires. Mirrors `InlineSideExit::cont_pc`.
    pub cont_pc: u32,
    /// PC written to the trace head frame when the side-exit fires (the
    /// outermost self-rec Call's `pc + 1`). Mirrors
    /// `InlineSideExit::head_resume_pc`.
    pub head_resume_pc: u32,
    /// Snapshot of every slot's `ExitTag` at the moment of the side-exit,
    /// one [`pack_exit_tag`] byte per slot. Unlike `per_exit_tags`, whose
    /// arrays stop at `max_stack`, this array spans the trace's full
    /// `window_size` (the caller plus every inlined frame's register
    /// window).
    pub tags_packed: Vec<u8>,
    /// The `FrameMaterializeInfo` chain as raw bytes (`count * 12`).
    /// Records run from the outermost frame (depth 1) to the innermost
    /// (depth N). The innermost frame's `pc` has already been overwritten
    /// with the side-exit PC at AOT-compile time (matching the JIT-side
    /// `snapshot.last_mut().pc = side_exit_pc` step). A length divisible
    /// by 12 is a wire-format invariant; the decoder rejects anything
    /// else.
    pub chain_bytes: Vec<u8>,
}
262
263impl PerExitInlineEntry {
264 /// Byte size of one `FrameMaterializeInfo` on the wire. Asserted
265 /// at compile time via [`FRAME_MATERIALIZE_INFO_WIRE_SIZE_CHECK`]
266 /// against the live struct so layout drift fails the build.
267 pub const FRAME_MATERIALIZE_INFO_SIZE: usize = 12;
268
269 /// Construct a `PerExitInlineEntry` from a live
270 /// [`crate::jit::trace_types::InlineSideExit`]. The chain is
271 /// serialized via a raw-byte copy — `FrameMaterializeInfo` is
272 /// `repr(C)` + all-`Copy` fields with no padding, so the byte
273 /// pattern matches the on-disk wire layout verbatim.
274 ///
275 /// # Safety
276 ///
277 /// `FrameMaterializeInfo` is `#[repr(C)]` with three 32-bit
278 /// fields and no padding; the byte-level transmute below is
279 /// sound per the wire-size assertion at module top.
280 pub fn from_inline_side_exit(src: &crate::jit::trace_types::InlineSideExit) -> Self {
281 let tags_packed: Vec<u8> = src.exit_tags.iter().copied().map(pack_exit_tag).collect();
282 let n = src.chain.len();
283 let mut chain_bytes = Vec::with_capacity(n * Self::FRAME_MATERIALIZE_INFO_SIZE);
284 for fm in src.chain.iter() {
285 chain_bytes.extend_from_slice(&fm.base_offset.to_le_bytes());
286 chain_bytes.extend_from_slice(&fm.pc.to_le_bytes());
287 chain_bytes.extend_from_slice(&fm.nresults.to_le_bytes());
288 }
289 PerExitInlineEntry {
290 cont_pc: src.cont_pc,
291 head_resume_pc: src.head_resume_pc,
292 tags_packed,
293 chain_bytes,
294 }
295 }
296
297 /// Reconstruct a `Vec<FrameMaterializeInfo>` from this entry's
298 /// `chain_bytes`. Returns `None` if `chain_bytes.len()` is not a
299 /// multiple of [`Self::FRAME_MATERIALIZE_INFO_SIZE`] (corruption
300 /// signal — the deploy walker should skip the entry).
301 pub fn rebuild_chain(&self) -> Option<Vec<FrameMaterializeInfo>> {
302 let unit = Self::FRAME_MATERIALIZE_INFO_SIZE;
303 if !self.chain_bytes.len().is_multiple_of(unit) {
304 return None;
305 }
306 let n = self.chain_bytes.len() / unit;
307 let mut out = Vec::with_capacity(n);
308 for i in 0..n {
309 let off = i * unit;
310 let base_offset =
311 u32::from_le_bytes(self.chain_bytes[off..off + 4].try_into().unwrap());
312 let pc = u32::from_le_bytes(self.chain_bytes[off + 4..off + 8].try_into().unwrap());
313 let nresults =
314 i32::from_le_bytes(self.chain_bytes[off + 8..off + 12].try_into().unwrap());
315 out.push(FrameMaterializeInfo {
316 base_offset,
317 pc,
318 nresults,
319 });
320 }
321 Some(out)
322 }
323}
324
325/// Static assertion that [`FrameMaterializeInfo`]'s in-memory size
326/// matches the wire-format constant. A regression here (a fourth
327/// field, a different layout attribute) silently misaligns the
328/// `chain_bytes` (re)serialization; the build break makes the drift
329/// loud at the source instead of mysterious at deploy time.
330pub const FRAME_MATERIALIZE_INFO_WIRE_SIZE_CHECK: () = assert!(
331 core::mem::size_of::<FrameMaterializeInfo>() == PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE,
332 "FrameMaterializeInfo wire size drifted — update PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE \
333 and any deploy-side rebuilders together"
334);
335
336/// Serialize a header + the two tag arrays + the v2 `per_exit_tags`
337/// tail + the v3 `per_exit_inline` tail into a fresh `Vec<u8>`. Pass
338/// empty slices to emit a "simple" trace (each tail then carries a
339/// single `count = 0` u32 — still v3 layout, just empty).
340///
341/// The produced bytes are what `luna-aot` embeds into the
342/// `luna_trace_blob` section per-trace; the deploy walker reads from
343/// the same wire shape via [`decode_meta_blob`].
344pub fn encode_meta_blob(
345 header: &AotTraceMetaHeader,
346 entry_tags: &[u8],
347 exit_tags_packed: &[u8],
348 per_exit_tags: &[PerExitTagsEntry],
349 per_exit_inline: &[PerExitInlineEntry],
350) -> Vec<u8> {
351 assert_eq!(entry_tags.len(), header.entry_tags_len as usize);
352 assert_eq!(exit_tags_packed.len(), header.exit_tags_len as usize);
353 assert_eq!(header.version, AOT_META_VERSION);
354 let v2_tail_bytes: usize = 4 + per_exit_tags
355 .iter()
356 .map(|e| 4 + 4 + e.tags_packed.len())
357 .sum::<usize>();
358 let v3_tail_bytes: usize = 4 + per_exit_inline
359 .iter()
360 .map(|e| 4 + 4 + 4 + e.tags_packed.len() + 4 + e.chain_bytes.len())
361 .sum::<usize>();
362 let mut out = Vec::with_capacity(
363 AotTraceMetaHeader::SIZE
364 + entry_tags.len()
365 + exit_tags_packed.len()
366 + v2_tail_bytes
367 + v3_tail_bytes,
368 );
369 out.extend_from_slice(&header.magic.to_le_bytes());
370 out.extend_from_slice(&header.version.to_le_bytes());
371 out.extend_from_slice(&header.head_pc.to_le_bytes());
372 out.extend_from_slice(&header.n_ops.to_le_bytes());
373 out.extend_from_slice(&header.window_size.to_le_bytes());
374 out.push(header.dispatchable);
375 out.push(header.tag_res_kind);
376 out.extend_from_slice(&header.entry_tags_len.to_le_bytes());
377 out.extend_from_slice(&header.exit_tags_len.to_le_bytes());
378 out.extend_from_slice(entry_tags);
379 out.extend_from_slice(exit_tags_packed);
380 // v2 tail: u32 count, then per entry [cont_pc:u32, tags_len:u32, tags:[u8; tags_len]].
381 out.extend_from_slice(&(per_exit_tags.len() as u32).to_le_bytes());
382 for ent in per_exit_tags {
383 out.extend_from_slice(&ent.cont_pc.to_le_bytes());
384 out.extend_from_slice(&(ent.tags_packed.len() as u32).to_le_bytes());
385 out.extend_from_slice(&ent.tags_packed);
386 }
387 // v3 tail: u32 count, then per entry
388 // [cont_pc:u32, head_resume_pc:u32, tags_len:u32, tags:[u8; tags_len],
389 // chain_bytes_len:u32, chain_bytes:[u8; chain_bytes_len]].
390 // chain_bytes_len is always a multiple of 12 (FrameMaterializeInfo
391 // wire size); the decoder rejects otherwise as a corruption signal.
392 out.extend_from_slice(&(per_exit_inline.len() as u32).to_le_bytes());
393 for ent in per_exit_inline {
394 out.extend_from_slice(&ent.cont_pc.to_le_bytes());
395 out.extend_from_slice(&ent.head_resume_pc.to_le_bytes());
396 out.extend_from_slice(&(ent.tags_packed.len() as u32).to_le_bytes());
397 out.extend_from_slice(&ent.tags_packed);
398 out.extend_from_slice(&(ent.chain_bytes.len() as u32).to_le_bytes());
399 out.extend_from_slice(&ent.chain_bytes);
400 }
401 out
402}
403
404/// Decoded shape returned by [`decode_meta_blob`].
405#[derive(Debug)]
406pub struct DecodedMeta {
407 /// The fixed-prefix header.
408 pub header: AotTraceMetaHeader,
409 /// `entry_tags` payload (length = `header.entry_tags_len`).
410 pub entry_tags: Vec<u8>,
411 /// `exit_tags` payload (length = `header.exit_tags_len`), still in
412 /// packed `u8` form. Caller maps each through [`unpack_exit_tag`].
413 pub exit_tags: Vec<u8>,
414 /// v2 tail — per-cont_pc tag arrays. Empty for v1 blobs and for
415 /// v2+ traces with no typed-register side-exits.
416 pub per_exit_tags: Vec<PerExitTagsEntry>,
417 /// v3 tail — per-site inline cmp@d>0 side-exit metadata.
418 /// Empty for v1 / v2 blobs and for v3 traces with no inlined
419 /// side-exits. Today the AOT harvester filters out traces with
420 /// non-empty `per_exit_inline` regardless (see module docs);
421 /// the field exists so the wire format is forward-ready for the
422 /// relocatable-chain-slot lowerer work that flips the filter.
423 pub per_exit_inline: Vec<PerExitInlineEntry>,
424}
425
426/// Deserialize a blob produced by [`encode_meta_blob`]. Returns
427/// `Err(reason)` on magic / version / length mismatch — the deploy
428/// walker should skip the entry and log the reason rather than
429/// installing a broken trace.
430pub fn decode_meta_blob(bytes: &[u8]) -> Result<DecodedMeta, &'static str> {
431 if bytes.len() < AotTraceMetaHeader::SIZE {
432 return Err("blob shorter than header");
433 }
434 let magic = u32::from_le_bytes(bytes[0..4].try_into().unwrap());
435 if magic != AOT_META_MAGIC {
436 return Err("AOT_META_MAGIC mismatch");
437 }
438 let version = u32::from_le_bytes(bytes[4..8].try_into().unwrap());
439 if version != AOT_META_VERSION {
440 return Err("AOT_META_VERSION mismatch");
441 }
442 let head_pc = u32::from_le_bytes(bytes[8..12].try_into().unwrap());
443 let n_ops = u32::from_le_bytes(bytes[12..16].try_into().unwrap());
444 let window_size = u32::from_le_bytes(bytes[16..20].try_into().unwrap());
445 let dispatchable = bytes[20];
446 let tag_res_kind = bytes[21];
447 let entry_tags_len = u16::from_le_bytes(bytes[22..24].try_into().unwrap());
448 let exit_tags_len = u32::from_le_bytes(bytes[24..28].try_into().unwrap());
449 let header = AotTraceMetaHeader {
450 magic,
451 version,
452 head_pc,
453 n_ops,
454 window_size,
455 dispatchable,
456 tag_res_kind,
457 entry_tags_len,
458 exit_tags_len,
459 };
460 let total_payload = entry_tags_len as usize + exit_tags_len as usize;
461 if bytes.len() < AotTraceMetaHeader::SIZE + total_payload {
462 return Err("blob shorter than declared payload");
463 }
464 let entry_start = AotTraceMetaHeader::SIZE;
465 let entry_end = entry_start + entry_tags_len as usize;
466 let exit_end = entry_end + exit_tags_len as usize;
467 let entry_tags = bytes[entry_start..entry_end].to_vec();
468 let exit_tags = bytes[entry_end..exit_end].to_vec();
469 // v2 tail: optional per_exit_tags block. Absent (= empty) when
470 // the blob ends exactly at `exit_end` — covers v1-shaped
471 // producers (which never wrote a tail) and v2+ producers
472 // serializing a trace with zero typed-register side-exits.
473 let mut per_exit_tags: Vec<PerExitTagsEntry> = Vec::new();
474 let mut cur = exit_end;
475 if bytes.len() > cur {
476 if bytes.len() < cur + 4 {
477 return Err("v2 tail truncated at count");
478 }
479 let count = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
480 cur += 4;
481 per_exit_tags.reserve(count);
482 for _ in 0..count {
483 if bytes.len() < cur + 8 {
484 return Err("v2 tail truncated at entry header");
485 }
486 let cont_pc = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap());
487 cur += 4;
488 let tags_len = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
489 cur += 4;
490 if bytes.len() < cur + tags_len {
491 return Err("v2 tail truncated at entry tags");
492 }
493 let tags_packed = bytes[cur..cur + tags_len].to_vec();
494 cur += tags_len;
495 per_exit_tags.push(PerExitTagsEntry {
496 cont_pc,
497 tags_packed,
498 });
499 }
500 }
501 // v3 tail: optional per_exit_inline block. Absent when the blob
502 // ends at the v2 tail boundary — covers v1 / v2 producers and
503 // v3 producers serializing a trace with zero inline cmp@d>0
504 // side-exits. Tail layout per entry:
505 // cont_pc:u32, head_resume_pc:u32,
506 // tags_len:u32, tags:[u8; tags_len],
507 // chain_bytes_len:u32, chain_bytes:[u8; chain_bytes_len]
508 // chain_bytes_len validated as a multiple of 12
509 // (FrameMaterializeInfo wire size) — non-multiple = corruption,
510 // we Err so the deploy walker skips the entry cleanly.
511 let mut per_exit_inline: Vec<PerExitInlineEntry> = Vec::new();
512 if bytes.len() > cur {
513 if bytes.len() < cur + 4 {
514 return Err("v3 tail truncated at count");
515 }
516 let count = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
517 cur += 4;
518 per_exit_inline.reserve(count);
519 for _ in 0..count {
520 if bytes.len() < cur + 12 {
521 return Err("v3 tail truncated at entry header");
522 }
523 let cont_pc = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap());
524 cur += 4;
525 let head_resume_pc = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap());
526 cur += 4;
527 let tags_len = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
528 cur += 4;
529 if bytes.len() < cur + tags_len {
530 return Err("v3 tail truncated at entry tags");
531 }
532 let tags_packed = bytes[cur..cur + tags_len].to_vec();
533 cur += tags_len;
534 if bytes.len() < cur + 4 {
535 return Err("v3 tail truncated at chain header");
536 }
537 let chain_bytes_len =
538 u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()) as usize;
539 cur += 4;
540 if bytes.len() < cur + chain_bytes_len {
541 return Err("v3 tail truncated at chain bytes");
542 }
543 if !chain_bytes_len.is_multiple_of(PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE) {
544 return Err("v3 tail chain_bytes_len not a multiple of FrameMaterializeInfo size");
545 }
546 let chain_bytes = bytes[cur..cur + chain_bytes_len].to_vec();
547 cur += chain_bytes_len;
548 per_exit_inline.push(PerExitInlineEntry {
549 cont_pc,
550 head_resume_pc,
551 tags_packed,
552 chain_bytes,
553 });
554 }
555 }
556 Ok(DecodedMeta {
557 header,
558 entry_tags,
559 exit_tags,
560 per_exit_tags,
561 per_exit_inline,
562 })
563}
564
/// Layout of one index entry in the deploy-side `luna_trace_meta`
/// section.
///
/// Each entry is 48 bytes. The static linker fills `fn_ptr` and
/// `meta_ptr` with relocations that resolve to the trace's `.text` body
/// and to the matching `luna_trace_blob` payload respectively.
///
/// The deploy walker finds the section bounds through linker-synthetic
/// symbols — `__start_luna_trace_meta` / `__stop_luna_trace_meta` on ELF,
/// `section$start$__DATA$luna_trace_meta` on Mach-O — mirroring
/// sub-piece 3's `luna_strkey_idx` plumbing.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct AotTraceIndexEntry {
    /// `Proto::stable_hash()`; matches the AOT-time proto identity
    /// against the deploy-loaded proto tree.
    pub proto_hash: [u8; 16],
    /// The trace's `head_pc`. Together with `proto_hash` it is used to
    /// detect duplicate installs and to log which trace fired.
    pub head_pc: u32,
    /// Padding so the 64-bit address fields below start 8-byte aligned.
    pub _pad: u32,
    /// Address of the AOT-emitted trace fn
    /// (`extern "C" fn(*mut i64) -> i64`), resolved by the linker as a
    /// relocation against the `luna_aot_trace_<idx>` symbol the lowerer
    /// exports. Stored as `u64` so the wire layout is identical across
    /// 32/64-bit targets — wasm32 and other 32-bit targets cast through
    /// this field. AOT-binary deploy is always 64-bit (cross-compile to
    /// 32-bit targets is disabled at the linker step), so the upper
    /// 32 bits are zero in practice.
    pub fn_ptr: u64,
    /// Address of the matching meta blob in `luna_trace_blob`. A `u64`
    /// for the same width-stability reason as `fn_ptr`.
    pub meta_ptr: u64,
    /// Length of the meta blob; the deploy walker hard-rejects entries
    /// whose declared payload exceeds it.
    pub meta_len: u32,
    /// Trailing padding that rounds the entry up to a multiple of
    /// 8 bytes (48 in total).
    pub _pad2: u32,
}
604
605impl AotTraceIndexEntry {
606 /// Byte size of one index entry. Compile-time assertion lives
607 /// next to the type via [`AOT_TRACE_INDEX_ENTRY_SIZE_CHECK`].
608 pub const SIZE: usize = 48;
609}
610
611/// Static assertion that `AotTraceIndexEntry` is exactly 48 bytes on
612/// the host build. Both crates that consume this format (`luna-aot`,
613/// `luna-runtime-helpers`) inherit the assertion via the type, so a
614/// padding regression fails compilation before the wire format
615/// silently misaligns.
616pub const AOT_TRACE_INDEX_ENTRY_SIZE_CHECK: () = assert!(
617 core::mem::size_of::<AotTraceIndexEntry>() == AotTraceIndexEntry::SIZE,
618 "AotTraceIndexEntry must be 48 bytes — alignment / padding regressed"
619);
620
621#[cfg(test)]
622mod tests {
623 use super::*;
624
625 #[test]
626 fn header_round_trip() {
627 let header = AotTraceMetaHeader {
628 magic: AOT_META_MAGIC,
629 version: AOT_META_VERSION,
630 head_pc: 42,
631 n_ops: 7,
632 window_size: 4,
633 dispatchable: 1,
634 tag_res_kind: pack_tag_res_kind(TagResKind::AllInt),
635 entry_tags_len: 2,
636 exit_tags_len: 3,
637 };
638 let entry_tags = vec![1u8, 2u8];
639 let exit_tags = vec![
640 pack_exit_tag(ExitTag::Int),
641 pack_exit_tag(ExitTag::Untouched),
642 pack_exit_tag(ExitTag::Float),
643 ];
644 let blob = encode_meta_blob(&header, &entry_tags, &exit_tags, &[], &[]);
645 // SIZE + entry_tags + exit_tags + v2-tail-count(4) + v3-tail-count(4)
646 assert_eq!(blob.len(), AotTraceMetaHeader::SIZE + 2 + 3 + 4 + 4);
647 let decoded = decode_meta_blob(&blob).expect("decode");
648 assert!(decoded.per_exit_tags.is_empty());
649 assert!(decoded.per_exit_inline.is_empty());
650 assert_eq!(decoded.header.head_pc, 42);
651 assert_eq!(decoded.header.window_size, 4);
652 assert_eq!(decoded.header.dispatchable, 1);
653 assert_eq!(decoded.entry_tags, entry_tags);
654 assert_eq!(decoded.exit_tags, exit_tags);
655 assert_eq!(
656 unpack_tag_res_kind(decoded.header.tag_res_kind),
657 Some(TagResKind::AllInt)
658 );
659 for (raw, expected) in
660 decoded
661 .exit_tags
662 .iter()
663 .zip([ExitTag::Int, ExitTag::Untouched, ExitTag::Float])
664 {
665 assert_eq!(unpack_exit_tag(*raw), Some(expected));
666 }
667 }
668
669 #[test]
670 fn decode_rejects_magic_mismatch() {
671 let mut blob = vec![0u8; AotTraceMetaHeader::SIZE];
672 // Magic stays zero.
673 let err = decode_meta_blob(&blob).unwrap_err();
674 assert!(err.contains("MAGIC"));
675 // Now valid magic + wrong version.
676 blob[..4].copy_from_slice(&AOT_META_MAGIC.to_le_bytes());
677 let err = decode_meta_blob(&blob).unwrap_err();
678 assert!(err.contains("VERSION"));
679 }
680
681 #[test]
682 fn v2_per_exit_tags_round_trip() {
683 // Two entries — one shorter than the other so the tail walker
684 // exercises variable-length parsing per entry.
685 let header = AotTraceMetaHeader {
686 magic: AOT_META_MAGIC,
687 version: AOT_META_VERSION,
688 head_pc: 7,
689 n_ops: 12,
690 window_size: 5,
691 dispatchable: 1,
692 tag_res_kind: pack_tag_res_kind(TagResKind::Mixed),
693 entry_tags_len: 0,
694 exit_tags_len: 0,
695 };
696 let entries = vec![
697 PerExitTagsEntry {
698 cont_pc: 3,
699 tags_packed: vec![
700 pack_exit_tag(ExitTag::Int),
701 pack_exit_tag(ExitTag::Untouched),
702 ],
703 },
704 PerExitTagsEntry {
705 cont_pc: 11,
706 tags_packed: vec![
707 pack_exit_tag(ExitTag::Closure),
708 pack_exit_tag(ExitTag::Table),
709 pack_exit_tag(ExitTag::Float),
710 ],
711 },
712 ];
713 let blob = encode_meta_blob(&header, &[], &[], &entries, &[]);
714 let decoded = decode_meta_blob(&blob).expect("decode v2");
715 assert_eq!(decoded.per_exit_tags.len(), 2);
716 assert_eq!(decoded.per_exit_tags[0].cont_pc, 3);
717 assert_eq!(decoded.per_exit_tags[0].tags_packed.len(), 2);
718 assert_eq!(decoded.per_exit_tags[1].cont_pc, 11);
719 assert_eq!(decoded.per_exit_tags[1].tags_packed.len(), 3);
720 assert!(decoded.per_exit_inline.is_empty());
721 }
722
#[test]
fn v3_per_exit_inline_round_trip() {
    // Round-trips two inline-side-exit entries through the v3
    // trailing block. The entries deliberately differ in tag count
    // (2 vs 3) and chain depth (1 vs 2 frames) so the tail walker
    // exercises variable-length per-entry parsing. Every scalar
    // field of BOTH entries is asserted after decode, so a walker
    // that mis-steps between entries cannot pass unnoticed.
    let header = AotTraceMetaHeader {
        magic: AOT_META_MAGIC,
        version: AOT_META_VERSION,
        head_pc: 0,
        n_ops: 0,
        window_size: 8,
        dispatchable: 1,
        tag_res_kind: pack_tag_res_kind(TagResKind::Mixed),
        entry_tags_len: 0,
        exit_tags_len: 0,
    };
    // Hand-roll the inline entries (the live-CompiledTrace
    // converter is exercised by the round-trip-from-live test).
    let inline = vec![
        PerExitInlineEntry {
            cont_pc: 5,
            head_resume_pc: 9,
            tags_packed: vec![pack_exit_tag(ExitTag::Int), pack_exit_tag(ExitTag::Int)],
            // 1 FrameMaterializeInfo = 12 bytes:
            // base_offset = 3, pc = 4, nresults = 1
            chain_bytes: {
                let mut v = Vec::new();
                v.extend_from_slice(&3u32.to_le_bytes());
                v.extend_from_slice(&4u32.to_le_bytes());
                v.extend_from_slice(&1i32.to_le_bytes());
                v
            },
        },
        PerExitInlineEntry {
            cont_pc: 17,
            head_resume_pc: 21,
            tags_packed: vec![
                pack_exit_tag(ExitTag::Closure),
                pack_exit_tag(ExitTag::Untouched),
                pack_exit_tag(ExitTag::Float),
            ],
            // 2 frames = 24 bytes.
            chain_bytes: {
                let mut v = Vec::new();
                for (off, pc, nr) in [(2u32, 7u32, 1i32), (5u32, 11u32, 2i32)] {
                    v.extend_from_slice(&off.to_le_bytes());
                    v.extend_from_slice(&pc.to_le_bytes());
                    v.extend_from_slice(&nr.to_le_bytes());
                }
                v
            },
        },
    ];
    let blob = encode_meta_blob(&header, &[], &[], &[], &inline);
    let decoded = decode_meta_blob(&blob).expect("decode v3");

    // No v2 entries were encoded, so that tail must decode empty.
    assert!(decoded.per_exit_tags.is_empty());
    assert_eq!(decoded.per_exit_inline.len(), 2);

    // Entry 0: two tags, one frame.
    assert_eq!(decoded.per_exit_inline[0].cont_pc, 5);
    assert_eq!(decoded.per_exit_inline[0].head_resume_pc, 9);
    assert_eq!(decoded.per_exit_inline[0].tags_packed.len(), 2);
    let chain0 = decoded.per_exit_inline[0]
        .rebuild_chain()
        .expect("rebuild chain[0]");
    assert_eq!(chain0.len(), 1);
    assert_eq!(chain0[0].base_offset, 3);
    assert_eq!(chain0[0].pc, 4);
    assert_eq!(chain0[0].nresults, 1);

    // Entry 1: three tags, two frames. Its header fields only come
    // out right if the walker skipped entry 0's variable-length
    // payload by exactly the right number of bytes.
    assert_eq!(decoded.per_exit_inline[1].cont_pc, 17);
    assert_eq!(decoded.per_exit_inline[1].head_resume_pc, 21);
    assert_eq!(decoded.per_exit_inline[1].tags_packed.len(), 3);
    let chain1 = decoded.per_exit_inline[1]
        .rebuild_chain()
        .expect("rebuild chain[1]");
    assert_eq!(chain1.len(), 2);
    assert_eq!(chain1[0].base_offset, 2);
    assert_eq!(chain1[0].pc, 7);
    assert_eq!(chain1[0].nresults, 1);
    assert_eq!(chain1[1].base_offset, 5);
    assert_eq!(chain1[1].pc, 11);
    assert_eq!(chain1[1].nresults, 2);
}
797
#[test]
fn v3_per_exit_inline_round_trip_from_live() {
    // Drift guard between the live struct and the wire layout:
    // build an InlineSideExit by hand, convert it with
    // PerExitInlineEntry::from_inline_side_exit, then encode,
    // decode and rebuild the chain, checking the fields survive.
    use crate::jit::send_compat::{TArc, TCellPtr};
    use crate::jit::trace_types::{ExitTag, FrameMaterializeInfo, InlineSideExit};

    let frames = vec![FrameMaterializeInfo {
        base_offset: 1,
        pc: 2,
        nresults: 3,
    }];
    let tags = vec![ExitTag::Int, ExitTag::Float, ExitTag::Untouched];
    let side_exit = InlineSideExit {
        cont_pc: 42,
        head_resume_pc: 50,
        exit_tags: TArc::from(tags.into_boxed_slice()),
        chain: TArc::from(frames.into_boxed_slice()),
        side_trace_ptr: Box::new(TCellPtr::null()),
    };

    // Conversion step: scalar fields copied, one tag byte per tag,
    // one fixed-size record for the single frame.
    let wire_entry = PerExitInlineEntry::from_inline_side_exit(&side_exit);
    assert_eq!(wire_entry.cont_pc, 42);
    assert_eq!(wire_entry.head_resume_pc, 50);
    assert_eq!(wire_entry.tags_packed.len(), 3);
    assert_eq!(
        wire_entry.chain_bytes.len(),
        PerExitInlineEntry::FRAME_MATERIALIZE_INFO_SIZE
    );

    let header = AotTraceMetaHeader {
        magic: AOT_META_MAGIC,
        version: AOT_META_VERSION,
        head_pc: 0,
        n_ops: 0,
        window_size: 4,
        dispatchable: 1,
        tag_res_kind: pack_tag_res_kind(TagResKind::Mixed),
        entry_tags_len: 0,
        exit_tags_len: 0,
    };

    // Wire round trip, then rebuild the frame chain from the bytes.
    let encoded = encode_meta_blob(&header, &[], &[], &[], &[wire_entry]);
    let decoded = decode_meta_blob(&encoded).expect("decode v3 from live");
    assert_eq!(decoded.per_exit_inline.len(), 1);
    let rebuilt = decoded.per_exit_inline[0].rebuild_chain().expect("rebuild");
    assert_eq!(rebuilt.len(), 1);
    let frame = &rebuilt[0];
    assert_eq!(frame.base_offset, 1);
    assert_eq!(frame.pc, 2);
    assert_eq!(frame.nresults, 3);
}
848
#[test]
fn decode_rejects_v3_chain_bytes_misaligned() {
    // A v3 entry whose chain_bytes_len is not a multiple of 12 must
    // make the decoder return Err rather than silently truncate, so
    // the deploy walker gets a clean skip signal. The blob is
    // assembled by hand because the encoder would not emit it.
    let header = AotTraceMetaHeader {
        magic: AOT_META_MAGIC,
        version: AOT_META_VERSION,
        head_pc: 0,
        n_ops: 0,
        window_size: 0,
        dispatchable: 0,
        tag_res_kind: 0,
        entry_tags_len: 0,
        exit_tags_len: 0,
    };
    // Fixed header, one slice per field in wire order.
    let mut wire: Vec<u8> = [
        &header.magic.to_le_bytes()[..],
        &header.version.to_le_bytes()[..],
        &header.head_pc.to_le_bytes()[..],
        &header.n_ops.to_le_bytes()[..],
        &header.window_size.to_le_bytes()[..],
        &[header.dispatchable, header.tag_res_kind][..],
        &header.entry_tags_len.to_le_bytes()[..],
        &header.exit_tags_len.to_le_bytes()[..],
    ]
    .concat();
    // v2 tail: count=0.
    wire.extend_from_slice(&0u32.to_le_bytes());
    // v3 tail: count=1, cont_pc=0, head_resume_pc=0, tags_len=0,
    // chain_bytes_len=7 (not a multiple of 12) ...
    for word in [1u32, 0, 0, 0, 7] {
        wire.extend_from_slice(&word.to_le_bytes());
    }
    // ... followed by the 7 zero chain bytes themselves.
    wire.extend_from_slice(&[0u8; 7]);

    let err = decode_meta_blob(&wire).unwrap_err();
    assert!(
        err.contains("FrameMaterializeInfo"),
        "expected misalignment err, got {err:?}"
    );
}
892
#[test]
fn decode_tolerates_v1_blob_shape() {
    // A v1-shaped blob is just header + tag bytes with neither the
    // v2 nor the v3 trailing block. The current decoder should
    // accept it and report both trailing collections as empty.
    let header = AotTraceMetaHeader {
        magic: AOT_META_MAGIC,
        version: AOT_META_VERSION,
        head_pc: 0,
        n_ops: 0,
        window_size: 0,
        dispatchable: 0,
        tag_res_kind: 0,
        entry_tags_len: 1,
        exit_tags_len: 0,
    };

    let mut raw: Vec<u8> = Vec::new();
    raw.extend(header.magic.to_le_bytes());
    raw.extend(header.version.to_le_bytes());
    raw.extend(header.head_pc.to_le_bytes());
    raw.extend(header.n_ops.to_le_bytes());
    raw.extend(header.window_size.to_le_bytes());
    raw.extend([header.dispatchable, header.tag_res_kind]);
    raw.extend(header.entry_tags_len.to_le_bytes());
    raw.extend(header.exit_tags_len.to_le_bytes());
    // The single entry tag announced by entry_tags_len.
    raw.push(0);
    // Deliberately nothing after the tags: no v2/v3 tails.

    let decoded = decode_meta_blob(&raw).expect("decode v1-shaped");
    assert!(decoded.per_exit_tags.is_empty());
    assert!(decoded.per_exit_inline.is_empty());
}
924
#[test]
fn decode_tolerates_v2_blob_shape() {
    // Writes the fixed header field by field, in wire order.
    fn header_bytes(h: &AotTraceMetaHeader) -> Vec<u8> {
        let mut out = Vec::new();
        out.extend_from_slice(&h.magic.to_le_bytes());
        out.extend_from_slice(&h.version.to_le_bytes());
        out.extend_from_slice(&h.head_pc.to_le_bytes());
        out.extend_from_slice(&h.n_ops.to_le_bytes());
        out.extend_from_slice(&h.window_size.to_le_bytes());
        out.push(h.dispatchable);
        out.push(h.tag_res_kind);
        out.extend_from_slice(&h.entry_tags_len.to_le_bytes());
        out.extend_from_slice(&h.exit_tags_len.to_le_bytes());
        out
    }

    // A v2-shaped blob carries header + tags + the v2 tail, but
    // stops before the v3 count word. The current decoder should
    // accept it and report an empty per_exit_inline.
    let mut raw = header_bytes(&AotTraceMetaHeader {
        magic: AOT_META_MAGIC,
        version: AOT_META_VERSION,
        head_pc: 0,
        n_ops: 0,
        window_size: 0,
        dispatchable: 0,
        tag_res_kind: 0,
        entry_tags_len: 0,
        exit_tags_len: 0,
    });
    // v2 tail: count=1, then one entry (cont_pc=3, tags_len=1).
    for word in [1u32, 3, 1] {
        raw.extend_from_slice(&word.to_le_bytes());
    }
    // The entry's single tag byte: tags=[Int].
    raw.push(pack_exit_tag(ExitTag::Int));
    // Deliberately no v3 tail.

    let decoded = decode_meta_blob(&raw).expect("decode v2-shaped");
    assert_eq!(decoded.per_exit_tags.len(), 1);
    assert!(decoded.per_exit_inline.is_empty());
}
961
#[test]
fn decode_rejects_truncated() {
    // The header itself is well-formed, but exit_tags_len promises
    // 10 payload bytes that the blob never delivers.
    let header = AotTraceMetaHeader {
        magic: AOT_META_MAGIC,
        version: AOT_META_VERSION,
        head_pc: 0,
        n_ops: 0,
        window_size: 0,
        dispatchable: 0,
        tag_res_kind: 0,
        entry_tags_len: 0,
        exit_tags_len: 10,
    };
    // Header bytes only, chained field by field in wire order.
    let header_only: Vec<u8> = std::iter::empty::<u8>()
        .chain(header.magic.to_le_bytes())
        .chain(header.version.to_le_bytes())
        .chain(header.head_pc.to_le_bytes())
        .chain(header.n_ops.to_le_bytes())
        .chain(header.window_size.to_le_bytes())
        .chain([header.dispatchable, header.tag_res_kind])
        .chain(header.entry_tags_len.to_le_bytes())
        .chain(header.exit_tags_len.to_le_bytes())
        .collect();
    // No payload follows, so the truncation check should fire.
    let err = decode_meta_blob(&header_only).unwrap_err();
    assert!(err.contains("payload"));
}
994}