Skip to main content

lex_bytecode/
program.rs

1//! Compiled program: a set of functions plus a constant pool.
2
3use crate::op::*;
4use indexmap::IndexMap;
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
8pub struct Program {
9    pub constants: Vec<Const>,
10    pub functions: Vec<Function>,
11    /// Global function names → function index in `functions`.
12    pub function_names: IndexMap<String, u32>,
13    /// Imported module aliases → module name (e.g., `io` → `io`).
14    /// Used by the compiler/runtime to dispatch `alias.op(...)` calls.
15    pub module_aliases: IndexMap<String, String>,
16    /// Entry function (for `lex run`, set to whatever function the user
17    /// chose to invoke). Optional.
18    pub entry: Option<u32>,
19    /// Interned record field-name shapes (#461). Each entry is a list
20    /// of constant-pool indices (must point at `Const::FieldName`).
21    /// `Op::MakeRecord { shape_idx, .. }` indexes into this side-table.
22    /// Hoisting the field-name list out of the op stream is what
23    /// lets `Op` be `Copy`.
24    #[serde(default)]
25    pub record_shapes: Vec<Vec<u32>>,
26}
27
28impl Program {
29    pub fn lookup(&self, name: &str) -> Option<u32> {
30        self.function_names.get(name).copied()
31    }
32
33    /// Walk every function's declared effects and collect the union of
34    /// effect kinds (with their args).
35    pub fn declared_effects(&self) -> Vec<DeclaredEffect> {
36        let mut out: Vec<DeclaredEffect> = Vec::new();
37        for f in &self.functions {
38            for e in &f.effects {
39                if !out.iter().any(|x| x == e) {
40                    out.push(e.clone());
41                }
42            }
43        }
44        out
45    }
46}
47
48/// Content hash of a function body (#222). 16 bytes = SHA-256 truncated.
49/// Matches `lex-vcs::OpId`'s width so that mixing the two never confuses a
50/// reader expecting a uniform hash size across the codebase.
51pub type BodyHash = [u8; 16];
52
53/// All-zero sentinel — used in `Function::default()` and as a placeholder
54/// before the hash is computed at the end of the compile pass.
55pub const ZERO_BODY_HASH: BodyHash = [0u8; 16];
56
57#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
58pub struct Function {
59    pub name: String,
60    pub arity: u16,
61    pub locals_count: u16,
62    pub code: Vec<Op>,
63    /// Declared effects on this function's signature (spec §7).
64    #[serde(default)]
65    pub effects: Vec<DeclaredEffect>,
66    /// Content hash of the bytecode body — see `compute_body_hash`.
67    /// Populated at the end of the compile pass; used at `Op::MakeClosure`
68    /// to give every `Value::Closure` a canonical identity that does not
69    /// depend on the closure literal's source location (#222).
70    #[serde(default = "zero_body_hash")]
71    pub body_hash: BodyHash,
72    /// Per-parameter refinement predicates (#209 slice 3). `Some(r)`
73    /// for params declared with `Type{x | predicate}`, `None`
74    /// otherwise. The VM evaluates these at `Op::Call` time before
75    /// pushing the frame; failure raises `VmError::RefinementFailed`
76    /// and the tracer records a verdict event with the same shape
77    /// as a runtime gate's `gate.verdict`.
78    #[serde(default)]
79    pub refinements: Vec<Option<Refinement>>,
80    /// Number of `Op::GetField` sites in this function (#462 slice 1).
81    /// Populated by the compiler so the VM can lazily one-shot
82    /// allocate the inline-cache `Vec<Option<usize>>` to its final
83    /// size on first GetField — no per-op resize bookkeeping.
84    /// `#[serde(default)]` because pre-#462 programs don't carry it.
85    #[serde(default)]
86    pub field_ic_sites: u16,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
90pub struct Refinement {
91    /// The bound variable name from `Type{binding | predicate}`.
92    pub binding: String,
93    /// The predicate, stored as a canonical-AST `CExpr`. The VM
94    /// interprets it directly via a small tree-walk evaluator —
95    /// no separate compile pass needed since predicates are pure
96    /// expressions over a single binding plus, eventually, the
97    /// surrounding call-site context (slice 3 supports the
98    /// binding only).
99    pub predicate: lex_ast::CExpr,
100}
101
102fn zero_body_hash() -> BodyHash { ZERO_BODY_HASH }
103
104/// Hash a function body so that two structurally-identical bodies — the
105/// `fn(x) -> x + 1` literal repeated at two source locations, two flow
106/// trampolines built from the same shape, etc. — yield the same hash.
107///
108/// Inputs: the bytecode `Op` sequence, the arity, the locals count.
109/// Capture *types* are intentionally not hashed: capture *values* already
110/// participate in `Value::Closure`'s equality through the `captures`
111/// field, so two closures with different capture values already compare
112/// non-equal regardless of the hash. Capture *types* without values
113/// don't add equality information that captures don't already provide
114/// (a value of type `Int` and a value of type `Str` can't both be `42`).
115///
116/// Constants pool indices referenced from the body are *not* resolved
117/// before hashing — within a single compile the pool is shared, so two
118/// equivalent literals produce identical `Op` sequences. Cross-compile
119/// canonicality is deliberately out of scope (#222).
120pub fn compute_body_hash(
121    arity: u16,
122    locals_count: u16,
123    code: &[Op],
124    record_shapes: &[Vec<u32>],
125) -> BodyHash {
126    use sha2::{Digest, Sha256};
127    let mut hasher = Sha256::new();
128    hasher.update(arity.to_le_bytes());
129    hasher.update(locals_count.to_le_bytes());
130    hasher.update((code.len() as u64).to_le_bytes());
131    // Serialize each Op deterministically. The serde-derived JSON form
132    // is the canonical wire shape — closures with the same body must
133    // hash identically across builds. `Op::MakeRecord` is special-cased:
134    // its on-disk representation (a `shape_idx` into the side-table
135    // plus a cached `field_count`) is rehydrated to the historical
136    // inline form `{"MakeRecord":{"field_name_indices":[...]}}` so the
137    // hash bytes stay bit-identical to pre-#461 builds.
138    for op in code {
139        let bytes = match op {
140            // Both `MakeRecord` and its #464 step-2 sibling
141            // `AllocStackRecord` hash as the historical inline
142            // `{"MakeRecord":{"field_name_indices":[...]}}` form so
143            // closure identity (#222) is bit-identical to pre-#464
144            // builds — the stack-vs-heap lowering is a performance
145            // detail that mustn't perturb body hashes (otherwise the
146            // same closure literal would hash differently after the
147            // escape pass fires).
148            Op::MakeRecord { shape_idx, .. }
149            | Op::AllocStackRecord { shape_idx, .. }
150            | Op::AllocArenaRecord { shape_idx, .. } => {
151                let shape = &record_shapes[*shape_idx as usize];
152                #[derive(Serialize)]
153                struct LegacyMakeRecord<'a> {
154                    field_name_indices: &'a [u32],
155                }
156                #[derive(Serialize)]
157                enum LegacyOp<'a> {
158                    MakeRecord(LegacyMakeRecord<'a>),
159                }
160                serde_json::to_vec(&LegacyOp::MakeRecord(LegacyMakeRecord {
161                    field_name_indices: shape,
162                }))
163                .expect("Op serialization must succeed")
164            }
165            // Peephole-fused op (#461 superinstructions). The fused
166            // op occupies the slot where `LoadLocal(local_idx)` was;
167            // the next two slots in `code` still hold the unchanged
168            // `PushConst(imm_const_idx)` and `IntAdd`. Hashing as the
169            // original `LoadLocal` makes the total body-hash bytes
170            // match the pre-fusion form — closure identity (#222)
171            // stays invariant across peephole rewrites.
172            Op::LoadLocalAddIntConst { local_idx, .. }
173            | Op::LoadLocalAddIntConstStoreLocal { src: local_idx, .. }
174            | Op::LoadLocalAddLocal { lhs_idx: local_idx, .. }
175            | Op::LoadLocalSubLocal { lhs_idx: local_idx, .. }
176            | Op::LoadLocalMulLocal { lhs_idx: local_idx, .. }
177            | Op::LoadLocalEqIntConstJumpIfNot { local_idx, .. }
178            | Op::LoadLocalStoreEqIntConstJumpIfNot { src: local_idx, .. }
179            | Op::LoadLocalGetFieldAdd { local_idx, .. }
180            | Op::LoadLocalGetFieldSub { local_idx, .. }
181            | Op::LoadLocalGetFieldMul { local_idx, .. }
182            | Op::LoadLocalGetField { local_idx, .. } => {
183                // Slice 1: this slot was `LoadLocal(local_idx)`.
184                // Slice 2: this slot was *also* `LoadLocal(src)`
185                // — the fused op now owns 4 slots, but the very
186                // first one was originally `LoadLocal`. Slice 3:
187                // ditto for `LoadLocal(lhs_idx)`. Slice 4: same
188                // shape as slice 3, just with IntSub / IntMul as
189                // terminator. The trailing primitive ops (PushConst
190                // / IntAdd / IntSub / IntMul / StoreLocal / LoadLocal)
191                // remain in the code stream as tombstones and hash
192                // normally, so the total byte stream matches pre-fusion.
193                serde_json::to_vec(&Op::LoadLocal(*local_idx))
194                    .expect("Op serialization must succeed")
195            }
196            // #461 typed-lowering compensation. The compiler now emits
197            // typed numeric primitives (`IntAdd` / `FloatAdd` / ...)
198            // wherever it can prove operand types, but the closure
199            // identity contract (#222) requires `body_hash` to remain
200            // bit-identical to the pre-lowering polymorphic form
201            // (`NumAdd` / ...). Lower the typed op to its polymorphic
202            // counterpart at hash time. Behavioral equivalence is
203            // guaranteed by the type checker — operand-type proofs the
204            // compiler used to specialize are exactly what makes the
205            // polymorphic op behave identically.
206            // #462 slice 1 — `GetField` carries a `site_idx` now,
207            // but the canonical body-hash form is the historical
208            // single-field tuple `GetField(name_idx)`. Lowering keeps
209            // closure identity (#222) bit-identical to pre-#462 builds.
210            // `site_idx` is a compile-time perf-side-channel; behavior
211            // doesn't depend on it (identical input → identical
212            // observable result).
213            Op::GetField { name_idx, .. } => {
214                #[derive(Serialize)]
215                enum LegacyOp { GetField(u32) }
216                serde_json::to_vec(&LegacyOp::GetField(*name_idx))
217                    .expect("Op serialization must succeed")
218            }
219            // #464 tuple codegen: AllocStackTuple hashes as MakeTuple
220            // so the stack-vs-heap lowering leaves closure identity
221            // (#222) bit-identical, mirroring AllocStackRecord above.
222            // #463 slice 2a: AllocArenaTuple does the same — the
223            // arena routing is a runtime detail that mustn't perturb
224            // body hashes any more than the stack lowering does.
225            Op::AllocStackTuple { arity }
226            | Op::AllocArenaTuple { arity } =>
227                serde_json::to_vec(&Op::MakeTuple(*arity)).expect("Op serialization must succeed"),
228            Op::IntAdd   | Op::FloatAdd => serde_json::to_vec(&Op::NumAdd).unwrap(),
229            Op::IntSub   | Op::FloatSub => serde_json::to_vec(&Op::NumSub).unwrap(),
230            Op::IntMul   | Op::FloatMul => serde_json::to_vec(&Op::NumMul).unwrap(),
231            Op::IntDiv   | Op::FloatDiv => serde_json::to_vec(&Op::NumDiv).unwrap(),
232            Op::IntMod                  => serde_json::to_vec(&Op::NumMod).unwrap(),
233            Op::IntNeg   | Op::FloatNeg => serde_json::to_vec(&Op::NumNeg).unwrap(),
234            Op::IntEq    | Op::FloatEq  => serde_json::to_vec(&Op::NumEq).unwrap(),
235            Op::IntLt    | Op::FloatLt  => serde_json::to_vec(&Op::NumLt).unwrap(),
236            Op::IntLe    | Op::FloatLe  => serde_json::to_vec(&Op::NumLe).unwrap(),
237            _ => serde_json::to_vec(op).expect("Op serialization must succeed"),
238        };
239        hasher.update((bytes.len() as u64).to_le_bytes());
240        hasher.update(&bytes);
241    }
242    let full = hasher.finalize();
243    let mut out = [0u8; 16];
244    out.copy_from_slice(&full[..16]);
245    out
246}
247
248#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
249pub struct DeclaredEffect {
250    pub kind: String,
251    #[serde(default, skip_serializing_if = "Option::is_none")]
252    pub arg: Option<EffectArg>,
253}
254
255#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
256pub enum EffectArg {
257    Str(String),
258    Int(i64),
259    Ident(String),
260}