lex_bytecode/program.rs
1//! Compiled program: a set of functions plus a constant pool.
2
3use crate::op::*;
4use indexmap::IndexMap;
5use serde::{Deserialize, Serialize};
6
/// A compiled program: a set of functions plus a constant pool, along
/// with the name tables and side-tables that index into them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Program {
    /// Constant pool. Entries are addressed by index — for example, the
    /// indices stored in `record_shapes` point into this vector.
    pub constants: Vec<Const>,
    /// All functions in the program. `function_names` maps names to
    /// positions in this vector.
    pub functions: Vec<Function>,
    /// Global function names → function index in `functions`.
    pub function_names: IndexMap<String, u32>,
    /// Imported module aliases → module name (e.g., `io` → `io`).
    /// Used by the compiler/runtime to dispatch `alias.op(...)` calls.
    pub module_aliases: IndexMap<String, String>,
    /// Entry function (for `lex run`, set to whatever function the user
    /// chose to invoke). Optional.
    /// NOTE(review): presumably an index into `functions`, like the
    /// values of `function_names` — confirm against the runtime.
    pub entry: Option<u32>,
    /// Interned record field-name shapes (#461). Each entry is a list
    /// of constant-pool indices (must point at `Const::FieldName`).
    /// `Op::MakeRecord { shape_idx, .. }` indexes into this side-table.
    /// Hoisting the field-name list out of the op stream is what
    /// lets `Op` be `Copy`.
    ///
    /// `#[serde(default)]`: a serialized program that lacks this field
    /// deserializes with an empty table.
    #[serde(default)]
    pub record_shapes: Vec<Vec<u32>>,
}
27
28impl Program {
29 pub fn lookup(&self, name: &str) -> Option<u32> {
30 self.function_names.get(name).copied()
31 }
32
33 /// Walk every function's declared effects and collect the union of
34 /// effect kinds (with their args).
35 pub fn declared_effects(&self) -> Vec<DeclaredEffect> {
36 let mut out: Vec<DeclaredEffect> = Vec::new();
37 for f in &self.functions {
38 for e in &f.effects {
39 if !out.iter().any(|x| x == e) {
40 out.push(e.clone());
41 }
42 }
43 }
44 out
45 }
46}
47
/// Content hash of a function body (#222): a SHA-256 digest truncated
/// to its first 16 bytes.
///
/// The width matches `lex-vcs::OpId` so that mixing the two never
/// confuses a reader expecting a uniform hash size across the codebase.
pub type BodyHash = [u8; 16];

/// All-zero sentinel for a body hash that has not been computed yet.
///
/// It stands in as a placeholder until the real hash is filled in at
/// the end of the compile pass, and is the value a `body_hash` field
/// takes when it is defaulted rather than computed.
pub const ZERO_BODY_HASH: BodyHash = [0; 16];
56
/// One compiled function: its bytecode body plus signature-level
/// metadata (declared effects, parameter refinements) and the content
/// hash of the body.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Function {
    /// The function's name.
    pub name: String,
    /// Arity of the function; one of the inputs to `compute_body_hash`.
    /// NOTE(review): presumably the number of declared parameters — confirm.
    pub arity: u16,
    /// Locals count; also an input to `compute_body_hash`.
    /// NOTE(review): whether this includes the parameters is not
    /// visible from this file — confirm against the compiler.
    pub locals_count: u16,
    /// The bytecode body, as a sequence of `Op`s.
    pub code: Vec<Op>,
    /// Declared effects on this function's signature (spec §7).
    /// Defaults to empty when absent from serialized input.
    #[serde(default)]
    pub effects: Vec<DeclaredEffect>,
    /// Content hash of the bytecode body — see `compute_body_hash`.
    /// Populated at the end of the compile pass; used at `Op::MakeClosure`
    /// to give every `Value::Closure` a canonical identity that does not
    /// depend on the closure literal's source location (#222).
    /// When absent from serialized input, `zero_body_hash` supplies the
    /// default value.
    #[serde(default = "zero_body_hash")]
    pub body_hash: BodyHash,
    /// Per-parameter refinement predicates (#209 slice 3). `Some(r)`
    /// for params declared with `Type{x | predicate}`, `None`
    /// otherwise. The VM evaluates these at `Op::Call` time before
    /// pushing the frame; failure raises `VmError::RefinementFailed`
    /// and the tracer records a verdict event with the same shape
    /// as a runtime gate's `gate.verdict`.
    /// Defaults to empty when absent from serialized input.
    #[serde(default)]
    pub refinements: Vec<Option<Refinement>>,
}
81
/// A refinement predicate attached to a parameter declared as
/// `Type{binding | predicate}`: the bound variable name together with
/// the predicate expression over it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Refinement {
    /// The bound variable name from `Type{binding | predicate}`.
    pub binding: String,
    /// The predicate, stored as a canonical-AST `CExpr`. The VM
    /// interprets it directly via a small tree-walk evaluator —
    /// no separate compile pass needed since predicates are pure
    /// expressions over a single binding plus, eventually, the
    /// surrounding call-site context (slice 3 supports the
    /// binding only).
    pub predicate: lex_ast::CExpr,
}
94
95fn zero_body_hash() -> BodyHash { ZERO_BODY_HASH }
96
97/// Hash a function body so that two structurally-identical bodies — the
98/// `fn(x) -> x + 1` literal repeated at two source locations, two flow
99/// trampolines built from the same shape, etc. — yield the same hash.
100///
101/// Inputs: the bytecode `Op` sequence, the arity, the locals count.
102/// Capture *types* are intentionally not hashed: capture *values* already
103/// participate in `Value::Closure`'s equality through the `captures`
104/// field, so two closures with different capture values already compare
105/// non-equal regardless of the hash. Capture *types* without values
106/// don't add equality information that captures don't already provide
107/// (a value of type `Int` and a value of type `Str` can't both be `42`).
108///
109/// Constants pool indices referenced from the body are *not* resolved
110/// before hashing — within a single compile the pool is shared, so two
111/// equivalent literals produce identical `Op` sequences. Cross-compile
112/// canonicality is deliberately out of scope (#222).
113pub fn compute_body_hash(
114 arity: u16,
115 locals_count: u16,
116 code: &[Op],
117 record_shapes: &[Vec<u32>],
118) -> BodyHash {
119 use sha2::{Digest, Sha256};
120 let mut hasher = Sha256::new();
121 hasher.update(arity.to_le_bytes());
122 hasher.update(locals_count.to_le_bytes());
123 hasher.update((code.len() as u64).to_le_bytes());
124 // Serialize each Op deterministically. The serde-derived JSON form
125 // is the canonical wire shape — closures with the same body must
126 // hash identically across builds. `Op::MakeRecord` is special-cased:
127 // its on-disk representation (a `shape_idx` into the side-table
128 // plus a cached `field_count`) is rehydrated to the historical
129 // inline form `{"MakeRecord":{"field_name_indices":[...]}}` so the
130 // hash bytes stay bit-identical to pre-#461 builds.
131 for op in code {
132 let bytes = match op {
133 Op::MakeRecord { shape_idx, .. } => {
134 let shape = &record_shapes[*shape_idx as usize];
135 #[derive(Serialize)]
136 struct LegacyMakeRecord<'a> {
137 field_name_indices: &'a [u32],
138 }
139 #[derive(Serialize)]
140 enum LegacyOp<'a> {
141 MakeRecord(LegacyMakeRecord<'a>),
142 }
143 serde_json::to_vec(&LegacyOp::MakeRecord(LegacyMakeRecord {
144 field_name_indices: shape,
145 }))
146 .expect("Op serialization must succeed")
147 }
148 // Peephole-fused op (#461 superinstructions). The fused
149 // op occupies the slot where `LoadLocal(local_idx)` was;
150 // the next two slots in `code` still hold the unchanged
151 // `PushConst(imm_const_idx)` and `IntAdd`. Hashing as the
152 // original `LoadLocal` makes the total body-hash bytes
153 // match the pre-fusion form — closure identity (#222)
154 // stays invariant across peephole rewrites.
155 Op::LoadLocalAddIntConst { local_idx, .. } => {
156 serde_json::to_vec(&Op::LoadLocal(*local_idx))
157 .expect("Op serialization must succeed")
158 }
159 _ => serde_json::to_vec(op).expect("Op serialization must succeed"),
160 };
161 hasher.update((bytes.len() as u64).to_le_bytes());
162 hasher.update(&bytes);
163 }
164 let full = hasher.finalize();
165 let mut out = [0u8; 16];
166 out.copy_from_slice(&full[..16]);
167 out
168}
169
/// One effect declared on a function signature: an effect kind plus an
/// optional argument. Two declared effects compare equal only when both
/// the kind and the argument match.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DeclaredEffect {
    /// The effect kind, as a string.
    pub kind: String,
    /// Optional argument attached to the effect. Omitted from the
    /// serialized form when `None`, and read back as `None` when the
    /// field is absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub arg: Option<EffectArg>,
}
176
/// The argument carried by a `DeclaredEffect`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum EffectArg {
    /// A string argument.
    /// NOTE(review): presumably a string literal in the source — confirm.
    Str(String),
    /// A 64-bit signed integer argument.
    Int(i64),
    /// An identifier argument, stored by name.
    /// NOTE(review): presumably a bare identifier as opposed to a quoted
    /// string — confirm against the parser.
    Ident(String),
}