lex_bytecode/program.rs
1//! Compiled program: a set of functions plus a constant pool.
2
3use crate::op::*;
4use indexmap::IndexMap;
5use serde::{Deserialize, Serialize};
6
/// A compiled program: a set of functions plus the constant pool.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Program {
    /// Constant pool.
    pub constants: Vec<Const>,
    /// Compiled functions; the values of `function_names` index into this
    /// list.
    pub functions: Vec<Function>,
    /// Global function names → function index in `functions`.
    pub function_names: IndexMap<String, u32>,
    /// Imported module aliases → module name (e.g., `io` → `io`).
    /// Used by the compiler/runtime to dispatch `alias.op(...)` calls.
    pub module_aliases: IndexMap<String, String>,
    /// Entry function (for `lex run`, set to whatever function the user
    /// chose to invoke). Optional.
    /// NOTE(review): presumably an index into `functions`, like the values
    /// of `function_names` — confirm against the runtime.
    pub entry: Option<u32>,
}
20
21impl Program {
22 pub fn lookup(&self, name: &str) -> Option<u32> {
23 self.function_names.get(name).copied()
24 }
25
26 /// Walk every function's declared effects and collect the union of
27 /// effect kinds (with their args).
28 pub fn declared_effects(&self) -> Vec<DeclaredEffect> {
29 let mut out: Vec<DeclaredEffect> = Vec::new();
30 for f in &self.functions {
31 for e in &f.effects {
32 if !out.iter().any(|x| x == e) {
33 out.push(e.clone());
34 }
35 }
36 }
37 out
38 }
39}
40
/// Content hash of a function body (#222): the first 16 bytes of a SHA-256
/// digest, as produced by `compute_body_hash`.
///
/// Matches `lex-vcs::OpId`'s width so that mixing the two never confuses a
/// reader expecting a uniform hash size across the codebase.
pub type BodyHash = [u8; 16];
45
/// All-zero sentinel — used in `Function::default()` and as a placeholder
/// before the hash is computed at the end of the compile pass.
///
/// Also the value `Function::body_hash` takes on deserialization when the
/// field is absent (via the `zero_body_hash` serde default).
pub const ZERO_BODY_HASH: BodyHash = [0u8; 16];
49
/// A single compiled function: signature metadata plus its bytecode body.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Function {
    /// Function name.
    pub name: String,
    /// Arity of the function.
    /// NOTE(review): presumably the declared parameter count — confirm
    /// against the compiler.
    pub arity: u16,
    /// Locals count.
    /// NOTE(review): whether this includes the parameters is not visible
    /// from this file — confirm against the compiler/VM.
    pub locals_count: u16,
    /// Bytecode body, as a sequence of `Op`s.
    pub code: Vec<Op>,
    /// Declared effects on this function's signature (spec §7).
    /// Deserializes to an empty list when the field is absent.
    #[serde(default)]
    pub effects: Vec<DeclaredEffect>,
    /// Content hash of the bytecode body — see `compute_body_hash`.
    /// Populated at the end of the compile pass; used at `Op::MakeClosure`
    /// to give every `Value::Closure` a canonical identity that does not
    /// depend on the closure literal's source location (#222).
    /// Deserializes to `ZERO_BODY_HASH` when the field is absent.
    #[serde(default = "zero_body_hash")]
    pub body_hash: BodyHash,
    /// Per-parameter refinement predicates (#209 slice 3). `Some(r)`
    /// for params declared with `Type{x | predicate}`, `None`
    /// otherwise. The VM evaluates these at `Op::Call` time before
    /// pushing the frame; failure raises `VmError::RefinementFailed`
    /// and the tracer records a verdict event with the same shape
    /// as a runtime gate's `gate.verdict`.
    /// Deserializes to an empty list when the field is absent.
    #[serde(default)]
    pub refinements: Vec<Option<Refinement>>,
}
74
/// A refinement attached to a parameter declared as
/// `Type{binding | predicate}`: the bound variable name together with the
/// predicate expression over it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Refinement {
    /// The bound variable name from `Type{binding | predicate}`.
    pub binding: String,
    /// The predicate, stored as a canonical-AST `CExpr`. The VM
    /// interprets it directly via a small tree-walk evaluator —
    /// no separate compile pass needed since predicates are pure
    /// expressions over a single binding plus, eventually, the
    /// surrounding call-site context (slice 3 supports the
    /// binding only).
    pub predicate: lex_ast::CExpr,
}
87
88fn zero_body_hash() -> BodyHash { ZERO_BODY_HASH }
89
90/// Hash a function body so that two structurally-identical bodies — the
91/// `fn(x) -> x + 1` literal repeated at two source locations, two flow
92/// trampolines built from the same shape, etc. — yield the same hash.
93///
94/// Inputs: the bytecode `Op` sequence, the arity, the locals count.
95/// Capture *types* are intentionally not hashed: capture *values* already
96/// participate in `Value::Closure`'s equality through the `captures`
97/// field, so two closures with different capture values already compare
98/// non-equal regardless of the hash. Capture *types* without values
99/// don't add equality information that captures don't already provide
100/// (a value of type `Int` and a value of type `Str` can't both be `42`).
101///
102/// Constants pool indices referenced from the body are *not* resolved
103/// before hashing — within a single compile the pool is shared, so two
104/// equivalent literals produce identical `Op` sequences. Cross-compile
105/// canonicality is deliberately out of scope (#222).
106pub fn compute_body_hash(arity: u16, locals_count: u16, code: &[Op]) -> BodyHash {
107 use sha2::{Digest, Sha256};
108 let mut hasher = Sha256::new();
109 hasher.update(arity.to_le_bytes());
110 hasher.update(locals_count.to_le_bytes());
111 hasher.update((code.len() as u64).to_le_bytes());
112 // Serialize each Op deterministically. `serde_json` doesn't guarantee
113 // field ordering, so we route through bincode-like manual byte layout
114 // instead: we serialize via `serde_json::to_vec` only because Op's
115 // `Serialize` impl is auto-derived and stable across Rust versions
116 // for this enum shape. If determinism ever drifts we'll switch to a
117 // hand-rolled encoder.
118 for op in code {
119 let bytes = serde_json::to_vec(op)
120 .expect("Op serialization must succeed");
121 hasher.update((bytes.len() as u64).to_le_bytes());
122 hasher.update(&bytes);
123 }
124 let full = hasher.finalize();
125 let mut out = [0u8; 16];
126 out.copy_from_slice(&full[..16]);
127 out
128}
129
/// One effect declared on a function's signature: an effect kind plus an
/// optional argument.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DeclaredEffect {
    /// Effect kind, stored as a string.
    pub kind: String,
    /// Optional argument of the effect. Omitted from the serialized form
    /// when `None`, and read back as `None` when the field is absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub arg: Option<EffectArg>,
}
136
/// Argument carried by a `DeclaredEffect`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum EffectArg {
    /// A string payload.
    /// NOTE(review): presumably a string literal in the effect
    /// declaration — confirm against the parser.
    Str(String),
    /// A 64-bit signed integer payload.
    Int(i64),
    /// A name stored as a string.
    /// NOTE(review): presumably a bare identifier, as opposed to the
    /// quoted form held by `Str` — confirm against the parser.
    Ident(String),
}