Skip to main content

lex_bytecode/
program.rs

1//! Compiled program: a set of functions plus a constant pool.
2
3use crate::op::*;
4use indexmap::IndexMap;
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
8pub struct Program {
9    pub constants: Vec<Const>,
10    pub functions: Vec<Function>,
11    /// Global function names → function index in `functions`.
12    pub function_names: IndexMap<String, u32>,
13    /// Imported module aliases → module name (e.g., `io` → `io`).
14    /// Used by the compiler/runtime to dispatch `alias.op(...)` calls.
15    pub module_aliases: IndexMap<String, String>,
16    /// Entry function (for `lex run`, set to whatever function the user
17    /// chose to invoke). Optional.
18    pub entry: Option<u32>,
19}
20
21impl Program {
22    pub fn lookup(&self, name: &str) -> Option<u32> {
23        self.function_names.get(name).copied()
24    }
25
26    /// Walk every function's declared effects and collect the union of
27    /// effect kinds (with their args).
28    pub fn declared_effects(&self) -> Vec<DeclaredEffect> {
29        let mut out: Vec<DeclaredEffect> = Vec::new();
30        for f in &self.functions {
31            for e in &f.effects {
32                if !out.iter().any(|x| x == e) {
33                    out.push(e.clone());
34                }
35            }
36        }
37        out
38    }
39}
40
/// Content hash of a function body (#222): a SHA-256 digest truncated
/// to 16 bytes.
///
/// The width matches `lex-vcs::OpId` so that hashes are a uniform size
/// across the codebase and a reader never has to juggle two widths.
pub type BodyHash = [u8; 16];

/// The all-zero [`BodyHash`] sentinel.
///
/// Stands in for a function's hash until the real value is computed at
/// the end of the compile pass; it is also what `zero_body_hash`
/// supplies as the serde default for `Function::body_hash`.
pub const ZERO_BODY_HASH: BodyHash = [0; 16];
49
50#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
51pub struct Function {
52    pub name: String,
53    pub arity: u16,
54    pub locals_count: u16,
55    pub code: Vec<Op>,
56    /// Declared effects on this function's signature (spec §7).
57    #[serde(default)]
58    pub effects: Vec<DeclaredEffect>,
59    /// Content hash of the bytecode body — see `compute_body_hash`.
60    /// Populated at the end of the compile pass; used at `Op::MakeClosure`
61    /// to give every `Value::Closure` a canonical identity that does not
62    /// depend on the closure literal's source location (#222).
63    #[serde(default = "zero_body_hash")]
64    pub body_hash: BodyHash,
65    /// Per-parameter refinement predicates (#209 slice 3). `Some(r)`
66    /// for params declared with `Type{x | predicate}`, `None`
67    /// otherwise. The VM evaluates these at `Op::Call` time before
68    /// pushing the frame; failure raises `VmError::RefinementFailed`
69    /// and the tracer records a verdict event with the same shape
70    /// as a runtime gate's `gate.verdict`.
71    #[serde(default)]
72    pub refinements: Vec<Option<Refinement>>,
73}
74
75#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
76pub struct Refinement {
77    /// The bound variable name from `Type{binding | predicate}`.
78    pub binding: String,
79    /// The predicate, stored as a canonical-AST `CExpr`. The VM
80    /// interprets it directly via a small tree-walk evaluator —
81    /// no separate compile pass needed since predicates are pure
82    /// expressions over a single binding plus, eventually, the
83    /// surrounding call-site context (slice 3 supports the
84    /// binding only).
85    pub predicate: lex_ast::CExpr,
86}
87
88fn zero_body_hash() -> BodyHash { ZERO_BODY_HASH }
89
90/// Hash a function body so that two structurally-identical bodies — the
91/// `fn(x) -> x + 1` literal repeated at two source locations, two flow
92/// trampolines built from the same shape, etc. — yield the same hash.
93///
94/// Inputs: the bytecode `Op` sequence, the arity, the locals count.
95/// Capture *types* are intentionally not hashed: capture *values* already
96/// participate in `Value::Closure`'s equality through the `captures`
97/// field, so two closures with different capture values already compare
98/// non-equal regardless of the hash. Capture *types* without values
99/// don't add equality information that captures don't already provide
100/// (a value of type `Int` and a value of type `Str` can't both be `42`).
101///
102/// Constants pool indices referenced from the body are *not* resolved
103/// before hashing — within a single compile the pool is shared, so two
104/// equivalent literals produce identical `Op` sequences. Cross-compile
105/// canonicality is deliberately out of scope (#222).
106pub fn compute_body_hash(arity: u16, locals_count: u16, code: &[Op]) -> BodyHash {
107    use sha2::{Digest, Sha256};
108    let mut hasher = Sha256::new();
109    hasher.update(arity.to_le_bytes());
110    hasher.update(locals_count.to_le_bytes());
111    hasher.update((code.len() as u64).to_le_bytes());
112    // Serialize each Op deterministically. `serde_json` doesn't guarantee
113    // field ordering, so we route through bincode-like manual byte layout
114    // instead: we serialize via `serde_json::to_vec` only because Op's
115    // `Serialize` impl is auto-derived and stable across Rust versions
116    // for this enum shape. If determinism ever drifts we'll switch to a
117    // hand-rolled encoder.
118    for op in code {
119        let bytes = serde_json::to_vec(op)
120            .expect("Op serialization must succeed");
121        hasher.update((bytes.len() as u64).to_le_bytes());
122        hasher.update(&bytes);
123    }
124    let full = hasher.finalize();
125    let mut out = [0u8; 16];
126    out.copy_from_slice(&full[..16]);
127    out
128}
129
130#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
131pub struct DeclaredEffect {
132    pub kind: String,
133    #[serde(default, skip_serializing_if = "Option::is_none")]
134    pub arg: Option<EffectArg>,
135}
136
137#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
138pub enum EffectArg {
139    Str(String),
140    Int(i64),
141    Ident(String),
142}