// lex_bytecode/value.rs

//! Runtime values.
2
3use crate::program::BodyHash;
4use arrow_array::RecordBatch;
5use indexmap::IndexMap;
6use smol_str::SmolStr;
7use std::collections::{BTreeMap, BTreeSet, VecDeque};
8use std::sync::atomic::AtomicBool;
9use std::sync::{Arc, Mutex};
10
11/// Internal state of a `conc.Actor`. Protected by a `Mutex` so that
12/// the `Lex` handler variant serialises on message delivery (one
13/// message processed at a time, state mutated under the lock). The
14/// `handler` is dispatched on the *calling* VM's thread — no extra
15/// OS thread required — which lets Lex handlers invoke arbitrary
16/// effects (sql, net, …) through the same handler chain.
17///
18/// Serialisation note: the `Native` variant releases the mutex
19/// *before* invoking its closure (`state` is unused for natives —
20/// the "state" is an external resource like a channel), so two
21/// concurrent `conc.tell`s on the same native bridge may invoke
22/// the closure on overlapping threads. Native bridges therefore
23/// need to be internally thread-safe; the `serve_ws_fn_actor`
24/// `mpsc::Sender` bridge is, because `Sender::send` is.
25#[derive(Debug, Clone)]
26pub struct ActorCell {
27    pub state: Value,
28    pub handler: ActorHandler,
29}
30
31/// Two ways an actor's handler can be implemented.
32///
33/// * `Lex(Value::Closure)` is the user-spawned shape from
34///   `conc.spawn(state, fn (s, m) -> (s, r) { … })`. The VM calls
35///   the closure with `(state, msg)` and expects `(new_state, reply)`.
36///
37/// * `Native(...)` is a Rust-side bridge — the actor cell wraps a
38///   `Box<dyn Fn(Value) -> Result<Value, String>>` that lives outside
39///   the VM. The `state` is ignored; the bridge is fire-and-forget
40///   over an out-of-band channel (e.g. a `mpsc::Sender<String>` to
41///   a WebSocket connection — see `lex-runtime::ws::serve_ws_fn_actor`).
42///   `conc.ask` against a native actor returns whatever the bridge
43///   produces; `conc.tell` discards it. v1 is only used internally by
44///   the WS server's outbound-bridge registration; not exposed via the
45///   `conc` builtin surface.
46#[derive(Clone)]
47pub enum ActorHandler {
48    Lex(Value),
49    Native(Arc<NativeActorHandler>),
50}
51
52/// Erased Rust-side handler for `ActorHandler::Native`. Boxed so we
53/// can store any closure that captures (e.g. an `mpsc::Sender`).
54/// Wrapped in `Arc` so cloning an `ActorCell` (which the existing
55/// `conc.tell` flow does — `let handler = guard.handler.clone()`)
56/// is cheap and the closure isn't duplicated.
57pub struct NativeActorHandler {
58    pub send: Box<dyn Fn(Value) -> Result<Value, String> + Send + Sync>,
59}
60
61impl std::fmt::Debug for NativeActorHandler {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        write!(f, "<native actor handler>")
64    }
65}
66
67impl std::fmt::Debug for ActorHandler {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        match self {
70            ActorHandler::Lex(v) => f.debug_tuple("Lex").field(v).finish(),
71            ActorHandler::Native(n) => f.debug_tuple("Native").field(n).finish(),
72        }
73    }
74}
75
76#[derive(Debug, Clone)]
77pub enum Value {
78    Int(i64),
79    Float(f64),
80    Bool(bool),
81    /// String value. `SmolStr` stores strings ≤ 22 bytes inline — no heap
82    /// allocation for identifiers, HTTP methods, status codes, short keys, etc.
83    /// Clone of a short `SmolStr` is a 24-byte stack copy (#389 slice 4).
84    Str(SmolStr),
85    Bytes(Vec<u8>),
86    Unit,
87    List(VecDeque<Value>),
88    Tuple(Vec<Value>),
89    Record(IndexMap<String, Value>),
90    Variant { name: String, args: Vec<Value> },
91    /// First-class function value (a lambda + its captured locals). The
92    /// function's first `captures.len()` params bind to `captures`; the
93    /// remaining params are supplied at call time.
94    ///
95    /// `fn_id` is a dense compile-time index into `Program::functions`
96    /// for fast dispatch; `body_hash` is the **canonical identity** —
97    /// two closures with identical bytecode bodies compare equal even
98    /// when their `fn_id`s differ (which they will, when the source
99    /// has the same closure literal at two locations). See `PartialEq`
100    /// below and #222 for the rationale.
101    Closure { fn_id: u32, body_hash: BodyHash, captures: Vec<Value> },
102    /// Dense row-major `f64` matrix. A "fast lane" representation that
103    /// avoids the per-element `Value::Float` boxing of `Value::List`.
104    /// Used by Core's native tensor ops (matmul, dot, …) so end-to-end
105    /// matmul perf hits the §13.7 #1 100ms target without paying for
106    /// 2M Value boxings at the call boundary.
107    F64Array { rows: u32, cols: u32, data: Vec<f64> },
108    /// Persistent map keyed by `MapKey` (`Str` or `Int`). Insertion-
109    /// independent equality (sorted by `BTreeMap`'s `Ord`), so two
110    /// maps built from the same pairs in different orders compare
111    /// equal. Restricting keys to two primitive variants keeps
112    /// `Eq + Hash` requirements off `Value` itself, which has
113    /// closures and floats and can't be hashed soundly.
114    Map(BTreeMap<MapKey, Value>),
115    /// Persistent set with the same key-type discipline as `Map`.
116    Set(BTreeSet<MapKey>),
117    /// Double-ended queue. O(1) push/pop on both ends; otherwise
118    /// behaves like `List` for iteration / equality / JSON shape.
119    /// Lex's type system tracks `Deque[T]` separately from `List[T]`
120    /// so users explicitly opt in to deque semantics; the runtime
121    /// uses this dedicated variant rather than backing a deque on top
122    /// of `Value::List` (which would make `push_front` O(n)).
123    Deque(VecDeque<Value>),
124    /// A handle to a `conc.Actor`. The `Arc<Mutex<ActorCell>>` allows
125    /// cheap cloning and safe concurrent access — the mutex serialises
126    /// message delivery so the actor processes one message at a time.
127    /// Two actor handles compare equal iff they point to the same cell
128    /// (identity equality, not structural equality).
129    Actor(Arc<Mutex<ActorCell>>),
130    /// A periodic-tick handle returned by `conc.every` (#445). The
131    /// `AtomicBool` is the cancel flag — `conc.cancel(t)` sets it and
132    /// the background scheduler thread observes it on its next iteration
133    /// and exits. Two ticker handles compare equal iff they point to the
134    /// same cancel flag.
135    Ticker(Arc<AtomicBool>),
136    /// Apache Arrow `RecordBatch` — an unboxed columnar table. The
137    /// "fast lane" representation for `lex-frame` and any future
138    /// dataframe code: a `Value::ArrowTable` with one int64 column
139    /// of N rows is N×8 bytes of contiguous memory, not N
140    /// `Value::Int(_)` enum tags inside a `VecDeque`. Reductions
141    /// (`arrow.col_sum_int`, `arrow.col_mean`, …) execute as one
142    /// Rust call over the flat buffer, bypassing the bytecode VM
143    /// for the inner loop.
144    ///
145    /// `Arc` makes clone cheap (refcount bump) — Arrow tables are
146    /// already immutable so structural sharing across closures is
147    /// safe. Equality is structural over schema + columns.
148    ArrowTable(Arc<RecordBatch>),
149}
150
151/// Manual `PartialEq` for `Value` (#222). Mirrors the auto-derived
152/// implementation for every variant *except* `Closure`, which compares
153/// on `(body_hash, captures)` only — `fn_id` is a dense compile-time
154/// index that is not stable across source-location-equivalent closure
155/// literals, and including it would defeat the canonicality property
156/// the `body_hash` field exists to provide.
157impl PartialEq for Value {
158    fn eq(&self, other: &Self) -> bool {
159        use Value::*;
160        match (self, other) {
161            (Int(a), Int(b)) => a == b,
162            (Float(a), Float(b)) => a == b,
163            (Bool(a), Bool(b)) => a == b,
164            (Str(a), Str(b)) => a == b,
165            (Bytes(a), Bytes(b)) => a == b,
166            (Unit, Unit) => true,
167            (List(a), List(b)) => a == b,
168            (Tuple(a), Tuple(b)) => a == b,
169            (Record(a), Record(b)) => a == b,
170            (Variant { name: an, args: aa }, Variant { name: bn, args: ba }) =>
171                an == bn && aa == ba,
172            (Closure { body_hash: ah, captures: ac, .. },
173             Closure { body_hash: bh, captures: bc, .. }) =>
174                ah == bh && ac == bc,
175            (F64Array { rows: ar, cols: ac, data: ad },
176             F64Array { rows: br, cols: bc, data: bd }) =>
177                ar == br && ac == bc && ad == bd,
178            (Map(a), Map(b)) => a == b,
179            (Set(a), Set(b)) => a == b,
180            (Deque(a), Deque(b)) => a == b,
181            // Actor identity: same if both handles point to the same cell.
182            (Actor(a), Actor(b)) => Arc::ptr_eq(a, b),
183            // Ticker identity: same if both handles point to the same
184            // cancel flag (one ticker spawn → one flag).
185            (Ticker(a), Ticker(b)) => Arc::ptr_eq(a, b),
186            // Arrow table equality: structural over schema + columns.
187            // RecordBatch implements PartialEq directly.
188            (ArrowTable(a), ArrowTable(b)) => a == b,
189            _ => false,
190        }
191    }
192}
193
/// Hashable, totally ordered key type for `Value::Map` / `Value::Set`.
///
/// v1 supports `Str` and `Int` keys only. Extending this to other
/// primitives or to records is forward-compatible, since the type is not
/// exposed to user code beyond the surface API.
///
/// The derived `Ord` follows declaration order, so every `Str` key sorts
/// before every `Int` key.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum MapKey {
    /// String key.
    Str(String),
    /// Integer key.
    Int(i64),
}
203
204impl MapKey {
205    pub fn from_value(v: &Value) -> Result<Self, String> {
206        match v {
207            Value::Str(s) => Ok(MapKey::Str(s.to_string())),
208            Value::Int(n) => Ok(MapKey::Int(*n)),
209            other => Err(format!(
210                "map/set key must be Str or Int, got {other:?}")),
211        }
212    }
213    pub fn into_value(self) -> Value {
214        match self {
215            MapKey::Str(s) => Value::Str(s.into()),
216            MapKey::Int(n) => Value::Int(n),
217        }
218    }
219    pub fn as_value(&self) -> Value {
220        match self {
221            MapKey::Str(s) => Value::Str(s.as_str().into()),
222            MapKey::Int(n) => Value::Int(*n),
223        }
224    }
225}
226
227impl Value {
228    pub fn as_int(&self) -> i64 {
229        match self { Value::Int(n) => *n, other => panic!("expected Int, got {other:?}") }
230    }
231    pub fn as_float(&self) -> f64 {
232        match self { Value::Float(n) => *n, other => panic!("expected Float, got {other:?}") }
233    }
234    pub fn as_bool(&self) -> bool {
235        match self { Value::Bool(b) => *b, other => panic!("expected Bool, got {other:?}") }
236    }
237    pub fn as_str(&self) -> &str {
238        match self { Value::Str(s) => s, other => panic!("expected Str, got {other:?}") }
239    }
240
241    /// Render this `Value` as a `serde_json::Value` for emission to
242    /// CLI output, the agent API, conformance harness reports, etc.
243    /// Canonical mapping shared across crates; previously every
244    /// boundary had its own copy.
245    ///
246    /// Encoding:
247    /// - `Variant { name, args }` → `{"$variant": name, "args": [...]}`
248    /// - `F64Array { ... }` → `{"$f64_array": true, rows, cols, data}`
249    /// - `Closure { body_hash, .. }` → `"<closure HEX8>"` (first 8 hex
250    ///   chars of the body hash; equivalent closures across source
251    ///   locations render identically — see #222)
252    /// - `Bytes` → `{"$bytes": "deadbeef"}` (lowercase hex). Round-trips
253    ///   through `from_json`. Bare hex strings decode as `Str`, so the
254    ///   marker is required to disambiguate bytes from a string that
255    ///   happens to look like hex.
256    /// - `Map` with all-`Str` keys → JSON object; otherwise array of
257    ///   `[key, value]` pairs (Int keys can't be JSON-object keys)
258    /// - `Set` → JSON array of elements
259    /// - other variants → their natural JSON shape
260    ///
261    /// Note: this form is **not** round-trippable for traces (see
262    /// `lex-trace`'s recorder, which uses a richer marker form).
263    pub fn to_json(&self) -> serde_json::Value {
264        use serde_json::Value as J;
265        match self {
266            Value::Int(n) => J::from(*n),
267            Value::Float(f) => J::from(*f),
268            Value::Bool(b) => J::Bool(*b),
269            Value::Str(s) => J::String(s.to_string()),
270            Value::Bytes(b) => {
271                let hex: String = b.iter().map(|b| format!("{:02x}", b)).collect();
272                let mut m = serde_json::Map::new();
273                m.insert("$bytes".into(), J::String(hex));
274                J::Object(m)
275            }
276            Value::Unit => J::Null,
277            Value::List(items) => J::Array(items.iter().map(Value::to_json).collect()),
278            Value::Tuple(items) => J::Array(items.iter().map(Value::to_json).collect()),
279            Value::Record(fields) => {
280                let mut m = serde_json::Map::new();
281                for (k, v) in fields { m.insert(k.clone(), v.to_json()); }
282                J::Object(m)
283            }
284            Value::Variant { name, args } => {
285                let mut m = serde_json::Map::new();
286                m.insert("$variant".into(), J::String(name.clone()));
287                m.insert("args".into(), J::Array(args.iter().map(Value::to_json).collect()));
288                J::Object(m)
289            }
290            Value::Closure { body_hash, .. } => {
291                // Render the first 4 bytes (8 hex chars) of the body
292                // hash. Trace stability follows: equivalent closures
293                // produced from different source locations get the
294                // same string. See #222.
295                let prefix: String = body_hash.iter().take(4)
296                    .map(|b| format!("{b:02x}")).collect();
297                J::String(format!("<closure {prefix}>"))
298            }
299            Value::F64Array { rows, cols, data } => {
300                let mut m = serde_json::Map::new();
301                m.insert("$f64_array".into(), J::Bool(true));
302                m.insert("rows".into(), J::from(*rows));
303                m.insert("cols".into(), J::from(*cols));
304                m.insert("data".into(), J::Array(data.iter().map(|f| J::from(*f)).collect()));
305                J::Object(m)
306            }
307            Value::Map(m) => {
308                let all_str = m.keys().all(|k| matches!(k, MapKey::Str(_)));
309                if all_str {
310                    let mut out = serde_json::Map::new();
311                    for (k, v) in m {
312                        if let MapKey::Str(s) = k {
313                            out.insert(s.clone(), v.to_json());
314                        }
315                    }
316                    J::Object(out)
317                } else {
318                    J::Array(m.iter().map(|(k, v)| {
319                        J::Array(vec![k.as_value().to_json(), v.to_json()])
320                    }).collect())
321                }
322            }
323            Value::Set(s) => J::Array(
324                s.iter().map(|k| k.as_value().to_json()).collect()),
325            Value::Deque(items) => J::Array(items.iter().map(Value::to_json).collect()),
326            Value::Actor(_) => J::String("<actor>".into()),
327            Value::Ticker(_) => J::String("<ticker>".into()),
328            Value::ArrowTable(t) => {
329                // Compact summary: schema + nrows. Full data is intentionally
330                // not emitted — Arrow tables can be GB-scale and a JSON dump
331                // would defeat the point. Callers that need the rows go
332                // through `arrow.row_at` / `arrow.col_to_*_list`.
333                let mut m = serde_json::Map::new();
334                m.insert("$arrow_table".into(), J::Bool(true));
335                m.insert("nrows".into(), J::from(t.num_rows() as i64));
336                m.insert("ncols".into(), J::from(t.num_columns() as i64));
337                let cols: Vec<J> = t
338                    .schema()
339                    .fields()
340                    .iter()
341                    .map(|f| {
342                        let mut o = serde_json::Map::new();
343                        o.insert("name".into(), J::String(f.name().clone()));
344                        o.insert("type".into(), J::String(format!("{}", f.data_type())));
345                        J::Object(o)
346                    })
347                    .collect();
348                m.insert("schema".into(), J::Array(cols));
349                J::Object(m)
350            }
351        }
352    }
353
354    /// Decode a `serde_json::Value` into a `Value`. The inverse of
355    /// [`to_json`](Self::to_json) for the shapes Lex round-trips:
356    ///
357    /// - `{"$variant": "Name", "args": [...]}` → `Value::Variant`
358    /// - `{"$bytes": "deadbeef"}` → `Value::Bytes` (lowercase hex; an
359    ///   odd-length string or non-hex character falls through to
360    ///   `Value::Record`, matching the malformed-`$variant` fallback)
361    /// - JSON object → `Value::Record`
362    /// - JSON array → `Value::List`
363    /// - JSON null → `Value::Unit`
364    /// - JSON string / bool / number → the corresponding scalar
365    ///
366    /// Map, Set, F64Array, and Closure don't round-trip — they decode
367    /// as their natural JSON shape (Object / Array / Object / Str
368    /// respectively), since the CLI / HTTP / VM callers building Values
369    /// from JSON don't have those shapes in their input vocabulary.
370    pub fn from_json(v: &serde_json::Value) -> Value {
371        use serde_json::Value as J;
372        match v {
373            J::Null => Value::Unit,
374            J::Bool(b) => Value::Bool(*b),
375            J::Number(n) => {
376                if let Some(i) = n.as_i64() { Value::Int(i) }
377                else if let Some(f) = n.as_f64() { Value::Float(f) }
378                else { Value::Unit }
379            }
380            J::String(s) => Value::Str(s.as_str().into()),
381            J::Array(items) => Value::List(items.iter().map(Value::from_json).collect::<VecDeque<_>>()),
382            J::Object(map) => {
383                if let (Some(J::String(name)), Some(J::Array(args))) =
384                    (map.get("$variant"), map.get("args"))
385                {
386                    return Value::Variant {
387                        name: name.clone(),
388                        args: args.iter().map(Value::from_json).collect(),
389                    };
390                }
391                if map.len() == 1 {
392                    if let Some(J::String(hex)) = map.get("$bytes") {
393                        if let Some(bytes) = decode_hex(hex) {
394                            return Value::Bytes(bytes);
395                        }
396                    }
397                }
398                let mut out = indexmap::IndexMap::new();
399                for (k, v) in map {
400                    out.insert(k.clone(), Value::from_json(v));
401                }
402                Value::Record(out)
403            }
404        }
405    }
406}
407
/// Decodes a hex string into bytes.
///
/// `to_json` emits lowercase hex, but decoding is case-insensitive:
/// `0-9`, `a-f` and `A-F` are all accepted. Returns `None` when the input
/// has odd length or contains any other byte (including a sign, whitespace
/// or non-ASCII text); callers fall through to a record decode rather than
/// erroring. The empty string decodes to an empty vector.
fn decode_hex(s: &str) -> Option<Vec<u8>> {
    /// Value (0–15) of a single ASCII hex digit, or `None` for any other byte.
    fn nibble(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    // Work on raw bytes: every valid digit is ASCII, so a multi-byte UTF-8
    // sequence is rejected by `nibble` without any char decoding.
    let pairs = s.as_bytes().chunks_exact(2);
    if !pairs.remainder().is_empty() {
        // Odd length: a dangling half-byte is malformed.
        return None;
    }

    let mut decoded = Vec::with_capacity(s.len() / 2);
    for pair in pairs {
        decoded.push((nibble(pair[0])? << 4) | nibble(pair[1])?);
    }
    Some(decoded)
}