Skip to main content

lex_bytecode/
value.rs

1//! Runtime values.
2
3use crate::program::BodyHash;
4use arrow_array::RecordBatch;
5use indexmap::IndexMap;
6use smol_str::SmolStr;
7use std::collections::{BTreeMap, BTreeSet, VecDeque};
8use std::sync::{Arc, Mutex};
9
/// Internal state of a `conc.Actor`. Protected by a `Mutex` so that
/// concurrent callers serialise on message delivery (the actor processes
/// one message at a time). The `handler` closure is called on the
/// *calling* VM's thread — no extra OS thread required — which lets it
/// invoke arbitrary effects (sql, net, …) through the same handler chain.
///
/// The `Mutex` is not a field of this struct: the cell is wrapped as
/// `Arc<Mutex<ActorCell>>` inside `Value::Actor`.
#[derive(Debug, Clone)]
pub struct ActorCell {
    /// The actor's state value.
    // NOTE(review): presumably replaced with the handler's result after
    // each delivered message — confirm against the `conc` runtime.
    pub state: Value,
    /// The message handler. Stored as a plain `Value`; the type does not
    /// enforce that it is a `Value::Closure`.
    pub handler: Value,
}
20
/// A runtime value.
///
/// `Debug` and `Clone` are derived. Equality is implemented by hand (see
/// the `PartialEq` impl in this file) because closures compare by body
/// hash and actors compare by identity.
#[derive(Debug, Clone)]
pub enum Value {
    /// 64-bit signed integer.
    Int(i64),
    /// 64-bit float. Equality uses `f64`'s `==`, so `NaN` is never equal
    /// to itself.
    Float(f64),
    /// Boolean.
    Bool(bool),
    /// String value. `SmolStr` stores strings ≤ 22 bytes inline — no heap
    /// allocation for identifiers, HTTP methods, status codes, short keys, etc.
    /// Clone of a short `SmolStr` is a 24-byte stack copy (#389 slice 4).
    Str(SmolStr),
    /// Raw byte buffer. `to_json` renders it as `{"$bytes": "<hex>"}`.
    Bytes(Vec<u8>),
    /// The unit value. Maps to JSON `null` in `to_json` / `from_json`.
    Unit,
    /// Ordered sequence of values, stored in a `VecDeque`.
    List(VecDeque<Value>),
    /// Positional group of values. Renders as a JSON array, like `List`.
    Tuple(Vec<Value>),
    /// Named fields, stored in an `IndexMap` keyed by field name.
    Record(IndexMap<String, Value>),
    /// Tagged variant: a constructor `name` plus its positional `args`.
    Variant { name: String, args: Vec<Value> },
    /// First-class function value (a lambda + its captured locals). The
    /// function's first `captures.len()` params bind to `captures`; the
    /// remaining params are supplied at call time.
    ///
    /// `fn_id` is a dense compile-time index into `Program::functions`
    /// for fast dispatch; `body_hash` is the **canonical identity** —
    /// two closures with identical bytecode bodies compare equal even
    /// when their `fn_id`s differ (which they will, when the source
    /// has the same closure literal at two locations). See `PartialEq`
    /// below and #222 for the rationale.
    Closure { fn_id: u32, body_hash: BodyHash, captures: Vec<Value> },
    /// Dense row-major `f64` matrix. A "fast lane" representation that
    /// avoids the per-element `Value::Float` boxing of `Value::List`.
    /// Used by Core's native tensor ops (matmul, dot, …) so end-to-end
    /// matmul perf hits the §13.7 #1 100ms target without paying for
    /// 2M Value boxings at the call boundary.
    F64Array { rows: u32, cols: u32, data: Vec<f64> },
    /// Persistent map keyed by `MapKey` (`Str` or `Int`). Insertion-
    /// independent equality (sorted by `BTreeMap`'s `Ord`), so two
    /// maps built from the same pairs in different orders compare
    /// equal. Restricting keys to two primitive variants keeps
    /// `Eq + Hash` requirements off `Value` itself, which has
    /// closures and floats and can't be hashed soundly.
    Map(BTreeMap<MapKey, Value>),
    /// Persistent set with the same key-type discipline as `Map`.
    Set(BTreeSet<MapKey>),
    /// Double-ended queue. O(1) push/pop on both ends; otherwise
    /// behaves like `List` for iteration / equality / JSON shape.
    /// Lex's type system tracks `Deque[T]` separately from `List[T]`
    /// so users explicitly opt in to deque semantics; the runtime
    /// uses this dedicated variant rather than backing a deque on top
    /// of `Value::List` (which would make `push_front` O(n)).
    // NOTE(review): `List` above is itself stored in a `VecDeque`, so the
    // O(n) `push_front` rationale may predate that change — confirm.
    // Also note that a `Deque` never compares equal to a `List` in the
    // `PartialEq` impl, even with identical elements.
    Deque(VecDeque<Value>),
    /// A handle to a `conc.Actor`. The `Arc<Mutex<ActorCell>>` allows
    /// cheap cloning and safe concurrent access — the mutex serialises
    /// message delivery so the actor processes one message at a time.
    /// Two actor handles compare equal iff they point to the same cell
    /// (identity equality, not structural equality).
    Actor(Arc<Mutex<ActorCell>>),
    /// Apache Arrow `RecordBatch` — an unboxed columnar table. The
    /// "fast lane" representation for `lex-frame` and any future
    /// dataframe code: a `Value::ArrowTable` with one int64 column
    /// of N rows is N×8 bytes of contiguous memory, not N
    /// `Value::Int(_)` enum tags inside a `VecDeque`. Reductions
    /// (`arrow.col_sum_int`, `arrow.col_mean`, …) execute as one
    /// Rust call over the flat buffer, bypassing the bytecode VM
    /// for the inner loop.
    ///
    /// `Arc` makes clone cheap (refcount bump) — Arrow tables are
    /// already immutable so structural sharing across closures is
    /// safe. Equality is structural over schema + columns.
    ArrowTable(Arc<RecordBatch>),
}
89
90/// Manual `PartialEq` for `Value` (#222). Mirrors the auto-derived
91/// implementation for every variant *except* `Closure`, which compares
92/// on `(body_hash, captures)` only — `fn_id` is a dense compile-time
93/// index that is not stable across source-location-equivalent closure
94/// literals, and including it would defeat the canonicality property
95/// the `body_hash` field exists to provide.
96impl PartialEq for Value {
97    fn eq(&self, other: &Self) -> bool {
98        use Value::*;
99        match (self, other) {
100            (Int(a), Int(b)) => a == b,
101            (Float(a), Float(b)) => a == b,
102            (Bool(a), Bool(b)) => a == b,
103            (Str(a), Str(b)) => a == b,
104            (Bytes(a), Bytes(b)) => a == b,
105            (Unit, Unit) => true,
106            (List(a), List(b)) => a == b,
107            (Tuple(a), Tuple(b)) => a == b,
108            (Record(a), Record(b)) => a == b,
109            (Variant { name: an, args: aa }, Variant { name: bn, args: ba }) =>
110                an == bn && aa == ba,
111            (Closure { body_hash: ah, captures: ac, .. },
112             Closure { body_hash: bh, captures: bc, .. }) =>
113                ah == bh && ac == bc,
114            (F64Array { rows: ar, cols: ac, data: ad },
115             F64Array { rows: br, cols: bc, data: bd }) =>
116                ar == br && ac == bc && ad == bd,
117            (Map(a), Map(b)) => a == b,
118            (Set(a), Set(b)) => a == b,
119            (Deque(a), Deque(b)) => a == b,
120            // Actor identity: same if both handles point to the same cell.
121            (Actor(a), Actor(b)) => Arc::ptr_eq(a, b),
122            // Arrow table equality: structural over schema + columns.
123            // RecordBatch implements PartialEq directly.
124            (ArrowTable(a), ArrowTable(b)) => a == b,
125            _ => false,
126        }
127    }
128}
129
/// Hashable, ordered key for `Value::Map` / `Value::Set`. v1
/// supports `Str` and `Int`; extending to other primitives or to
/// records is forward-compatible since the type is not exposed
/// to user code beyond the surface API.
///
/// `Ord` is derived, so variants order by declaration: every `Str` key
/// sorts before every `Int` key, and keys of the same variant order by
/// their payload. This is the iteration order of the backing
/// `BTreeMap` / `BTreeSet`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum MapKey {
    /// String key; produced from `Value::Str` by `MapKey::from_value`.
    Str(String),
    /// Integer key; produced from `Value::Int` by `MapKey::from_value`.
    Int(i64),
}
139
140impl MapKey {
141    pub fn from_value(v: &Value) -> Result<Self, String> {
142        match v {
143            Value::Str(s) => Ok(MapKey::Str(s.to_string())),
144            Value::Int(n) => Ok(MapKey::Int(*n)),
145            other => Err(format!(
146                "map/set key must be Str or Int, got {other:?}")),
147        }
148    }
149    pub fn into_value(self) -> Value {
150        match self {
151            MapKey::Str(s) => Value::Str(s.into()),
152            MapKey::Int(n) => Value::Int(n),
153        }
154    }
155    pub fn as_value(&self) -> Value {
156        match self {
157            MapKey::Str(s) => Value::Str(s.as_str().into()),
158            MapKey::Int(n) => Value::Int(*n),
159        }
160    }
161}
162
163impl Value {
164    pub fn as_int(&self) -> i64 {
165        match self { Value::Int(n) => *n, other => panic!("expected Int, got {other:?}") }
166    }
167    pub fn as_float(&self) -> f64 {
168        match self { Value::Float(n) => *n, other => panic!("expected Float, got {other:?}") }
169    }
170    pub fn as_bool(&self) -> bool {
171        match self { Value::Bool(b) => *b, other => panic!("expected Bool, got {other:?}") }
172    }
173    pub fn as_str(&self) -> &str {
174        match self { Value::Str(s) => s, other => panic!("expected Str, got {other:?}") }
175    }
176
177    /// Render this `Value` as a `serde_json::Value` for emission to
178    /// CLI output, the agent API, conformance harness reports, etc.
179    /// Canonical mapping shared across crates; previously every
180    /// boundary had its own copy.
181    ///
182    /// Encoding:
183    /// - `Variant { name, args }` → `{"$variant": name, "args": [...]}`
184    /// - `F64Array { ... }` → `{"$f64_array": true, rows, cols, data}`
185    /// - `Closure { body_hash, .. }` → `"<closure HEX8>"` (first 8 hex
186    ///   chars of the body hash; equivalent closures across source
187    ///   locations render identically — see #222)
188    /// - `Bytes` → `{"$bytes": "deadbeef"}` (lowercase hex). Round-trips
189    ///   through `from_json`. Bare hex strings decode as `Str`, so the
190    ///   marker is required to disambiguate bytes from a string that
191    ///   happens to look like hex.
192    /// - `Map` with all-`Str` keys → JSON object; otherwise array of
193    ///   `[key, value]` pairs (Int keys can't be JSON-object keys)
194    /// - `Set` → JSON array of elements
195    /// - other variants → their natural JSON shape
196    ///
197    /// Note: this form is **not** round-trippable for traces (see
198    /// `lex-trace`'s recorder, which uses a richer marker form).
199    pub fn to_json(&self) -> serde_json::Value {
200        use serde_json::Value as J;
201        match self {
202            Value::Int(n) => J::from(*n),
203            Value::Float(f) => J::from(*f),
204            Value::Bool(b) => J::Bool(*b),
205            Value::Str(s) => J::String(s.to_string()),
206            Value::Bytes(b) => {
207                let hex: String = b.iter().map(|b| format!("{:02x}", b)).collect();
208                let mut m = serde_json::Map::new();
209                m.insert("$bytes".into(), J::String(hex));
210                J::Object(m)
211            }
212            Value::Unit => J::Null,
213            Value::List(items) => J::Array(items.iter().map(Value::to_json).collect()),
214            Value::Tuple(items) => J::Array(items.iter().map(Value::to_json).collect()),
215            Value::Record(fields) => {
216                let mut m = serde_json::Map::new();
217                for (k, v) in fields { m.insert(k.clone(), v.to_json()); }
218                J::Object(m)
219            }
220            Value::Variant { name, args } => {
221                let mut m = serde_json::Map::new();
222                m.insert("$variant".into(), J::String(name.clone()));
223                m.insert("args".into(), J::Array(args.iter().map(Value::to_json).collect()));
224                J::Object(m)
225            }
226            Value::Closure { body_hash, .. } => {
227                // Render the first 4 bytes (8 hex chars) of the body
228                // hash. Trace stability follows: equivalent closures
229                // produced from different source locations get the
230                // same string. See #222.
231                let prefix: String = body_hash.iter().take(4)
232                    .map(|b| format!("{b:02x}")).collect();
233                J::String(format!("<closure {prefix}>"))
234            }
235            Value::F64Array { rows, cols, data } => {
236                let mut m = serde_json::Map::new();
237                m.insert("$f64_array".into(), J::Bool(true));
238                m.insert("rows".into(), J::from(*rows));
239                m.insert("cols".into(), J::from(*cols));
240                m.insert("data".into(), J::Array(data.iter().map(|f| J::from(*f)).collect()));
241                J::Object(m)
242            }
243            Value::Map(m) => {
244                let all_str = m.keys().all(|k| matches!(k, MapKey::Str(_)));
245                if all_str {
246                    let mut out = serde_json::Map::new();
247                    for (k, v) in m {
248                        if let MapKey::Str(s) = k {
249                            out.insert(s.clone(), v.to_json());
250                        }
251                    }
252                    J::Object(out)
253                } else {
254                    J::Array(m.iter().map(|(k, v)| {
255                        J::Array(vec![k.as_value().to_json(), v.to_json()])
256                    }).collect())
257                }
258            }
259            Value::Set(s) => J::Array(
260                s.iter().map(|k| k.as_value().to_json()).collect()),
261            Value::Deque(items) => J::Array(items.iter().map(Value::to_json).collect()),
262            Value::Actor(_) => J::String("<actor>".into()),
263            Value::ArrowTable(t) => {
264                // Compact summary: schema + nrows. Full data is intentionally
265                // not emitted — Arrow tables can be GB-scale and a JSON dump
266                // would defeat the point. Callers that need the rows go
267                // through `arrow.row_at` / `arrow.col_to_*_list`.
268                let mut m = serde_json::Map::new();
269                m.insert("$arrow_table".into(), J::Bool(true));
270                m.insert("nrows".into(), J::from(t.num_rows() as i64));
271                m.insert("ncols".into(), J::from(t.num_columns() as i64));
272                let cols: Vec<J> = t
273                    .schema()
274                    .fields()
275                    .iter()
276                    .map(|f| {
277                        let mut o = serde_json::Map::new();
278                        o.insert("name".into(), J::String(f.name().clone()));
279                        o.insert("type".into(), J::String(format!("{}", f.data_type())));
280                        J::Object(o)
281                    })
282                    .collect();
283                m.insert("schema".into(), J::Array(cols));
284                J::Object(m)
285            }
286        }
287    }
288
289    /// Decode a `serde_json::Value` into a `Value`. The inverse of
290    /// [`to_json`](Self::to_json) for the shapes Lex round-trips:
291    ///
292    /// - `{"$variant": "Name", "args": [...]}` → `Value::Variant`
293    /// - `{"$bytes": "deadbeef"}` → `Value::Bytes` (lowercase hex; an
294    ///   odd-length string or non-hex character falls through to
295    ///   `Value::Record`, matching the malformed-`$variant` fallback)
296    /// - JSON object → `Value::Record`
297    /// - JSON array → `Value::List`
298    /// - JSON null → `Value::Unit`
299    /// - JSON string / bool / number → the corresponding scalar
300    ///
301    /// Map, Set, F64Array, and Closure don't round-trip — they decode
302    /// as their natural JSON shape (Object / Array / Object / Str
303    /// respectively), since the CLI / HTTP / VM callers building Values
304    /// from JSON don't have those shapes in their input vocabulary.
305    pub fn from_json(v: &serde_json::Value) -> Value {
306        use serde_json::Value as J;
307        match v {
308            J::Null => Value::Unit,
309            J::Bool(b) => Value::Bool(*b),
310            J::Number(n) => {
311                if let Some(i) = n.as_i64() { Value::Int(i) }
312                else if let Some(f) = n.as_f64() { Value::Float(f) }
313                else { Value::Unit }
314            }
315            J::String(s) => Value::Str(s.as_str().into()),
316            J::Array(items) => Value::List(items.iter().map(Value::from_json).collect::<VecDeque<_>>()),
317            J::Object(map) => {
318                if let (Some(J::String(name)), Some(J::Array(args))) =
319                    (map.get("$variant"), map.get("args"))
320                {
321                    return Value::Variant {
322                        name: name.clone(),
323                        args: args.iter().map(Value::from_json).collect(),
324                    };
325                }
326                if map.len() == 1 {
327                    if let Some(J::String(hex)) = map.get("$bytes") {
328                        if let Some(bytes) = decode_hex(hex) {
329                            return Value::Bytes(bytes);
330                        }
331                    }
332                }
333                let mut out = indexmap::IndexMap::new();
334                for (k, v) in map {
335                    out.insert(k.clone(), Value::from_json(v));
336                }
337                Value::Record(out)
338            }
339        }
340    }
341}
342
/// Hex string → bytes, two digits per byte (high nibble first).
///
/// Returns `None` when the input has odd length or contains a character
/// that is not a hex digit; callers fall through to a record decode
/// rather than erroring. `to_json` only ever emits lowercase digits, but
/// uppercase `A`–`F` are accepted here as well. The empty string decodes
/// to an empty byte vector.
fn decode_hex(s: &str) -> Option<Vec<u8>> {
    let pairs = s.as_bytes().chunks_exact(2);
    // A leftover byte means the input had an odd number of digits.
    if !pairs.remainder().is_empty() {
        return None;
    }
    pairs
        .map(|pair| {
            // Bytes >= 0x80 map to non-hex chars, so multi-byte UTF-8
            // input is rejected by `to_digit` rather than mis-decoded.
            let hi = char::from(pair[0]).to_digit(16)?;
            let lo = char::from(pair[1]).to_digit(16)?;
            Some(((hi << 4) | lo) as u8)
        })
        .collect()
}