Skip to main content

cairn_core/
node.rs

1//! The content-addressed AST node model.
2//!
3//! A node references its children by their content hash, so a node's own hash
4//! covers the hashes of everything beneath it — the Merkle property the store
5//! relies on (`docs/design.md` Section 7). Two structurally identical subtrees
6//! therefore have the same hash and are stored once.
7//!
8//! This is the v0.1 seed model. It now carries the Section 4 function contract
9//! (`given`/`produces`/`requires`/`on_failure`) so the checker can enforce it.
10//! A signature is part of a function's identity, so `Param`/`Produces` are
11//! inlined into the `Function` node rather than being separate nodes. Record
12//! and variant *definitions*, generics, and operators are later slices.
13
14use crate::ty::{Confidence, Effect, Type};
15use serde::{Deserialize, Serialize};
16use sha2::{Digest, Sha256};
17use std::collections::BTreeSet;
18use std::fmt;
19
20/// A content hash: the lowercase hex SHA-256 of a node's canonical bytes.
21#[derive(Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
22pub struct NodeHash(String);
23
24impl NodeHash {
25    pub fn as_str(&self) -> &str {
26        &self.0
27    }
28
29    /// Wrap an existing hash string. Used by the store when reading a ref back;
30    /// not for constructing arbitrary hashes in normal code.
31    pub(crate) fn from_raw(s: String) -> Self {
32        NodeHash(s)
33    }
34
35    /// Reconstruct a hash identifier from its string form — for transports
36    /// (MCP, CLI) that received it from a previous call. The string is treated
37    /// as an opaque identifier; its content is not validated.
38    pub fn parse(s: &str) -> Self {
39        NodeHash(s.to_string())
40    }
41}
42
43impl fmt::Display for NodeHash {
44    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45        write!(f, "{}", self.0)
46    }
47}
48
49/// One declared input: its name, type, and the minimum confidence a caller
50/// must supply (the `given` clause).
51#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
52pub struct Param {
53    pub name: String,
54    pub ty: Type,
55    pub min_confidence: Confidence,
56}
57
58/// The `produces` clause: the output type and the confidence it carries.
59#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
60pub struct Produces {
61    pub ty: Type,
62    pub confidence: Confidence,
63}
64
65/// A binary operator. Arithmetic operands and result are `Number`;
66/// comparison operands share a type and the result is `Bool`; logical
67/// operands and result are `Bool` (short-circuit). Deliberately small
68/// (Principle 9): one canonical operator per operation.
69#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)]
70pub enum BinOp {
71    Add,
72    Sub,
73    Mul,
74    Div,
75    /// Signed remainder (`a % b`); arithmetic, both operands `Number`.
76    Mod,
77    Eq,
78    /// `a != b` — the negation of `Eq`, same operand rule.
79    Neq,
80    Lt,
81    /// `a <= b`.
82    Le,
83    /// `a > b`.
84    Gt,
85    /// `a >= b`.
86    Ge,
87    /// Short-circuit boolean conjunction; both operands `Bool`.
88    And,
89    /// Short-circuit boolean disjunction; both operands `Bool`.
90    Or,
91}
92
93impl BinOp {
94    /// Comparisons yield `Bool` over two same-typed operands.
95    pub fn is_comparison(self) -> bool {
96        use BinOp::*;
97        matches!(self, Eq | Neq | Lt | Le | Gt | Ge)
98    }
99
100    /// Logical operators take and yield `Bool` and short-circuit.
101    pub fn is_logical(self) -> bool {
102        matches!(self, BinOp::And | BinOp::Or)
103    }
104
105    /// The Section 5 infix symbol.
106    pub fn symbol(self) -> &'static str {
107        match self {
108            BinOp::Add => "+",
109            BinOp::Sub => "-",
110            BinOp::Mul => "*",
111            BinOp::Div => "/",
112            BinOp::Mod => "%",
113            BinOp::Eq => "==",
114            BinOp::Neq => "!=",
115            BinOp::Lt => "<",
116            BinOp::Le => "<=",
117            BinOp::Gt => ">",
118            BinOp::Ge => ">=",
119            BinOp::And => "&&",
120            BinOp::Or => "||",
121        }
122    }
123}
124
125/// One arm of a `match`: a variant case, names bound to its payload fields
126/// (in declaration order), and the body evaluated when that case matches.
127#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
128pub struct MatchArm {
129    pub case: String,
130    pub bindings: Vec<String>,
131    pub body: NodeHash,
132}
133
134/// A stored AST node. Compound variants reference children by [`NodeHash`].
135#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
136pub enum Node {
137    /// An integer literal. (A single unified numeric type is the language
138    /// direction; the seed model only needs a concrete numeric leaf.)
139    Lit(i64),
140    /// An IEEE-754 double, stored as its bit pattern so the node stays
141    /// `Eq`/hashable and content-addresses deterministically. The uniform
142    /// i64 slot *is* these bits; only `FloatOp`/conversions reinterpret.
143    FloatLit(u64),
144    /// Arithmetic or comparison on two `Float`s. `op` is restricted to
145    /// `+ - * /` and `== < <= > >=` (no `% != && ||` on Float).
146    FloatOp {
147        op: BinOp,
148        lhs: NodeHash,
149        rhs: NodeHash,
150    },
151    /// `Number` → `Float`.
152    IntToFloat(NodeHash),
153    /// `Float` → `Number`: truncates toward zero; traps on NaN/overflow
154    /// (a clean trap, not silent UB — like the bounds checks).
155    FloatToInt(NodeHash),
156    /// A `Decimal` literal: the value pre-scaled by 10_000 (so `1.25` is
157    /// `12500`). Exact; no rounding at use.
158    DecimalLit(i64),
159    /// Arithmetic or comparison on two `Decimal`s. `+ -` are plain i64;
160    /// `*` is `(a*b)/10000`, `/` is `(a*10000)/b` (rescaling). All six
161    /// comparisons are exact. `% && ||` are rejected by the checker.
162    DecimalOp {
163        op: BinOp,
164        lhs: NodeHash,
165        rhs: NodeHash,
166    },
167    /// `Number` → `Decimal` (multiplies by 10_000).
168    IntToDecimal(NodeHash),
169    /// `Decimal` → `Number` (integer part; truncates toward zero).
170    DecimalToInt(NodeHash),
171    /// `Decimal` → `Number`: the raw scaled mantissa (the value ×10_000,
172    /// `Decimal`'s documented representation). Identity at runtime; the
173    /// seam that lets `decimal_to_str` be written in Cairn.
174    DecimalRaw(NodeHash),
175    /// A boolean literal. Bool was previously only producible by a
176    /// comparison; the literal makes `Bool` a first-class value the
177    /// logical operators can stand on.
178    Bool(bool),
179    /// Logical negation of a `Bool`, yielding `Bool`. The one unary
180    /// operator (Principle 9); `&&`/`||` are the binary `BinOp`s.
181    Not(NodeHash),
182    /// A UTF-8 string literal.
183    Str(String),
184    /// Length (in bytes) of a string. The first string operation; more
185    /// follow with the stdlib.
186    StrLen(NodeHash),
187    /// ASCII-lowercase a string (`A`–`Z` → `a`–`z`, other bytes
188    /// unchanged). The minimal primitive for case-insensitive
189    /// matching — needed because some platforms (e.g. Cloudflare)
190    /// lowercase HTTP header names, so the `header`/`cookie` lookup
191    /// cannot be case-sensitive (design.md §9).
192    StrLower(NodeHash),
193    /// A one-byte string from a code point 0–255 (the low byte of the
194    /// Number; the inverse of indexing a byte out). The minimal
195    /// primitive for percent-decoding — added when a real HTML form
196    /// (the shipped blog's create) forced `form_value` to URL-decode
197    /// `%XX`/`+`, which is otherwise inexpressible in pure Cairn
198    /// (design.md §9; Principle 10 — a real need, not speculation).
199    StrFromCode(NodeHash),
200    /// `a ++ b` — concatenate two strings.
201    StrConcat(NodeHash, NodeHash),
202    /// Byte substring `s[start .. start+len]`. Bounds-checked: a
203    /// negative index/length or `start+len > len(s)` traps (not silent
204    /// UB), proven by `out_of_bounds_list_get_and_str_slice_trap`.
205    /// **Byte-indexed by design** (decided, not a gap): a Cairn `String`
206    /// is byte-addressable for predictable, AI-author-friendly indexing;
207    /// codepoint-aware slicing is a stdlib concern, not a core operator
208    /// (Principle 9). Slicing across a UTF-8 boundary therefore yields
209    /// bytes the host renders lossily — the documented, intended
210    /// semantics of a byte slice.
211    StrSlice {
212        s: NodeHash,
213        start: NodeHash,
214        len: NodeHash,
215    },
216    /// Content equality of two strings, yielding `Bool`.
217    StrEq(NodeHash, NodeHash),
218    /// Whether `needle` occurs in `haystack`, yielding `Bool`.
219    StrContains {
220        haystack: NodeHash,
221        needle: NodeHash,
222    },
223    /// Whether `s` begins with `prefix`, yielding `Bool`. With
224    /// `StrSlice`/`StrLen` this is enough for prefix routing and
225    /// path-parameter extraction (e.g. `/customers/{id}`).
226    StrStartsWith {
227        s: NodeHash,
228        prefix: NodeHash,
229    },
230    /// 0-based byte index of the first occurrence of `needle` in
231    /// `haystack`, or `-1` if absent (an empty needle is at 0). Yields
232    /// `Number`. With `StrSlice` this is enough to split a delimited
233    /// string — e.g. parsing `db_query` rows into typed records.
234    StrIndexOf {
235        haystack: NodeHash,
236        needle: NodeHash,
237    },
238    /// Decimal rendering of a `Number` as a `String` (e.g. for HTML
239    /// output). Correct over the full i64 range, including `i64::MIN`
240    /// (rendered in the non-positive domain — no magnitude overflow).
241    NumberToStr(NodeHash),
242    /// Parse a `String` to a `Number`, leniently: an optional leading `-`
243    /// then the leading decimal digits; a non-numeric/empty string yields
244    /// 0. The unchecked fast path (cf. `StrToNumberOpt`).
245    StrToNumber(NodeHash),
246    /// Checked parse: `Some(n)` only if the whole string is a valid
247    /// integer (optional `-` then ≥1 digits, nothing else), else `None`.
248    /// The handle-able counterpart to `StrToNumber`, paralleling
249    /// `ListGet`/`ListTryGet`.
250    StrToNumberOpt(NodeHash),
251    /// Read the clock (milliseconds). The first effectful primitive: it
252    /// performs the `Time` effect and yields a `Number` at `external`
253    /// confidence (the value comes from outside the program). Lowers to a
254    /// host import; other effects follow the same pattern.
255    Now,
256    /// A non-empty list literal. Elements share a type `T`; the list's type
257    /// is `List<T>`. (Empty literals need a type annotation — unsupported in
258    /// v0.3.)
259    List(Vec<NodeHash>),
260    /// The empty `List<elem>`. The typed counterpart to a `List` literal,
261    /// needed because the element type can't be inferred with no elements.
262    /// With `ListCons` this lets a recursive function build a list of
263    /// runtime length.
264    ListEmpty { elem: Type },
265    /// Prepend `head` to `tail` (a `List<T>`), yielding a new `List<T>`.
266    /// v0.3: an O(n) fresh-array copy — immutable, no structural sharing,
267    /// consistent with the bump allocator's documented simplicity.
268    ListCons { head: NodeHash, tail: NodeHash },
269    /// `Some(value)` — a present `Option<T>` where `T` is the value's
270    /// type. The typed result for operations that may have no answer.
271    OptionSome(NodeHash),
272    /// `None : Option<elem>` — the absent case (element type can't be
273    /// inferred with no value, like `ListEmpty`).
274    OptionNone { elem: Type },
275    /// Eliminate an `Option<T>`: the contained value if `Some`, else
276    /// `default`. Both arms share `T`, the expression's type. This is the
277    /// noise-free recovery path — no `on_failure` threading.
278    OptionElse { opt: NodeHash, default: NodeHash },
279    /// Case analysis over an `Option<T>`: bind the payload to
280    /// `some_bind` in `some_body`, else evaluate `none_body`. Both bodies
281    /// share a type (the expression's type). Unlike `OptionElse` (value
282    /// or default) this runs different logic per case — the construct
283    /// that makes a returned `Option` usable for control flow.
284    OptionMatch {
285        opt: NodeHash,
286        some_bind: String,
287        some_body: NodeHash,
288        none_body: NodeHash,
289    },
290    /// Checked indexing: `Some(elem)` if `index` is in bounds, else
291    /// `None`. The handle-able counterpart to `ListGet` (which traps);
292    /// neither pollutes signatures with a failure.
293    ListTryGet { list: NodeHash, index: NodeHash },
294    /// Number of elements in a list.
295    ListLen(NodeHash),
296    /// Element of a list at a (0-based `Number`) index. No runtime bounds
297    /// check in v0.3 (a known simplification, like the bump allocator).
298    ListGet { list: NodeHash, index: NodeHash },
299    /// A non-empty map literal of key/value pairs. Keys share a type `K`,
300    /// values a type `V`; the type is `Map<K, V>`.
301    Map(Vec<(NodeHash, NodeHash)>),
302    /// Value for a key (linear scan). Key equality is i64/identity —
303    /// **exact for `Number` keys, which is `Map`'s decided domain**
304    /// (Principle 9: one canonical form). A missing key yields 0;
305    /// `MapTryGet` is the handle-able form. Structural keying of any
306    /// type (incl. `String`) is *not* a second mechanism here — it is
307    /// `find` over a `List` of key/value records (stdlib, first-class
308    /// functions), which is structural for any `K`. Decided, not
309    /// deferred: adding key-kind dispatch to `Map` would be the
310    /// speculative generality Principle 10 forbids when `find` already
311    /// covers it (see `string_keyed_lookup_is_find_over_a_list`).
312    MapGet { map: NodeHash, key: NodeHash },
313    /// Checked lookup: `Some(V)` for the first matching key, else
314    /// `None` — the handle-able counterpart to `MapGet` (which yields 0
315    /// on a miss, indistinguishable from a real 0). Same decided
316    /// Number/identity key domain as `MapGet`.
317    MapTryGet { map: NodeHash, key: NodeHash },
318    /// Number of entries in a map.
319    MapLen(NodeHash),
320    /// Observe a value for diagnostics (the `Log` effect), passing it
321    /// through unchanged (same type and confidence as the argument). Lowers
322    /// to a `host::log` import.
323    Log(NodeHash),
324    /// Announce that `topic` (a `String`) changed — the `Live` effect
325    /// (design.md §10). The live runtime re-renders and pushes to every
326    /// connection subscribed to that topic. Yields `Number` (0). Lowers
327    /// to a `host::publish` import. Liveness is *this*, explicit and
328    /// effect-typed — never an implicit default.
329    Publish(NodeHash),
330    /// Emit an extra HTTP response header — the `Resp` effect. `name`
331    /// and `value` are `String`s (e.g. `"Set-Cookie"`,
332    /// `"sid=abc; HttpOnly; Path=/"`); the host buffers it per request
333    /// and writes it after the standard headers. Yields `Number` (0),
334    /// so it sequences in a step like `publish`. Lowers to a
335    /// `host::set_header` import. Response headers are *this*, explicit
336    /// and effect-typed — not a hidden `Response` field.
337    SetHeader { name: NodeHash, value: NodeHash },
338    /// Draw a random `Number` (the `Rand` effect), at `external` confidence
339    /// (it comes from outside the program). Lowers to a `host::rand` import.
340    Rand,
341    /// Allocate a mutable cell holding `value` (the `Mut` effect). Type is
342    /// `Cell<T>` where `T` is the value's type.
343    MutNew(NodeHash),
344    /// Read a mutable cell's current value.
345    MutGet(NodeHash),
346    /// Write `value` into a cell (the `Mut` effect); passes `value` through.
347    MutSet { cell: NodeHash, value: NodeHash },
348    /// Write a `String` to a path (the `Disk` effect); returns the number of
349    /// bytes written.
350    DiskWrite { path: NodeHash, content: NodeHash },
351    /// Read the file at a `String` path (the `Disk` effect), returning its
352    /// contents as a `String` (host→wasm allocation). A missing file yields
353    /// the empty string in v0.3.
354    DiskRead(NodeHash),
355    /// HTTP(S) GET a `String` URL (the `Net` effect), returning the
356    /// HTTP status as a `Number` (`-1` on transport error). Backed by a
357    /// real `ureq` + TLS client — no stub path exists (proven by
358    /// `real_net_get_returns_the_http_status`).
359    NetGet(NodeHash),
360    /// Run a SQL statement (the `Db` effect), returning the result as a
361    /// `String` at `persisted` confidence (read back from the system of
362    /// record). `params` is a `List<String>` bound to the statement's `?`
363    /// placeholders by the driver — the *only* query form, so user input
364    /// never reaches SQL by concatenation (use an empty list for static
365    /// SQL). Backed by **real embedded SQLite only** (rusqlite —
366    /// in-memory by default, a file when a path is given). There is **no
367    /// canned/stub path**: every query hits the engine (`wasm::run_sql`;
368    /// the v0.3 removal of the old canned path is complete).
369    DbQuery {
370        sql: NodeHash,
371        params: NodeHash,
372    },
373    /// A reference to an in-scope binding, by name.
374    Ref(String),
375    /// A call: a callee function name plus argument nodes.
376    Call { func: String, args: Vec<NodeHash> },
377    /// A named module function used as a first-class value. Its type is the
378    /// function's signature as a `Fn`. Anonymous lambdas with transitive
379    /// closure capture are **shipped** (K2 — see `Lambda`/`CallValue`; the
380    /// stdlib `map`/`filter`/`fold` and every app pass closures). `FuncRef`
381    /// is the named-function value form; `Lambda` is the anonymous/closure
382    /// form — both lower through the funcref table + `call_indirect`.
383    FuncRef(String),
384    /// Apply a function *value* to arguments. `callee` evaluates to a
385    /// `Fn`; `args` are checked against its parameter types and its
386    /// effects union into the caller. Lowers to `call_indirect`.
387    CallValue {
388        callee: NodeHash,
389        args: Vec<NodeHash>,
390    },
391    /// An anonymous closure. `params` are its inputs (typed, like a
392    /// function's); `body` is a single result expression. Names it
393    /// references that are neither its params nor module functions are
394    /// *captured* by value from the enclosing scope at creation. Its type
395    /// is a `Fn`; its body's effects live in that `Fn` (a closure's
396    /// effects fire when it is *called*, not when it is made). v0.4: a
397    /// single-expression body and no uncaught `Fail` (documented — write a
398    /// named function and `FuncRef` it for step bodies or failures).
399    Lambda {
400        params: Vec<Param>,
401        body: NodeHash,
402    },
403    /// A typed hole: an unfilled position carrying what it expects.
404    Hole { expects: String },
405    /// A binary operation over two sub-expressions.
406    BinOp {
407        op: BinOp,
408        lhs: NodeHash,
409        rhs: NodeHash,
410    },
411    /// Raise a typed failure, short-circuiting the reasoning chain. Its type
412    /// is `Never`. The named variant must be covered by the enclosing
413    /// function's `on_failure` unless a `Handle` catches it.
414    Fail(String),
415    /// Run `body`; if it raises one of the handled failure variants, evaluate
416    /// that handler's `recover` expression instead. A handled failure does not
417    /// propagate. `handlers` maps a failure-variant name to its recovery
418    /// expression (which yields the value, or itself `fail`s).
419    Handle {
420        body: NodeHash,
421        handlers: Vec<(String, NodeHash)>,
422    },
423    /// A conditional expression. `cond` must be `Bool`; the two branches
424    /// share a type, which is the expression's type. There is no statement
425    /// form — Cairn has expressions and reasoning-chain steps, not imperative
426    /// control flow.
427    If {
428        cond: NodeHash,
429        then_branch: NodeHash,
430        else_branch: NodeHash,
431    },
432    /// A reasoning-chain step: `binding = value`.
433    Step { binding: String, value: NodeHash },
434    /// A function: the full Section 4 contract plus an ordered body and a
435    /// result expression.
436    Function {
437        name: String,
438        /// Declared generic type parameters (e.g. `["T"]`). Empty = monomorphic.
439        type_params: Vec<String>,
440        params: Vec<Param>,
441        produces: Produces,
442        requires: BTreeSet<Effect>,
443        on_failure: Vec<String>,
444        body: Vec<NodeHash>,
445        result: NodeHash,
446    },
447    /// A module: named type definitions and function definitions, referenced
448    /// by hash. The unit a checker resolves calls and type names within.
449    Module {
450        name: String,
451        types: Vec<NodeHash>,
452        functions: Vec<NodeHash>,
453    },
454    /// A record (product) type definition: named fields, each with a type.
455    RecordDef {
456        name: String,
457        fields: Vec<(String, Type)>,
458    },
459    /// Construct a record value of a named record type.
460    Record {
461        type_name: String,
462        fields: Vec<(String, NodeHash)>,
463    },
464    /// Access a field of a record value. `type_name` is the record type the
465    /// base must have; it makes the node self-describing for type-directed
466    /// lowering (the checker verifies it; the projection elides it).
467    Field {
468        base: NodeHash,
469        type_name: String,
470        field: String,
471    },
472    /// A variant (sum) type definition: named cases, each with named payload
473    /// fields (an empty payload is a nullary case).
474    VariantDef {
475        name: String,
476        cases: Vec<(String, Vec<(String, Type)>)>,
477    },
478    /// Construct a value of a named variant type, in one of its cases.
479    Variant {
480        type_name: String,
481        case: String,
482        fields: Vec<(String, NodeHash)>,
483    },
484    /// Case analysis over a variant value. Must be exhaustive. `type_name`
485    /// is the scrutinee's variant type (self-describing for type-directed
486    /// lowering; the checker verifies it, the projection elides it).
487    Match {
488        scrutinee: NodeHash,
489        type_name: String,
490        arms: Vec<MatchArm>,
491    },
492}
493
494impl Node {
495    /// Canonical bytes used for both hashing and storage.
496    ///
497    /// v0.1 uses `serde_json`; struct and enum field order is stable and the
498    /// effect set is a `BTreeSet`, so the bytes are deterministic. A canonical
499    /// binary encoding is a later refinement.
500    pub(crate) fn canonical_bytes(&self) -> Vec<u8> {
501        serde_json::to_vec(self).expect("Node serialization is infallible")
502    }
503
504    /// This node's content hash. Because children are referenced by hash, this
505    /// transitively covers the entire subtree (the Merkle property).
506    pub fn hash(&self) -> NodeHash {
507        let mut hasher = Sha256::new();
508        hasher.update(self.canonical_bytes());
509        NodeHash(hex(&hasher.finalize()))
510    }
511}
512
513fn hex(bytes: &[u8]) -> String {
514    use std::fmt::Write;
515    let mut s = String::with_capacity(bytes.len() * 2);
516    for b in bytes {
517        write!(s, "{:02x}", b).expect("writing to a String cannot fail");
518    }
519    s
520}