cairn_core/node.rs
1//! The content-addressed AST node model.
2//!
3//! A node references its children by their content hash, so a node's own hash
4//! covers the hashes of everything beneath it — the Merkle property the store
5//! relies on (`docs/design.md` Section 7). Two structurally identical subtrees
6//! therefore have the same hash and are stored once.
7//!
8//! This is the v0.1 seed model. It now carries the Section 4 function contract
9//! (`given`/`produces`/`requires`/`on_failure`) so the checker can enforce it.
10//! A signature is part of a function's identity, so `Param`/`Produces` are
11//! inlined into the `Function` node rather than being separate nodes. Record
12//! and variant *definitions*, generics, and operators are later slices.
13
14use crate::ty::{Confidence, Effect, Type};
15use serde::{Deserialize, Serialize};
16use sha2::{Digest, Sha256};
17use std::collections::BTreeSet;
18use std::fmt;
19
20/// A content hash: the lowercase hex SHA-256 of a node's canonical bytes.
21#[derive(Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
22pub struct NodeHash(String);
23
24impl NodeHash {
25 pub fn as_str(&self) -> &str {
26 &self.0
27 }
28
29 /// Wrap an existing hash string. Used by the store when reading a ref back;
30 /// not for constructing arbitrary hashes in normal code.
31 pub(crate) fn from_raw(s: String) -> Self {
32 NodeHash(s)
33 }
34
35 /// Reconstruct a hash identifier from its string form — for transports
36 /// (MCP, CLI) that received it from a previous call. The string is treated
37 /// as an opaque identifier; its content is not validated.
38 pub fn parse(s: &str) -> Self {
39 NodeHash(s.to_string())
40 }
41}
42
43impl fmt::Display for NodeHash {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 write!(f, "{}", self.0)
46 }
47}
48
49/// One declared input: its name, type, and the minimum confidence a caller
50/// must supply (the `given` clause).
51#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
52pub struct Param {
53 pub name: String,
54 pub ty: Type,
55 pub min_confidence: Confidence,
56}
57
58/// The `produces` clause: the output type and the confidence it carries.
59#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
60pub struct Produces {
61 pub ty: Type,
62 pub confidence: Confidence,
63}
64
65/// A binary operator. Arithmetic operands and result are `Number`;
66/// comparison operands share a type and the result is `Bool`; logical
67/// operands and result are `Bool` (short-circuit). Deliberately small
68/// (Principle 9): one canonical operator per operation.
69#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)]
70pub enum BinOp {
71 Add,
72 Sub,
73 Mul,
74 Div,
75 /// Signed remainder (`a % b`); arithmetic, both operands `Number`.
76 Mod,
77 Eq,
78 /// `a != b` — the negation of `Eq`, same operand rule.
79 Neq,
80 Lt,
81 /// `a <= b`.
82 Le,
83 /// `a > b`.
84 Gt,
85 /// `a >= b`.
86 Ge,
87 /// Short-circuit boolean conjunction; both operands `Bool`.
88 And,
89 /// Short-circuit boolean disjunction; both operands `Bool`.
90 Or,
91}
92
93impl BinOp {
94 /// Comparisons yield `Bool` over two same-typed operands.
95 pub fn is_comparison(self) -> bool {
96 use BinOp::*;
97 matches!(self, Eq | Neq | Lt | Le | Gt | Ge)
98 }
99
100 /// Logical operators take and yield `Bool` and short-circuit.
101 pub fn is_logical(self) -> bool {
102 matches!(self, BinOp::And | BinOp::Or)
103 }
104
105 /// The Section 5 infix symbol.
106 pub fn symbol(self) -> &'static str {
107 match self {
108 BinOp::Add => "+",
109 BinOp::Sub => "-",
110 BinOp::Mul => "*",
111 BinOp::Div => "/",
112 BinOp::Mod => "%",
113 BinOp::Eq => "==",
114 BinOp::Neq => "!=",
115 BinOp::Lt => "<",
116 BinOp::Le => "<=",
117 BinOp::Gt => ">",
118 BinOp::Ge => ">=",
119 BinOp::And => "&&",
120 BinOp::Or => "||",
121 }
122 }
123}
124
125/// One arm of a `match`: a variant case, names bound to its payload fields
126/// (in declaration order), and the body evaluated when that case matches.
127#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
128pub struct MatchArm {
129 pub case: String,
130 pub bindings: Vec<String>,
131 pub body: NodeHash,
132}
133
134/// A stored AST node. Compound variants reference children by [`NodeHash`].
135#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
136pub enum Node {
137 /// An integer literal. (A single unified numeric type is the language
138 /// direction; the seed model only needs a concrete numeric leaf.)
139 Lit(i64),
140 /// An IEEE-754 double, stored as its bit pattern so the node stays
141 /// `Eq`/hashable and content-addresses deterministically. The uniform
142 /// i64 slot *is* these bits; only `FloatOp`/conversions reinterpret.
143 FloatLit(u64),
144 /// Arithmetic or comparison on two `Float`s. `op` is restricted to
145 /// `+ - * /` and `== < <= > >=` (no `% != && ||` on Float).
146 FloatOp {
147 op: BinOp,
148 lhs: NodeHash,
149 rhs: NodeHash,
150 },
151 /// `Number` → `Float`.
152 IntToFloat(NodeHash),
153 /// `Float` → `Number`: truncates toward zero; traps on NaN/overflow
154 /// (a clean trap, not silent UB — like the bounds checks).
155 FloatToInt(NodeHash),
156 /// A `Decimal` literal: the value pre-scaled by 10_000 (so `1.25` is
157 /// `12500`). Exact; no rounding at use.
158 DecimalLit(i64),
159 /// Arithmetic or comparison on two `Decimal`s. `+ -` are plain i64;
160 /// `*` is `(a*b)/10000`, `/` is `(a*10000)/b` (rescaling). All six
161 /// comparisons are exact. `% && ||` are rejected by the checker.
162 DecimalOp {
163 op: BinOp,
164 lhs: NodeHash,
165 rhs: NodeHash,
166 },
167 /// `Number` → `Decimal` (multiplies by 10_000).
168 IntToDecimal(NodeHash),
169 /// `Decimal` → `Number` (integer part; truncates toward zero).
170 DecimalToInt(NodeHash),
171 /// `Decimal` → `Number`: the raw scaled mantissa (the value ×10_000,
172 /// `Decimal`'s documented representation). Identity at runtime; the
173 /// seam that lets `decimal_to_str` be written in Cairn.
174 DecimalRaw(NodeHash),
175 /// A boolean literal. Bool was previously only producible by a
176 /// comparison; the literal makes `Bool` a first-class value the
177 /// logical operators can stand on.
178 Bool(bool),
179 /// Logical negation of a `Bool`, yielding `Bool`. The one unary
180 /// operator (Principle 9); `&&`/`||` are the binary `BinOp`s.
181 Not(NodeHash),
182 /// A UTF-8 string literal.
183 Str(String),
184 /// Length (in bytes) of a string. The first string operation; more
185 /// follow with the stdlib.
186 StrLen(NodeHash),
187 /// ASCII-lowercase a string (`A`–`Z` → `a`–`z`, other bytes
188 /// unchanged). The minimal primitive for case-insensitive
189 /// matching — needed because some platforms (e.g. Cloudflare)
190 /// lowercase HTTP header names, so the `header`/`cookie` lookup
191 /// cannot be case-sensitive (design.md §9).
192 StrLower(NodeHash),
193 /// A one-byte string from a code point 0–255 (the low byte of the
194 /// Number; the inverse of indexing a byte out). The minimal
195 /// primitive for percent-decoding — added when a real HTML form
196 /// (the shipped blog's create) forced `form_value` to URL-decode
197 /// `%XX`/`+`, which is otherwise inexpressible in pure Cairn
198 /// (design.md §9; Principle 10 — a real need, not speculation).
199 StrFromCode(NodeHash),
200 /// `a ++ b` — concatenate two strings.
201 StrConcat(NodeHash, NodeHash),
202 /// Byte substring `s[start .. start+len]`. Bounds-checked: a
203 /// negative index/length or `start+len > len(s)` traps (not silent
204 /// UB), proven by `out_of_bounds_list_get_and_str_slice_trap`.
205 /// **Byte-indexed by design** (decided, not a gap): a Cairn `String`
206 /// is byte-addressable for predictable, AI-author-friendly indexing;
207 /// codepoint-aware slicing is a stdlib concern, not a core operator
208 /// (Principle 9). Slicing across a UTF-8 boundary therefore yields
209 /// bytes the host renders lossily — the documented, intended
210 /// semantics of a byte slice.
211 StrSlice {
212 s: NodeHash,
213 start: NodeHash,
214 len: NodeHash,
215 },
216 /// Content equality of two strings, yielding `Bool`.
217 StrEq(NodeHash, NodeHash),
218 /// Whether `needle` occurs in `haystack`, yielding `Bool`.
219 StrContains {
220 haystack: NodeHash,
221 needle: NodeHash,
222 },
223 /// Whether `s` begins with `prefix`, yielding `Bool`. With
224 /// `StrSlice`/`StrLen` this is enough for prefix routing and
225 /// path-parameter extraction (e.g. `/customers/{id}`).
226 StrStartsWith {
227 s: NodeHash,
228 prefix: NodeHash,
229 },
230 /// 0-based byte index of the first occurrence of `needle` in
231 /// `haystack`, or `-1` if absent (an empty needle is at 0). Yields
232 /// `Number`. With `StrSlice` this is enough to split a delimited
233 /// string — e.g. parsing `db_query` rows into typed records.
234 StrIndexOf {
235 haystack: NodeHash,
236 needle: NodeHash,
237 },
238 /// Decimal rendering of a `Number` as a `String` (e.g. for HTML
239 /// output). Correct over the full i64 range, including `i64::MIN`
240 /// (rendered in the non-positive domain — no magnitude overflow).
241 NumberToStr(NodeHash),
242 /// Parse a `String` to a `Number`, leniently: an optional leading `-`
243 /// then the leading decimal digits; a non-numeric/empty string yields
244 /// 0. The unchecked fast path (cf. `StrToNumberOpt`).
245 StrToNumber(NodeHash),
246 /// Checked parse: `Some(n)` only if the whole string is a valid
247 /// integer (optional `-` then ≥1 digits, nothing else), else `None`.
248 /// The handle-able counterpart to `StrToNumber`, paralleling
249 /// `ListGet`/`ListTryGet`.
250 StrToNumberOpt(NodeHash),
251 /// Read the clock (milliseconds). The first effectful primitive: it
252 /// performs the `Time` effect and yields a `Number` at `external`
253 /// confidence (the value comes from outside the program). Lowers to a
254 /// host import; other effects follow the same pattern.
255 Now,
256 /// A non-empty list literal. Elements share a type `T`; the list's type
257 /// is `List<T>`. (Empty literals need a type annotation — unsupported in
258 /// v0.3.)
259 List(Vec<NodeHash>),
260 /// The empty `List<elem>`. The typed counterpart to a `List` literal,
261 /// needed because the element type can't be inferred with no elements.
262 /// With `ListCons` this lets a recursive function build a list of
263 /// runtime length.
264 ListEmpty { elem: Type },
265 /// Prepend `head` to `tail` (a `List<T>`), yielding a new `List<T>`.
266 /// v0.3: an O(n) fresh-array copy — immutable, no structural sharing,
267 /// consistent with the bump allocator's documented simplicity.
268 ListCons { head: NodeHash, tail: NodeHash },
269 /// `Some(value)` — a present `Option<T>` where `T` is the value's
270 /// type. The typed result for operations that may have no answer.
271 OptionSome(NodeHash),
272 /// `None : Option<elem>` — the absent case (element type can't be
273 /// inferred with no value, like `ListEmpty`).
274 OptionNone { elem: Type },
275 /// Eliminate an `Option<T>`: the contained value if `Some`, else
276 /// `default`. Both arms share `T`, the expression's type. This is the
277 /// noise-free recovery path — no `on_failure` threading.
278 OptionElse { opt: NodeHash, default: NodeHash },
279 /// Case analysis over an `Option<T>`: bind the payload to
280 /// `some_bind` in `some_body`, else evaluate `none_body`. Both bodies
281 /// share a type (the expression's type). Unlike `OptionElse` (value
282 /// or default) this runs different logic per case — the construct
283 /// that makes a returned `Option` usable for control flow.
284 OptionMatch {
285 opt: NodeHash,
286 some_bind: String,
287 some_body: NodeHash,
288 none_body: NodeHash,
289 },
290 /// Checked indexing: `Some(elem)` if `index` is in bounds, else
291 /// `None`. The handle-able counterpart to `ListGet` (which traps);
292 /// neither pollutes signatures with a failure.
293 ListTryGet { list: NodeHash, index: NodeHash },
294 /// Number of elements in a list.
295 ListLen(NodeHash),
296 /// Element of a list at a (0-based `Number`) index. No runtime bounds
297 /// check in v0.3 (a known simplification, like the bump allocator).
298 ListGet { list: NodeHash, index: NodeHash },
299 /// A non-empty map literal of key/value pairs. Keys share a type `K`,
300 /// values a type `V`; the type is `Map<K, V>`.
301 Map(Vec<(NodeHash, NodeHash)>),
302 /// Value for a key (linear scan). Key equality is i64/identity —
303 /// **exact for `Number` keys, which is `Map`'s decided domain**
304 /// (Principle 9: one canonical form). A missing key yields 0;
305 /// `MapTryGet` is the handle-able form. Structural keying of any
306 /// type (incl. `String`) is *not* a second mechanism here — it is
307 /// `find` over a `List` of key/value records (stdlib, first-class
308 /// functions), which is structural for any `K`. Decided, not
309 /// deferred: adding key-kind dispatch to `Map` would be the
310 /// speculative generality Principle 10 forbids when `find` already
311 /// covers it (see `string_keyed_lookup_is_find_over_a_list`).
312 MapGet { map: NodeHash, key: NodeHash },
313 /// Checked lookup: `Some(V)` for the first matching key, else
314 /// `None` — the handle-able counterpart to `MapGet` (which yields 0
315 /// on a miss, indistinguishable from a real 0). Same decided
316 /// Number/identity key domain as `MapGet`.
317 MapTryGet { map: NodeHash, key: NodeHash },
318 /// Number of entries in a map.
319 MapLen(NodeHash),
320 /// Observe a value for diagnostics (the `Log` effect), passing it
321 /// through unchanged (same type and confidence as the argument). Lowers
322 /// to a `host::log` import.
323 Log(NodeHash),
324 /// Announce that `topic` (a `String`) changed — the `Live` effect
325 /// (design.md §10). The live runtime re-renders and pushes to every
326 /// connection subscribed to that topic. Yields `Number` (0). Lowers
327 /// to a `host::publish` import. Liveness is *this*, explicit and
328 /// effect-typed — never an implicit default.
329 Publish(NodeHash),
330 /// Emit an extra HTTP response header — the `Resp` effect. `name`
331 /// and `value` are `String`s (e.g. `"Set-Cookie"`,
332 /// `"sid=abc; HttpOnly; Path=/"`); the host buffers it per request
333 /// and writes it after the standard headers. Yields `Number` (0),
334 /// so it sequences in a step like `publish`. Lowers to a
335 /// `host::set_header` import. Response headers are *this*, explicit
336 /// and effect-typed — not a hidden `Response` field.
337 SetHeader { name: NodeHash, value: NodeHash },
338 /// Draw a random `Number` (the `Rand` effect), at `external` confidence
339 /// (it comes from outside the program). Lowers to a `host::rand` import.
340 Rand,
341 /// Allocate a mutable cell holding `value` (the `Mut` effect). Type is
342 /// `Cell<T>` where `T` is the value's type.
343 MutNew(NodeHash),
344 /// Read a mutable cell's current value.
345 MutGet(NodeHash),
346 /// Write `value` into a cell (the `Mut` effect); passes `value` through.
347 MutSet { cell: NodeHash, value: NodeHash },
348 /// Write a `String` to a path (the `Disk` effect); returns the number of
349 /// bytes written.
350 DiskWrite { path: NodeHash, content: NodeHash },
351 /// Read the file at a `String` path (the `Disk` effect), returning its
352 /// contents as a `String` (host→wasm allocation). A missing file yields
353 /// the empty string in v0.3.
354 DiskRead(NodeHash),
355 /// HTTP(S) GET a `String` URL (the `Net` effect), returning the
356 /// HTTP status as a `Number` (`-1` on transport error). Backed by a
357 /// real `ureq` + TLS client — no stub path exists (proven by
358 /// `real_net_get_returns_the_http_status`).
359 NetGet(NodeHash),
360 /// Run a SQL statement (the `Db` effect), returning the result as a
361 /// `String` at `persisted` confidence (read back from the system of
362 /// record). `params` is a `List<String>` bound to the statement's `?`
363 /// placeholders by the driver — the *only* query form, so user input
364 /// never reaches SQL by concatenation (use an empty list for static
365 /// SQL). Backed by **real embedded SQLite only** (rusqlite —
366 /// in-memory by default, a file when a path is given). There is **no
367 /// canned/stub path**: every query hits the engine (`wasm::run_sql`;
368 /// the v0.3 removal of the old canned path is complete).
369 DbQuery {
370 sql: NodeHash,
371 params: NodeHash,
372 },
373 /// A reference to an in-scope binding, by name.
374 Ref(String),
375 /// A call: a callee function name plus argument nodes.
376 Call { func: String, args: Vec<NodeHash> },
377 /// A named module function used as a first-class value. Its type is the
378 /// function's signature as a `Fn`. Anonymous lambdas with transitive
379 /// closure capture are **shipped** (K2 — see `Lambda`/`CallValue`; the
380 /// stdlib `map`/`filter`/`fold` and every app pass closures). `FuncRef`
381 /// is the named-function value form; `Lambda` is the anonymous/closure
382 /// form — both lower through the funcref table + `call_indirect`.
383 FuncRef(String),
384 /// Apply a function *value* to arguments. `callee` evaluates to a
385 /// `Fn`; `args` are checked against its parameter types and its
386 /// effects union into the caller. Lowers to `call_indirect`.
387 CallValue {
388 callee: NodeHash,
389 args: Vec<NodeHash>,
390 },
391 /// An anonymous closure. `params` are its inputs (typed, like a
392 /// function's); `body` is a single result expression. Names it
393 /// references that are neither its params nor module functions are
394 /// *captured* by value from the enclosing scope at creation. Its type
395 /// is a `Fn`; its body's effects live in that `Fn` (a closure's
396 /// effects fire when it is *called*, not when it is made). v0.4: a
397 /// single-expression body and no uncaught `Fail` (documented — write a
398 /// named function and `FuncRef` it for step bodies or failures).
399 Lambda {
400 params: Vec<Param>,
401 body: NodeHash,
402 },
403 /// A typed hole: an unfilled position carrying what it expects.
404 Hole { expects: String },
405 /// A binary operation over two sub-expressions.
406 BinOp {
407 op: BinOp,
408 lhs: NodeHash,
409 rhs: NodeHash,
410 },
411 /// Raise a typed failure, short-circuiting the reasoning chain. Its type
412 /// is `Never`. The named variant must be covered by the enclosing
413 /// function's `on_failure` unless a `Handle` catches it.
414 Fail(String),
415 /// Run `body`; if it raises one of the handled failure variants, evaluate
416 /// that handler's `recover` expression instead. A handled failure does not
417 /// propagate. `handlers` maps a failure-variant name to its recovery
418 /// expression (which yields the value, or itself `fail`s).
419 Handle {
420 body: NodeHash,
421 handlers: Vec<(String, NodeHash)>,
422 },
423 /// A conditional expression. `cond` must be `Bool`; the two branches
424 /// share a type, which is the expression's type. There is no statement
425 /// form — Cairn has expressions and reasoning-chain steps, not imperative
426 /// control flow.
427 If {
428 cond: NodeHash,
429 then_branch: NodeHash,
430 else_branch: NodeHash,
431 },
432 /// A reasoning-chain step: `binding = value`.
433 Step { binding: String, value: NodeHash },
434 /// A function: the full Section 4 contract plus an ordered body and a
435 /// result expression.
436 Function {
437 name: String,
438 /// Declared generic type parameters (e.g. `["T"]`). Empty = monomorphic.
439 type_params: Vec<String>,
440 params: Vec<Param>,
441 produces: Produces,
442 requires: BTreeSet<Effect>,
443 on_failure: Vec<String>,
444 body: Vec<NodeHash>,
445 result: NodeHash,
446 },
447 /// A module: named type definitions and function definitions, referenced
448 /// by hash. The unit a checker resolves calls and type names within.
449 Module {
450 name: String,
451 types: Vec<NodeHash>,
452 functions: Vec<NodeHash>,
453 },
454 /// A record (product) type definition: named fields, each with a type.
455 RecordDef {
456 name: String,
457 fields: Vec<(String, Type)>,
458 },
459 /// Construct a record value of a named record type.
460 Record {
461 type_name: String,
462 fields: Vec<(String, NodeHash)>,
463 },
464 /// Access a field of a record value. `type_name` is the record type the
465 /// base must have; it makes the node self-describing for type-directed
466 /// lowering (the checker verifies it; the projection elides it).
467 Field {
468 base: NodeHash,
469 type_name: String,
470 field: String,
471 },
472 /// A variant (sum) type definition: named cases, each with named payload
473 /// fields (an empty payload is a nullary case).
474 VariantDef {
475 name: String,
476 cases: Vec<(String, Vec<(String, Type)>)>,
477 },
478 /// Construct a value of a named variant type, in one of its cases.
479 Variant {
480 type_name: String,
481 case: String,
482 fields: Vec<(String, NodeHash)>,
483 },
484 /// Case analysis over a variant value. Must be exhaustive. `type_name`
485 /// is the scrutinee's variant type (self-describing for type-directed
486 /// lowering; the checker verifies it, the projection elides it).
487 Match {
488 scrutinee: NodeHash,
489 type_name: String,
490 arms: Vec<MatchArm>,
491 },
492}
493
494impl Node {
495 /// Canonical bytes used for both hashing and storage.
496 ///
497 /// v0.1 uses `serde_json`; struct and enum field order is stable and the
498 /// effect set is a `BTreeSet`, so the bytes are deterministic. A canonical
499 /// binary encoding is a later refinement.
500 pub(crate) fn canonical_bytes(&self) -> Vec<u8> {
501 serde_json::to_vec(self).expect("Node serialization is infallible")
502 }
503
504 /// This node's content hash. Because children are referenced by hash, this
505 /// transitively covers the entire subtree (the Merkle property).
506 pub fn hash(&self) -> NodeHash {
507 let mut hasher = Sha256::new();
508 hasher.update(self.canonical_bytes());
509 NodeHash(hex(&hasher.finalize()))
510 }
511}
512
513fn hex(bytes: &[u8]) -> String {
514 use std::fmt::Write;
515 let mut s = String::with_capacity(bytes.len() * 2);
516 for b in bytes {
517 write!(s, "{:02x}", b).expect("writing to a String cannot fail");
518 }
519 s
520}