Skip to main content

spec_checker/
gate.rs

1//! Spec-checker as a runtime action gate (#186).
2//!
3//! `evaluate_gate` evaluates one or more specs against
4//! caller-supplied bindings (typically `state` + the proposed
5//! action) and returns `Allow` if every spec holds, `Deny` on
6//! the first violation.
7//!
8//! This is a separate evaluation mode from
9//! [`crate::check_spec`]: the latter quantifies over random
10//! inputs to discover counterexamples; the former takes the
11//! inputs *given* and answers a single deterministic verdict.
12//! Specs reuse their existing AST — quantifier names become the
13//! lookup keys for the supplied bindings — so the same spec
14//! that an offline checker proves over random inputs can also
15//! gate one specific action online.
16//!
17//! Trace integration is intentionally not wired here — the
18//! function returns the verdict, and the caller (e.g. an agent
19//! runtime in a downstream crate) records it. Keeps spec-checker
20//! free of a lex-trace dependency.
21
22use crate::ast::{Spec, SpecExpr, SpecOp};
23use indexmap::IndexMap;
24use lex_bytecode::{compile_program, vm::Vm, Value};
25use lex_runtime::{DefaultHandler, Policy};
26use lex_syntax::parse_source;
27use serde::{Deserialize, Serialize};
28
29/// Verdict returned by [`evaluate_gate`].
30#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
31#[serde(tag = "verdict", rename_all = "snake_case")]
32pub enum GateVerdict {
33    Allow,
34    /// Spec returned `false` (action violates an invariant).
35    /// `spec_name` is the name of the offending spec, `reason`
36    /// is human-readable detail (typically the spec name plus
37    /// the relevant bindings).
38    Deny { spec_name: String, reason: String },
39    /// Evaluation failed for a non-spec reason (e.g. the body
40    /// referenced a Lex function whose call errored). Surfaced
41    /// as a separate variant so callers can distinguish "spec
42    /// said no" from "we couldn't tell."
43    Inconclusive { spec_name: String, reason: String },
44}
45
46/// Evaluate every `spec` against `bindings` and return the
47/// first non-Allow verdict (or `Allow` if all pass). `lex_source`
48/// supplies the host program — any `SpecExpr::Call` in a spec's
49/// body resolves to a function in this program.
50///
51/// Designed for synchronous per-action use. The Lex program is
52/// type-checked and compiled on each call; callers that gate at
53/// high frequency should prefer [`evaluate_gate_compiled`].
54pub fn evaluate_gate(
55    specs: &[Spec],
56    bindings: &IndexMap<String, Value>,
57    lex_source: &str,
58) -> GateVerdict {
59    let prog = match parse_source(lex_source) {
60        Ok(p) => p,
61        Err(e) => return GateVerdict::Inconclusive {
62            spec_name: "<parse>".into(),
63            reason: format!("parse: {e}"),
64        },
65    };
66    let stages = lex_ast::canonicalize_program(&prog);
67    if let Err(errs) = lex_types::check_program(&stages) {
68        return GateVerdict::Inconclusive {
69            spec_name: "<typecheck>".into(),
70            reason: format!("typecheck: {errs:?}"),
71        };
72    }
73    let bc = compile_program(&stages);
74    evaluate_gate_compiled(specs, bindings, &bc)
75}
76
77/// Same as [`evaluate_gate`] but takes already-compiled
78/// bytecode. Use when gating at high frequency: compile the
79/// program once, evaluate many actions against it.
80pub fn evaluate_gate_compiled(
81    specs: &[Spec],
82    bindings: &IndexMap<String, Value>,
83    bc: &lex_bytecode::Program,
84) -> GateVerdict {
85    evaluate_gate_compiled_inner(specs, bindings, bc, None)
86}
87
88/// Like [`evaluate_gate_compiled`] but additionally threads a
89/// caller-supplied tracer into every Vm the spec body spins up
90/// for [`SpecExpr::Call`] (#199).
91///
92/// `new_tracer` is called once per host-helper invocation and
93/// must produce a fresh `Box<dyn Tracer>` for each new `Vm`.
94/// Multiple tracers can share state — typically by closing over
95/// a [`lex_trace::Handle`] and cloning it inside the closure —
96/// so the resulting trace tree captures the spec body's call
97/// graph (e.g. `under_budget → projected_load + budget_total`)
98/// alongside the rest of the agent's run.
99///
100/// Existing callers of [`evaluate_gate`] / [`evaluate_gate_compiled`]
101/// stay unchanged; this is purely additive.
102pub fn evaluate_gate_compiled_traced<F>(
103    specs: &[Spec],
104    bindings: &IndexMap<String, Value>,
105    bc: &lex_bytecode::Program,
106    new_tracer: F,
107) -> GateVerdict
108where
109    F: Fn() -> Box<dyn lex_bytecode::vm::Tracer>,
110{
111    evaluate_gate_compiled_inner(specs, bindings, bc, Some(&new_tracer))
112}
113
114fn evaluate_gate_compiled_inner(
115    specs: &[Spec],
116    bindings: &IndexMap<String, Value>,
117    bc: &lex_bytecode::Program,
118    new_tracer: Option<&dyn Fn() -> Box<dyn lex_bytecode::vm::Tracer>>,
119) -> GateVerdict {
120    let policy = Policy::permissive();
121    for spec in specs {
122        match eval_body(&spec.body, bindings, bc, &policy, new_tracer) {
123            Ok(Value::Bool(true)) => continue,
124            Ok(Value::Bool(false)) => {
125                return GateVerdict::Deny {
126                    spec_name: spec.name.clone(),
127                    reason: format!(
128                        "spec `{}` returned false; bindings: {}",
129                        spec.name,
130                        format_bindings(bindings),
131                    ),
132                };
133            }
134            Ok(other) => return GateVerdict::Inconclusive {
135                spec_name: spec.name.clone(),
136                reason: format!("spec body returned non-bool: {other:?}"),
137            },
138            Err(e) => return GateVerdict::Inconclusive {
139                spec_name: spec.name.clone(),
140                reason: e,
141            },
142        }
143    }
144    GateVerdict::Allow
145}
146
147fn format_bindings(b: &IndexMap<String, Value>) -> String {
148    let mut parts: Vec<String> = Vec::with_capacity(b.len());
149    for (k, v) in b {
150        parts.push(format!("{k}={}", short_value(v)));
151    }
152    parts.join(", ")
153}
154
155fn short_value(v: &Value) -> String {
156    match v {
157        Value::Int(i) => format!("{i}"),
158        Value::Float(f) => format!("{f}"),
159        Value::Bool(b) => format!("{b}"),
160        Value::Str(s) => format!("\"{}\"", s.chars().take(40).collect::<String>()),
161        other => format!("{other:?}"),
162    }
163}
164
165/// Evaluate a `SpecExpr` against caller-supplied `bindings`.
166/// Mirrors `checker::eval` but kept separate so the gate path
167/// doesn't have to thread random-generation state.
168///
169/// `new_tracer`, when present, is invoked once per
170/// `SpecExpr::Call` and the resulting Tracer is attached to
171/// the Vm before running the host helper. The factory shape
172/// (rather than a single `Box<dyn Tracer>`) is what lets
173/// multiple sibling calls all flow into the same caller-side
174/// recorder via cloned `Handle`s.
175fn eval_body(
176    e: &SpecExpr,
177    bindings: &IndexMap<String, Value>,
178    bc: &lex_bytecode::Program,
179    policy: &Policy,
180    new_tracer: Option<&dyn Fn() -> Box<dyn lex_bytecode::vm::Tracer>>,
181) -> Result<Value, String> {
182    match e {
183        SpecExpr::IntLit { value } => Ok(Value::Int(*value)),
184        SpecExpr::FloatLit { value } => Ok(Value::Float(*value)),
185        SpecExpr::BoolLit { value } => Ok(Value::Bool(*value)),
186        SpecExpr::StrLit { value } => Ok(Value::Str(value.clone())),
187        SpecExpr::Var { name } => bindings.get(name).cloned()
188            .ok_or_else(|| format!("unbound spec var `{name}` (provide via gate bindings)")),
189        SpecExpr::Let { name, value, body } => {
190            let v = eval_body(value, bindings, bc, policy, new_tracer)?;
191            let mut next = bindings.clone();
192            next.insert(name.clone(), v);
193            eval_body(body, &next, bc, policy, new_tracer)
194        }
195        SpecExpr::Not { expr } => match eval_body(expr, bindings, bc, policy, new_tracer)? {
196            Value::Bool(b) => Ok(Value::Bool(!b)),
197            other => Err(format!("not on non-bool: {other:?}")),
198        },
199        SpecExpr::BinOp { op, lhs, rhs } => {
200            // Short-circuit `and` / `or` so guard expressions like
201            // `length(xs) == 0 or xs[0] > 0` don't evaluate the
202            // second arm when the first already decides the result.
203            // Matches the conventional boolean-operator semantics —
204            // and the gate use case where the second arm may
205            // legitimately error on the values the first arm
206            // exists to filter out (#208 slice 2).
207            if matches!(op, SpecOp::And | SpecOp::Or) {
208                let a = eval_body(lhs, bindings, bc, policy, new_tracer)?;
209                let av = match a {
210                    Value::Bool(b) => b,
211                    other => return Err(format!(
212                        "{} on non-bool lhs: {other:?}", op.as_str())),
213                };
214                if matches!(op, SpecOp::And) && !av { return Ok(Value::Bool(false)); }
215                if matches!(op, SpecOp::Or)  &&  av { return Ok(Value::Bool(true));  }
216                let b = eval_body(rhs, bindings, bc, policy, new_tracer)?;
217                return match b {
218                    Value::Bool(bb) => Ok(Value::Bool(bb)),
219                    other => Err(format!("{} on non-bool rhs: {other:?}", op.as_str())),
220                };
221            }
222            let a = eval_body(lhs, bindings, bc, policy, new_tracer)?;
223            let b = eval_body(rhs, bindings, bc, policy, new_tracer)?;
224            apply_binop(*op, a, b)
225        }
226        SpecExpr::Call { func, args } => {
227            let mut argv = Vec::new();
228            for a in args { argv.push(eval_body(a, bindings, bc, policy, new_tracer)?); }
229            // Spec-builtin list operations (#208). `length`, `head`,
230            // and `tail` are intercepted before falling through to a
231            // host VM call so specs can reason about list-shaped
232            // bindings without the host program needing those names.
233            // Identical name-shadowing behavior to lex's stdlib —
234            // user code can still define a function `length` and
235            // reference it from a spec, but a spec call to `length(xs)`
236            // where `xs` is a `Value::List` resolves to the builtin.
237            if let Some(v) = list_builtin(func, &argv) { return v; }
238            let handler = DefaultHandler::new(policy.clone());
239            let mut vm = Vm::with_handler(bc, Box::new(handler));
240            if let Some(make_tracer) = new_tracer {
241                vm.set_tracer(make_tracer());
242            }
243            vm.call(func, argv).map_err(|e| format!("call `{func}`: {e}"))
244        }
245        SpecExpr::Index { list, index } => {
246            let xs = eval_body(list, bindings, bc, policy, new_tracer)?;
247            let i = eval_body(index, bindings, bc, policy, new_tracer)?;
248            list_index(xs, i)
249        }
250        SpecExpr::Match { scrutinee, arms } => {
251            // #208 slice 3: dispatch on `Value::Variant`'s tag.
252            // Wildcard arms always match. Variant arms match by name
253            // and arity, binding positional args by name in the body.
254            let v = eval_body(scrutinee, bindings, bc, policy, new_tracer)?;
255            for arm in arms {
256                if let Some(extra) = pattern_match(&arm.pattern, &v) {
257                    let mut next = bindings.clone();
258                    for (k, vv) in extra { next.insert(k, vv); }
259                    return eval_body(&arm.body, &next, bc, policy, new_tracer);
260                }
261            }
262            Err(format!(
263                "non-exhaustive match: no arm matched value {}",
264                short_value(&v)))
265        }
266        SpecExpr::FieldAccess { value, field } => {
267            // Drill into a record-typed binding (#208). Fails loudly
268            // if the value isn't a record or the field is missing —
269            // both indicate a spec/binding shape mismatch the agent
270            // wants to know about, not silently default.
271            let v = eval_body(value, bindings, bc, policy, new_tracer)?;
272            match v {
273                Value::Record(fields) => fields.get(field).cloned().ok_or_else(|| {
274                    let known: Vec<&str> = fields.keys().map(String::as_str).collect();
275                    format!("field `{field}` missing on record (have: {})", known.join(", "))
276                }),
277                other => Err(format!(
278                    "field access `.{field}` on non-record: {}",
279                    short_value(&other))),
280            }
281        }
282    }
283}
284
285/// Try to match a pattern against a value (#208 slice 3). Returns
286/// `Some(bindings)` on success — the bindings to add to the
287/// arm's lexical environment — or `None` if the pattern doesn't
288/// match. The caller falls through to the next arm on `None`.
289pub(crate) fn pattern_match(pat: &crate::ast::SpecPattern, v: &Value)
290    -> Option<Vec<(String, Value)>>
291{
292    use crate::ast::SpecPattern;
293    match pat {
294        SpecPattern::Wildcard => Some(Vec::new()),
295        SpecPattern::Variant { name, bindings } => {
296            match v {
297                Value::Variant { name: vn, args } if vn == name && args.len() == bindings.len() => {
298                    Some(bindings.iter().cloned()
299                        .zip(args.iter().cloned())
300                        .collect())
301                }
302                _ => None,
303            }
304        }
305    }
306}
307
308/// Spec-builtin list operations (#208). Returns `Some(result)` if
309/// `func` names a builtin (`length`, `head`, `tail`) and the args
310/// shape matches; returns `None` to indicate the call should fall
311/// through to a host VM dispatch.
312pub(crate) fn list_builtin(func: &str, args: &[Value]) -> Option<Result<Value, String>> {
313    match func {
314        "length" => {
315            if args.len() != 1 {
316                return Some(Err(format!("length: expected 1 arg, got {}", args.len())));
317            }
318            match &args[0] {
319                Value::List(xs) => Some(Ok(Value::Int(xs.len() as i64))),
320                // Not a list — fall through to host dispatch in case
321                // the user defined their own `length` function.
322                _ => None,
323            }
324        }
325        "head" => {
326            if args.len() != 1 { return None; }
327            match &args[0] {
328                Value::List(xs) => Some(match xs.first() {
329                    Some(v) => Ok(v.clone()),
330                    None => Err("head: empty list".into()),
331                }),
332                _ => None,
333            }
334        }
335        "tail" => {
336            if args.len() != 1 { return None; }
337            match &args[0] {
338                Value::List(xs) => Some(match xs.split_first() {
339                    Some((_, rest)) => Ok(Value::List(rest.to_vec())),
340                    None => Err("tail: empty list".into()),
341                }),
342                _ => None,
343            }
344        }
345        _ => None,
346    }
347}
348
349fn list_index(list: Value, index: Value) -> Result<Value, String> {
350    let xs = match list {
351        Value::List(xs) => xs,
352        other => return Err(format!("index `[..]` on non-list: {}", short_value(&other))),
353    };
354    let i = match index {
355        Value::Int(n) => n,
356        other => return Err(format!("list index must be Int, got {}", short_value(&other))),
357    };
358    if i < 0 || (i as usize) >= xs.len() {
359        return Err(format!(
360            "list index {i} out of bounds (length {})", xs.len()));
361    }
362    Ok(xs[i as usize].clone())
363}
364
365fn apply_binop(op: SpecOp, a: Value, b: Value) -> Result<Value, String> {
366    use SpecOp::*;
367    match (op, &a, &b) {
368        (Add, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x + y)),
369        (Sub, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x - y)),
370        (Mul, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x * y)),
371        (Div, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x / y)),
372        (Mod, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x % y)),
373        (Add, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x + y)),
374        (Sub, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x - y)),
375        (Mul, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x * y)),
376        (Div, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x / y)),
377        (Eq, x, y) => Ok(Value::Bool(x == y)),
378        (Neq, x, y) => Ok(Value::Bool(x != y)),
379        (Lt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x < y)),
380        (Le, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x <= y)),
381        (Gt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x > y)),
382        (Ge, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x >= y)),
383        (Lt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x < y)),
384        (Le, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x <= y)),
385        (Gt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x > y)),
386        (Ge, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x >= y)),
387        (And, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x && *y)),
388        (Or, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x || *y)),
389        _ => Err(format!("invalid binop {op:?} on {a:?}, {b:?}")),
390    }
391}
392
393#[cfg(test)]
394mod tests {
395    use super::*;
396    use crate::parser::parse_spec;
397
398    fn b<I: IntoIterator<Item = (&'static str, Value)>>(items: I) -> IndexMap<String, Value> {
399        items.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
400    }
401
402    #[test]
403    fn allow_when_spec_returns_true() {
404        let spec = parse_spec("spec ok { forall x :: Int : x + 1 > x }").unwrap();
405        let v = evaluate_gate(&[spec], &b([("x", Value::Int(5))]), "");
406        assert_eq!(v, GateVerdict::Allow);
407    }
408
409    #[test]
410    fn deny_when_spec_returns_false() {
411        let spec = parse_spec("spec budget { forall used :: Int, delta :: Int : (used + delta) <= 100 }").unwrap();
412        let v = evaluate_gate(
413            &[spec],
414            &b([("used", Value::Int(80)), ("delta", Value::Int(30))]),
415            "",
416        );
417        match v {
418            GateVerdict::Deny { spec_name, reason } => {
419                assert_eq!(spec_name, "budget");
420                assert!(reason.contains("used=80"), "reason should include bindings: {reason}");
421            }
422            other => panic!("expected Deny, got {other:?}"),
423        }
424    }
425
426    #[test]
427    fn first_failing_spec_is_reported() {
428        // Two specs, second one fails — verdict mentions the
429        // second one specifically.
430        let s1 = parse_spec("spec always { forall x :: Int : x == x }").unwrap();
431        let s2 = parse_spec("spec never { forall x :: Int : x != x }").unwrap();
432        let v = evaluate_gate(&[s1, s2], &b([("x", Value::Int(0))]), "");
433        match v {
434            GateVerdict::Deny { spec_name, .. } => assert_eq!(spec_name, "never"),
435            other => panic!("expected Deny on `never`, got {other:?}"),
436        }
437    }
438
439    #[test]
440    fn missing_binding_is_inconclusive_not_panic() {
441        // An action that omits a bound the spec needs — surface
442        // as Inconclusive so the caller can fix the gate harness
443        // rather than crash.
444        let spec = parse_spec("spec needs_x { forall x :: Int : x > 0 }").unwrap();
445        let v = evaluate_gate(&[spec], &b([]), "");
446        match v {
447            GateVerdict::Inconclusive { reason, .. } => {
448                assert!(reason.contains("unbound spec var"),
449                    "expected unbound-var error, got: {reason}");
450            }
451            other => panic!("expected Inconclusive, got {other:?}"),
452        }
453    }
454
455    #[test]
456    fn grid_budget_phase1_spec() {
457        // Headline soft Phase 1 spec: site grid load (active +
458        // scheduled + delta) must not exceed budget.
459        let spec = parse_spec(r#"
460            spec grid_budget {
461              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
462                (active + scheduled + delta) <= budget
463            }
464        "#).unwrap();
465        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
466            ("active", Value::Int(40)),
467            ("scheduled", Value::Int(20)),
468            ("delta", Value::Int(15)),
469            ("budget", Value::Int(100)),
470        ]), "");
471        assert_eq!(allow, GateVerdict::Allow);
472        let deny = evaluate_gate(&[spec], &b([
473            ("active", Value::Int(40)),
474            ("scheduled", Value::Int(20)),
475            ("delta", Value::Int(60)),
476            ("budget", Value::Int(100)),
477        ]), "");
478        assert!(matches!(deny, GateVerdict::Deny { .. }));
479    }
480
481    #[test]
482    fn soc_reserve_phase1_spec() {
483        // Second Phase 1 spec: vehicle projected SoC after
484        // proposed action must not drop below reserve.
485        let spec = parse_spec(r#"
486            spec soc_reserve {
487              forall soc :: Int, draw :: Int, reserve :: Int :
488                (soc - draw) >= reserve
489            }
490        "#).unwrap();
491        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
492            ("soc", Value::Int(80)),
493            ("draw", Value::Int(20)),
494            ("reserve", Value::Int(40)),
495        ]), "");
496        assert_eq!(allow, GateVerdict::Allow);
497        let deny = evaluate_gate(&[spec], &b([
498            ("soc", Value::Int(50)),
499            ("draw", Value::Int(20)),
500            ("reserve", Value::Int(40)),
501        ]), "");
502        assert!(matches!(deny, GateVerdict::Deny { .. }));
503    }
504
505    #[test]
506    fn gate_is_fast_enough_for_synchronous_use() {
507        // Issue calls for single-digit ms per verdict on Phase 1's
508        // small spec set. We measure 1k iterations and assert the
509        // average is comfortably under that — the headroom matters
510        // because CI runners are slower than local hardware.
511        let s1 = parse_spec(r#"
512            spec grid_budget {
513              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
514                (active + scheduled + delta) <= budget
515            }
516        "#).unwrap();
517        let s2 = parse_spec(r#"
518            spec soc_reserve {
519              forall soc :: Int, draw :: Int, reserve :: Int :
520                (soc - draw) >= reserve
521            }
522        "#).unwrap();
523        let bindings = b([
524            ("active", Value::Int(40)),
525            ("scheduled", Value::Int(20)),
526            ("delta", Value::Int(15)),
527            ("budget", Value::Int(100)),
528            ("soc", Value::Int(80)),
529            ("draw", Value::Int(20)),
530            ("reserve", Value::Int(40)),
531        ]);
532        let prog = parse_source("").unwrap();
533        let stages = lex_ast::canonicalize_program(&prog);
534        let bc = compile_program(&stages);
535
536        let n = 1000;
537        let start = std::time::Instant::now();
538        for _ in 0..n {
539            let v = evaluate_gate_compiled(&[s1.clone(), s2.clone()], &bindings, &bc);
540            assert_eq!(v, GateVerdict::Allow);
541        }
542        let elapsed = start.elapsed();
543        let per_call_us = elapsed.as_micros() / n as u128;
544        assert!(per_call_us < 5_000,
545            "per-gate verdict should be under 5ms; got {per_call_us}μs");
546    }
547
548    // ---- #199: optional tracer hook -----------------------------
549
550    /// Minimal Tracer that records every enter_call name into a
551    /// shared Vec. Avoids depending on lex-trace; soft-agent's
552    /// real wiring uses `lex_trace::Recorder` + `Handle::clone`.
553    struct CallRecorder {
554        captured: std::sync::Arc<std::sync::Mutex<Vec<String>>>,
555    }
556    impl lex_bytecode::vm::Tracer for CallRecorder {
557        fn enter_call(&mut self, _node_id: &str, name: &str, _args: &[Value]) {
558            self.captured.lock().unwrap().push(name.to_string());
559        }
560        fn enter_effect(&mut self, _: &str, _: &str, _: &str, _: &[Value]) {}
561        fn exit_ok(&mut self, _: &Value) {}
562        fn exit_err(&mut self, _: &str) {}
563        fn exit_call_tail(&mut self) {}
564        fn override_effect(&mut self, _: &str) -> Option<Value> { None }
565    }
566
567    #[test]
568    fn traced_gate_captures_nested_call_events() {
569        // Spec body calls `under_budget`, which itself calls
570        // `projected_load` and `budget_total`. Without the tracer
571        // hook, only the top-level Lex call appears in any
572        // recorder; with it, the nested helpers do too.
573        let host_src = r#"
574            fn projected_load(active :: Int, delta :: Int) -> Int {
575              active + delta
576            }
577            fn budget_total(budget :: Int, headroom :: Int) -> Int {
578              budget + headroom
579            }
580            fn under_budget(active :: Int, delta :: Int, budget :: Int, headroom :: Int) -> Bool {
581              projected_load(active, delta) <= budget_total(budget, headroom)
582            }
583        "#;
584        let prog = parse_source(host_src).unwrap();
585        let stages = lex_ast::canonicalize_program(&prog);
586        let bc = compile_program(&stages);
587
588        let spec = parse_spec(r#"
589            spec gated_budget {
590              forall active :: Int, delta :: Int, budget :: Int, headroom :: Int :
591                under_budget(active, delta, budget, headroom)
592            }
593        "#).unwrap();
594        let bindings = b([
595            ("active", Value::Int(40)),
596            ("delta", Value::Int(15)),
597            ("budget", Value::Int(60)),
598            ("headroom", Value::Int(0)),
599        ]);
600
601        let captured = std::sync::Arc::new(std::sync::Mutex::new(Vec::<String>::new()));
602        let captured_for_factory = std::sync::Arc::clone(&captured);
603        let v = evaluate_gate_compiled_traced(
604            std::slice::from_ref(&spec),
605            &bindings,
606            &bc,
607            move || Box::new(CallRecorder {
608                captured: std::sync::Arc::clone(&captured_for_factory),
609            }),
610        );
611        assert_eq!(v, GateVerdict::Allow);
612
613        let calls = captured.lock().unwrap();
614        // The Vm fires `enter_call` for sub-calls executed inside
615        // the entry function's body, not for the host-driven
616        // `Vm::call("under_budget", ...)` itself — that's the
617        // host's contract. The point of the tracer hook is that
618        // these *nested* helpers are visible at all; pre-#199
619        // they were entirely opaque to the gate's recorder.
620        for expected in ["projected_load", "budget_total"] {
621            assert!(calls.iter().any(|c| c == expected),
622                "expected `{expected}` in captured calls; got {:?}", *calls);
623        }
624    }
625
626    #[test]
627    fn untraced_gate_path_unchanged() {
628        // The existing `evaluate_gate_compiled` API stays unaffected
629        // by #199: same signature, same behavior. Pin this so future
630        // refactors of the inner factory threading don't quietly
631        // shift the public contract.
632        let spec = parse_spec("spec ok { forall x :: Int : x + 1 > x }").unwrap();
633        let v = evaluate_gate_compiled(
634            std::slice::from_ref(&spec),
635            &b([("x", Value::Int(5))]),
636            &compile_program(&lex_ast::canonicalize_program(&parse_source("").unwrap())),
637        );
638        assert_eq!(v, GateVerdict::Allow);
639    }
640
641    // ---- #208: record-typed bindings + field access ------------------
642
643    /// Build a `Value::Record` from `(field, value)` pairs.
644    fn rec(fields: &[(&str, Value)]) -> Value {
645        let mut m = indexmap::IndexMap::new();
646        for (k, v) in fields {
647            m.insert((*k).into(), v.clone());
648        }
649        Value::Record(m)
650    }
651
652    #[test]
653    fn record_quantifier_type_parses() {
654        // The header type uses the new record syntax. The body
655        // doesn't have to use it — confirms the parser accepts the
656        // `{ name :: Ty, ... }` shape independently of how the spec
657        // body references the binding.
658        let spec = parse_spec(r#"
659            spec session_ok {
660              forall s :: { used :: Int, ceiling :: Int } : true
661            }
662        "#).unwrap();
663        let v = evaluate_gate(&[spec], &b([
664            ("s", rec(&[("used", Value::Int(0)), ("ceiling", Value::Int(100))])),
665        ]), "");
666        assert_eq!(v, GateVerdict::Allow);
667    }
668
669    #[test]
670    fn field_access_drills_into_record_value() {
671        // The headline #208 case: spec quantifies a record-shaped
672        // binding *and* references its fields directly. Pre-#208
673        // soft-agent had to flatten this via BindingsFn.
674        let spec = parse_spec(r#"
675            spec budget_ok {
676              forall s :: { used :: Int, ceiling :: Int } :
677                s.used <= s.ceiling
678            }
679        "#).unwrap();
680        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
681            ("s", rec(&[("used", Value::Int(40)), ("ceiling", Value::Int(100))])),
682        ]), "");
683        assert_eq!(allow, GateVerdict::Allow);
684        let deny = evaluate_gate(&[spec], &b([
685            ("s", rec(&[("used", Value::Int(120)), ("ceiling", Value::Int(100))])),
686        ]), "");
687        assert!(matches!(deny, GateVerdict::Deny { .. }));
688    }
689
690    #[test]
691    fn nested_record_field_access_works() {
692        // `s.charge.power_drawn` — chained field access. Mirrors the
693        // structured-state pattern that motivated the issue (see the
694        // "active sessions, station.power_drawn ≤ station.budget"
695        // example in #208's background).
696        let spec = parse_spec(r#"
697            spec station_ok {
698              forall s :: { charge :: { power_drawn :: Int, budget :: Int } } :
699                s.charge.power_drawn <= s.charge.budget
700            }
701        "#).unwrap();
702        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
703            ("s", rec(&[("charge", rec(&[
704                ("power_drawn", Value::Int(50)),
705                ("budget", Value::Int(80)),
706            ]))])),
707        ]), "");
708        assert_eq!(allow, GateVerdict::Allow);
709    }
710
711    #[test]
712    fn missing_field_is_inconclusive_not_panic() {
713        // Spec references `s.budget` but the binding has only `s.used`.
714        // This is an agent/spec mismatch; surface as Inconclusive with a
715        // diagnostic listing the available fields.
716        let spec = parse_spec(r#"
717            spec needs_budget {
718              forall s :: { used :: Int, budget :: Int } : s.used <= s.budget
719            }
720        "#).unwrap();
721        let v = evaluate_gate(&[spec], &b([
722            ("s", rec(&[("used", Value::Int(40))])),
723        ]), "");
724        match v {
725            GateVerdict::Inconclusive { reason, .. } => {
726                assert!(reason.contains("field `budget`"),
727                    "reason should name the missing field; got: {reason}");
728            }
729            other => panic!("expected Inconclusive, got {other:?}"),
730        }
731    }
732
733    #[test]
734    fn field_access_on_non_record_is_inconclusive() {
735        // Catches the "spec author forgot the value was scalar" case.
736        let spec = parse_spec(r#"
737            spec wrong_shape {
738              forall x :: Int : x.used > 0
739            }
740        "#).unwrap();
741        let v = evaluate_gate(&[spec], &b([("x", Value::Int(40))]), "");
742        match v {
743            GateVerdict::Inconclusive { reason, .. } => {
744                assert!(reason.contains("non-record"),
745                    "reason should call out non-record; got: {reason}");
746            }
747            other => panic!("expected Inconclusive, got {other:?}"),
748        }
749    }
750
751    // ---- #208 slice 2: list-typed bindings ---------------------------
752
753    /// Build a `Value::List` from a slice of values.
754    fn lst(items: &[Value]) -> Value {
755        Value::List(items.to_vec())
756    }
757
758    #[test]
759    fn list_quantifier_type_parses() {
760        let spec = parse_spec(r#"
761            spec ok {
762              forall xs :: List[Int] : true
763            }
764        "#).unwrap();
765        let v = evaluate_gate(&[spec], &b([
766            ("xs", lst(&[Value::Int(1), Value::Int(2)])),
767        ]), "");
768        assert_eq!(v, GateVerdict::Allow);
769    }
770
771    #[test]
772    fn length_builtin_returns_list_length() {
773        let spec = parse_spec(r#"
774            spec at_least_one {
775              forall xs :: List[Int] : length(xs) > 0
776            }
777        "#).unwrap();
778        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
779            ("xs", lst(&[Value::Int(7)])),
780        ]), "");
781        assert_eq!(allow, GateVerdict::Allow);
782        let deny = evaluate_gate(&[spec], &b([
783            ("xs", lst(&[])),
784        ]), "");
785        assert!(matches!(deny, GateVerdict::Deny { .. }));
786    }
787
788    #[test]
789    fn indexed_access_reads_list_element() {
790        let spec = parse_spec(r#"
791            spec head_positive {
792              forall xs :: List[Int] : xs[0] > 0
793            }
794        "#).unwrap();
795        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
796            ("xs", lst(&[Value::Int(5), Value::Int(10)])),
797        ]), "");
798        assert_eq!(allow, GateVerdict::Allow);
799        let deny = evaluate_gate(&[spec], &b([
800            ("xs", lst(&[Value::Int(0), Value::Int(10)])),
801        ]), "");
802        assert!(matches!(deny, GateVerdict::Deny { .. }));
803    }
804
805    #[test]
806    fn head_and_tail_builtins_work() {
807        // `head(xs) >= length(tail(xs))` — silly but exercises both
808        // builtins together with a length() over the tail.
809        let spec = parse_spec(r#"
810            spec shape {
811              forall xs :: List[Int] :
812                length(xs) > 0 and head(xs) >= length(tail(xs))
813            }
814        "#).unwrap();
815        // [3, 1, 2]: head=3, tail=[1,2] → length 2; 3 >= 2 ✓
816        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
817            ("xs", lst(&[Value::Int(3), Value::Int(1), Value::Int(2)])),
818        ]), "");
819        assert_eq!(allow, GateVerdict::Allow);
820        // [1, 1, 2, 3]: head=1, tail=[1,2,3] → length 3; 1 >= 3 ✗
821        let deny = evaluate_gate(&[spec], &b([
822            ("xs", lst(&[Value::Int(1), Value::Int(1),
823                         Value::Int(2), Value::Int(3)])),
824        ]), "");
825        assert!(matches!(deny, GateVerdict::Deny { .. }));
826    }
827
828    #[test]
829    fn list_of_records_lets_specs_quantify_structured_collections() {
830        // The pattern motivated by the issue's "for every charging
831        // session in active_sessions, station.power_drawn ≤ station.budget"
832        // example. The spec checks the *first* session's invariant —
833        // a per-element forall is slice 3's territory; this slice
834        // verifies the structural plumbing (List of Record + indexed
835        // access + field access) composes.
836        let spec = parse_spec(r#"
837            spec first_session_within_budget {
838              forall sessions :: List[{ power :: Int, budget :: Int }] :
839                length(sessions) == 0 or sessions[0].power <= sessions[0].budget
840            }
841        "#).unwrap();
842        let mut session = indexmap::IndexMap::new();
843        session.insert("power".into(), Value::Int(50));
844        session.insert("budget".into(), Value::Int(80));
845        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
846            ("sessions", Value::List(vec![Value::Record(session.clone())])),
847        ]), "");
848        assert_eq!(allow, GateVerdict::Allow);
849
850        let mut over = indexmap::IndexMap::new();
851        over.insert("power".into(), Value::Int(120));
852        over.insert("budget".into(), Value::Int(80));
853        let deny = evaluate_gate(&[spec], &b([
854            ("sessions", Value::List(vec![Value::Record(over)])),
855        ]), "");
856        assert!(matches!(deny, GateVerdict::Deny { .. }));
857    }
858
859    #[test]
860    fn empty_list_passes_when_predicate_is_vacuous() {
861        // Verifies the `length(xs) == 0 or ...` short-circuit pattern
862        // used to make per-list predicates well-defined on empties.
863        let spec = parse_spec(r#"
864            spec ok_or_empty {
865              forall xs :: List[Int] : length(xs) == 0 or xs[0] > 0
866            }
867        "#).unwrap();
868        let v = evaluate_gate(&[spec], &b([("xs", lst(&[]))]), "");
869        assert_eq!(v, GateVerdict::Allow);
870    }
871
872    #[test]
873    fn out_of_bounds_index_is_inconclusive() {
874        let spec = parse_spec(r#"
875            spec needs_two {
876              forall xs :: List[Int] : xs[1] > 0
877            }
878        "#).unwrap();
879        let v = evaluate_gate(&[spec], &b([
880            ("xs", lst(&[Value::Int(5)])),  // length 1; xs[1] OOB
881        ]), "");
882        match v {
883            GateVerdict::Inconclusive { reason, .. } => {
884                assert!(reason.contains("out of bounds"),
885                    "expected OOB diagnostic; got: {reason}");
886            }
887            other => panic!("expected Inconclusive, got {other:?}"),
888        }
889    }
890
891    #[test]
892    fn head_of_empty_list_is_inconclusive() {
893        let spec = parse_spec(r#"
894            spec head_pos {
895              forall xs :: List[Int] : head(xs) > 0
896            }
897        "#).unwrap();
898        let v = evaluate_gate(&[spec], &b([("xs", lst(&[]))]), "");
899        match v {
900            GateVerdict::Inconclusive { reason, .. } => {
901                assert!(reason.contains("empty list"),
902                    "expected empty-list diagnostic; got: {reason}");
903            }
904            other => panic!("expected Inconclusive, got {other:?}"),
905        }
906    }
907
908    // ---- #208 slice 3: ADT pattern matching --------------------------
909
910    /// Build a `Value::Variant` with the given name and positional args.
911    fn variant(name: &str, args: Vec<Value>) -> Value {
912        Value::Variant { name: name.into(), args }
913    }
914
915    #[test]
916    fn named_type_in_quantifier_parses() {
917        let spec = parse_spec(r#"
918            spec ok {
919              forall msg :: Message : true
920            }
921        "#).unwrap();
922        let v = evaluate_gate(&[spec], &b([
923            ("msg", variant("Heartbeat", vec![])),
924        ]), "");
925        assert_eq!(v, GateVerdict::Allow);
926    }
927
928    #[test]
929    fn match_dispatches_on_variant_name() {
930        // Two arms: Charge(amount) returns amount > 0; Telemetry(_)
931        // is unconditionally true. Wildcard catches the rest.
932        let spec = parse_spec(r#"
933            spec valid_msg {
934              forall msg :: Message :
935                match msg {
936                  Charge(amount) => amount > 0,
937                  Telemetry(payload) => true,
938                  _ => false,
939                }
940            }
941        "#).unwrap();
942        let allow_charge = evaluate_gate(std::slice::from_ref(&spec), &b([
943            ("msg", variant("Charge", vec![Value::Int(50)])),
944        ]), "");
945        assert_eq!(allow_charge, GateVerdict::Allow);
946
947        let deny_negative = evaluate_gate(std::slice::from_ref(&spec), &b([
948            ("msg", variant("Charge", vec![Value::Int(-1)])),
949        ]), "");
950        assert!(matches!(deny_negative, GateVerdict::Deny { .. }));
951
952        let allow_telemetry = evaluate_gate(std::slice::from_ref(&spec), &b([
953            ("msg", variant("Telemetry", vec![Value::Str("ok".into())])),
954        ]), "");
955        assert_eq!(allow_telemetry, GateVerdict::Allow);
956
957        let deny_unknown = evaluate_gate(&[spec], &b([
958            ("msg", variant("UnknownTopic", vec![])),
959        ]), "");
960        assert!(matches!(deny_unknown, GateVerdict::Deny { .. }));
961    }
962
963    #[test]
964    fn variant_pattern_binds_positional_args() {
965        // `Charge(amount, station)` binds two args; the body
966        // references both. Confirms multi-arg variant patterns work.
967        let spec = parse_spec(r#"
968            spec budget_match {
969              forall msg :: Message :
970                match msg {
971                  Charge(amount, budget) => amount <= budget,
972                  _ => true,
973                }
974            }
975        "#).unwrap();
976        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
977            ("msg", variant("Charge", vec![Value::Int(50), Value::Int(100)])),
978        ]), "");
979        assert_eq!(allow, GateVerdict::Allow);
980        let deny = evaluate_gate(&[spec], &b([
981            ("msg", variant("Charge", vec![Value::Int(150), Value::Int(100)])),
982        ]), "");
983        assert!(matches!(deny, GateVerdict::Deny { .. }));
984    }
985
986    #[test]
987    fn variant_arity_mismatch_falls_through_to_next_arm() {
988        // `Charge(x)` (1 arg) doesn't match a `Charge(a, b)` value
989        // (2 args). The wildcard fallback catches it.
990        let spec = parse_spec(r#"
991            spec arity_check {
992              forall msg :: Message :
993                match msg {
994                  Charge(x) => x > 0,
995                  _ => true,
996                }
997            }
998        "#).unwrap();
999        let v = evaluate_gate(&[spec], &b([
1000            ("msg", variant("Charge", vec![Value::Int(1), Value::Int(2)])),
1001        ]), "");
1002        assert_eq!(v, GateVerdict::Allow);
1003    }
1004
1005    #[test]
1006    fn non_exhaustive_match_is_inconclusive() {
1007        // No wildcard, no matching variant — no arm fires.
1008        let spec = parse_spec(r#"
1009            spec only_charge {
1010              forall msg :: Message :
1011                match msg {
1012                  Charge(_) => true,
1013                }
1014            }
1015        "#).unwrap();
1016        let v = evaluate_gate(&[spec], &b([
1017            ("msg", variant("OtherTopic", vec![])),
1018        ]), "");
1019        match v {
1020            GateVerdict::Inconclusive { reason, .. } => {
1021                assert!(reason.contains("non-exhaustive"),
1022                    "expected non-exhaustive diagnostic; got: {reason}");
1023            }
1024            other => panic!("expected Inconclusive, got {other:?}"),
1025        }
1026    }
1027
1028    #[test]
1029    fn nested_match_drills_into_variant_payload_record() {
1030        // `Charge(s)` binds `s`, which is itself a record. The arm
1031        // body uses `s.power <= s.budget` — combines slice 1
1032        // (FieldAccess) with slice 3 (variant binding). Models the
1033        // soft "for every charging session" pattern.
1034        let spec = parse_spec(r#"
1035            spec station_match {
1036              forall msg :: Message :
1037                match msg {
1038                  Charge(s) => s.power <= s.budget,
1039                  _ => true,
1040                }
1041            }
1042        "#).unwrap();
1043        let mut session = indexmap::IndexMap::new();
1044        session.insert("power".into(), Value::Int(60));
1045        session.insert("budget".into(), Value::Int(100));
1046        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
1047            ("msg", variant("Charge", vec![Value::Record(session)])),
1048        ]), "");
1049        assert_eq!(allow, GateVerdict::Allow);
1050
1051        let mut over = indexmap::IndexMap::new();
1052        over.insert("power".into(), Value::Int(160));
1053        over.insert("budget".into(), Value::Int(100));
1054        let deny = evaluate_gate(&[spec], &b([
1055            ("msg", variant("Charge", vec![Value::Record(over)])),
1056        ]), "");
1057        assert!(matches!(deny, GateVerdict::Deny { .. }));
1058    }
1059}