spec_checker/
gate.rs

1//! Spec-checker as a runtime action gate (#186).
2//!
3//! `evaluate_gate` evaluates one or more specs against
4//! caller-supplied bindings (typically `state` + the proposed
5//! action) and returns `Allow` if every spec holds, `Deny` on
6//! the first violation.
7//!
8//! This is a separate evaluation mode from
9//! [`crate::check_spec`]: the latter quantifies over random
10//! inputs to discover counterexamples; the former takes the
11//! inputs *given* and answers a single deterministic verdict.
12//! Specs reuse their existing AST — quantifier names become the
13//! lookup keys for the supplied bindings — so the same spec
14//! that an offline checker proves over random inputs can also
15//! gate one specific action online.
16//!
17//! Trace integration is intentionally not wired here — the
18//! function returns the verdict, and the caller (e.g. an agent
19//! runtime in a downstream crate) records it. Keeps spec-checker
20//! free of a lex-trace dependency.
21
22use crate::ast::{Spec, SpecExpr, SpecOp};
23use indexmap::IndexMap;
24use lex_bytecode::{compile_program, vm::Vm, Value};
25use lex_runtime::{DefaultHandler, Policy};
26use lex_syntax::parse_source;
27use serde::{Deserialize, Serialize};
28
29/// Verdict returned by [`evaluate_gate`].
30#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
31#[serde(tag = "verdict", rename_all = "snake_case")]
32pub enum GateVerdict {
33    Allow,
34    /// Spec returned `false` (action violates an invariant).
35    /// `spec_name` is the name of the offending spec, `reason`
36    /// is human-readable detail (typically the spec name plus
37    /// the relevant bindings).
38    Deny { spec_name: String, reason: String },
39    /// Evaluation failed for a non-spec reason (e.g. the body
40    /// referenced a Lex function whose call errored). Surfaced
41    /// as a separate variant so callers can distinguish "spec
42    /// said no" from "we couldn't tell."
43    Inconclusive { spec_name: String, reason: String },
44}
45
46/// Evaluate every `spec` against `bindings` and return the
47/// first non-Allow verdict (or `Allow` if all pass). `lex_source`
48/// supplies the host program — any `SpecExpr::Call` in a spec's
49/// body resolves to a function in this program.
50///
51/// Designed for synchronous per-action use. The Lex program is
52/// type-checked and compiled on each call; callers that gate at
53/// high frequency should prefer [`evaluate_gate_compiled`].
54pub fn evaluate_gate(
55    specs: &[Spec],
56    bindings: &IndexMap<String, Value>,
57    lex_source: &str,
58) -> GateVerdict {
59    let prog = match parse_source(lex_source) {
60        Ok(p) => p,
61        Err(e) => return GateVerdict::Inconclusive {
62            spec_name: "<parse>".into(),
63            reason: format!("parse: {e}"),
64        },
65    };
66    let stages = lex_ast::canonicalize_program(&prog);
67    if let Err(errs) = lex_types::check_program(&stages) {
68        return GateVerdict::Inconclusive {
69            spec_name: "<typecheck>".into(),
70            reason: format!("typecheck: {errs:?}"),
71        };
72    }
73    let bc = compile_program(&stages);
74    evaluate_gate_compiled(specs, bindings, &bc)
75}
76
77/// Same as [`evaluate_gate`] but takes already-compiled
78/// bytecode. Use when gating at high frequency: compile the
79/// program once, evaluate many actions against it.
80pub fn evaluate_gate_compiled(
81    specs: &[Spec],
82    bindings: &IndexMap<String, Value>,
83    bc: &lex_bytecode::Program,
84) -> GateVerdict {
85    evaluate_gate_compiled_inner(specs, bindings, bc, None)
86}
87
88/// Like [`evaluate_gate_compiled`] but additionally threads a
89/// caller-supplied tracer into every Vm the spec body spins up
90/// for [`SpecExpr::Call`] (#199).
91///
92/// `new_tracer` is called once per host-helper invocation and
93/// must produce a fresh `Box<dyn Tracer>` for each new `Vm`.
94/// Multiple tracers can share state — typically by closing over
95/// a [`lex_trace::Handle`] and cloning it inside the closure —
96/// so the resulting trace tree captures the spec body's call
97/// graph (e.g. `under_budget → projected_load + budget_total`)
98/// alongside the rest of the agent's run.
99///
100/// Existing callers of [`evaluate_gate`] / [`evaluate_gate_compiled`]
101/// stay unchanged; this is purely additive.
102pub fn evaluate_gate_compiled_traced<F>(
103    specs: &[Spec],
104    bindings: &IndexMap<String, Value>,
105    bc: &lex_bytecode::Program,
106    new_tracer: F,
107) -> GateVerdict
108where
109    F: Fn() -> Box<dyn lex_bytecode::vm::Tracer>,
110{
111    evaluate_gate_compiled_inner(specs, bindings, bc, Some(&new_tracer))
112}
113
114fn evaluate_gate_compiled_inner(
115    specs: &[Spec],
116    bindings: &IndexMap<String, Value>,
117    bc: &lex_bytecode::Program,
118    new_tracer: Option<&dyn Fn() -> Box<dyn lex_bytecode::vm::Tracer>>,
119) -> GateVerdict {
120    let policy = Policy::permissive();
121    for spec in specs {
122        match eval_body(&spec.body, bindings, bc, &policy, new_tracer) {
123            Ok(Value::Bool(true)) => continue,
124            Ok(Value::Bool(false)) => {
125                return GateVerdict::Deny {
126                    spec_name: spec.name.clone(),
127                    reason: format!(
128                        "spec `{}` returned false; bindings: {}",
129                        spec.name,
130                        format_bindings(bindings),
131                    ),
132                };
133            }
134            Ok(other) => return GateVerdict::Inconclusive {
135                spec_name: spec.name.clone(),
136                reason: format!("spec body returned non-bool: {other:?}"),
137            },
138            Err(e) => return GateVerdict::Inconclusive {
139                spec_name: spec.name.clone(),
140                reason: e,
141            },
142        }
143    }
144    GateVerdict::Allow
145}
146
147fn format_bindings(b: &IndexMap<String, Value>) -> String {
148    let mut parts: Vec<String> = Vec::with_capacity(b.len());
149    for (k, v) in b {
150        parts.push(format!("{k}={}", short_value(v)));
151    }
152    parts.join(", ")
153}
154
155fn short_value(v: &Value) -> String {
156    match v {
157        Value::Int(i) => format!("{i}"),
158        Value::Float(f) => format!("{f}"),
159        Value::Bool(b) => format!("{b}"),
160        Value::Str(s) => format!("\"{}\"", s.chars().take(40).collect::<String>()),
161        other => format!("{other:?}"),
162    }
163}
164
165/// Evaluate a `SpecExpr` against caller-supplied `bindings`.
166/// Mirrors `checker::eval` but kept separate so the gate path
167/// doesn't have to thread random-generation state.
168///
169/// `new_tracer`, when present, is invoked once per
170/// `SpecExpr::Call` and the resulting Tracer is attached to
171/// the Vm before running the host helper. The factory shape
172/// (rather than a single `Box<dyn Tracer>`) is what lets
173/// multiple sibling calls all flow into the same caller-side
174/// recorder via cloned `Handle`s.
175fn eval_body(
176    e: &SpecExpr,
177    bindings: &IndexMap<String, Value>,
178    bc: &lex_bytecode::Program,
179    policy: &Policy,
180    new_tracer: Option<&dyn Fn() -> Box<dyn lex_bytecode::vm::Tracer>>,
181) -> Result<Value, String> {
182    match e {
183        SpecExpr::IntLit { value } => Ok(Value::Int(*value)),
184        SpecExpr::FloatLit { value } => Ok(Value::Float(*value)),
185        SpecExpr::BoolLit { value } => Ok(Value::Bool(*value)),
186        SpecExpr::StrLit { value } => Ok(Value::Str(value.clone())),
187        SpecExpr::Var { name } => bindings.get(name).cloned()
188            .ok_or_else(|| format!("unbound spec var `{name}` (provide via gate bindings)")),
189        SpecExpr::Let { name, value, body } => {
190            let v = eval_body(value, bindings, bc, policy, new_tracer)?;
191            let mut next = bindings.clone();
192            next.insert(name.clone(), v);
193            eval_body(body, &next, bc, policy, new_tracer)
194        }
195        SpecExpr::Not { expr } => match eval_body(expr, bindings, bc, policy, new_tracer)? {
196            Value::Bool(b) => Ok(Value::Bool(!b)),
197            other => Err(format!("not on non-bool: {other:?}")),
198        },
199        SpecExpr::BinOp { op, lhs, rhs } => {
200            let a = eval_body(lhs, bindings, bc, policy, new_tracer)?;
201            let b = eval_body(rhs, bindings, bc, policy, new_tracer)?;
202            apply_binop(*op, a, b)
203        }
204        SpecExpr::Call { func, args } => {
205            let mut argv = Vec::new();
206            for a in args { argv.push(eval_body(a, bindings, bc, policy, new_tracer)?); }
207            let handler = DefaultHandler::new(policy.clone());
208            let mut vm = Vm::with_handler(bc, Box::new(handler));
209            if let Some(make_tracer) = new_tracer {
210                vm.set_tracer(make_tracer());
211            }
212            vm.call(func, argv).map_err(|e| format!("call `{func}`: {e}"))
213        }
214    }
215}
216
217fn apply_binop(op: SpecOp, a: Value, b: Value) -> Result<Value, String> {
218    use SpecOp::*;
219    match (op, &a, &b) {
220        (Add, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x + y)),
221        (Sub, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x - y)),
222        (Mul, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x * y)),
223        (Div, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x / y)),
224        (Mod, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x % y)),
225        (Add, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x + y)),
226        (Sub, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x - y)),
227        (Mul, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x * y)),
228        (Div, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x / y)),
229        (Eq, x, y) => Ok(Value::Bool(x == y)),
230        (Neq, x, y) => Ok(Value::Bool(x != y)),
231        (Lt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x < y)),
232        (Le, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x <= y)),
233        (Gt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x > y)),
234        (Ge, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x >= y)),
235        (Lt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x < y)),
236        (Le, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x <= y)),
237        (Gt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x > y)),
238        (Ge, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x >= y)),
239        (And, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x && *y)),
240        (Or, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x || *y)),
241        _ => Err(format!("invalid binop {op:?} on {a:?}, {b:?}")),
242    }
243}
244
245#[cfg(test)]
246mod tests {
247    use super::*;
248    use crate::parser::parse_spec;
249
250    fn b<I: IntoIterator<Item = (&'static str, Value)>>(items: I) -> IndexMap<String, Value> {
251        items.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
252    }
253
254    #[test]
255    fn allow_when_spec_returns_true() {
256        let spec = parse_spec("spec ok { forall x :: Int : x + 1 > x }").unwrap();
257        let v = evaluate_gate(&[spec], &b([("x", Value::Int(5))]), "");
258        assert_eq!(v, GateVerdict::Allow);
259    }
260
261    #[test]
262    fn deny_when_spec_returns_false() {
263        let spec = parse_spec("spec budget { forall used :: Int, delta :: Int : (used + delta) <= 100 }").unwrap();
264        let v = evaluate_gate(
265            &[spec],
266            &b([("used", Value::Int(80)), ("delta", Value::Int(30))]),
267            "",
268        );
269        match v {
270            GateVerdict::Deny { spec_name, reason } => {
271                assert_eq!(spec_name, "budget");
272                assert!(reason.contains("used=80"), "reason should include bindings: {reason}");
273            }
274            other => panic!("expected Deny, got {other:?}"),
275        }
276    }
277
278    #[test]
279    fn first_failing_spec_is_reported() {
280        // Two specs, second one fails — verdict mentions the
281        // second one specifically.
282        let s1 = parse_spec("spec always { forall x :: Int : x == x }").unwrap();
283        let s2 = parse_spec("spec never { forall x :: Int : x != x }").unwrap();
284        let v = evaluate_gate(&[s1, s2], &b([("x", Value::Int(0))]), "");
285        match v {
286            GateVerdict::Deny { spec_name, .. } => assert_eq!(spec_name, "never"),
287            other => panic!("expected Deny on `never`, got {other:?}"),
288        }
289    }
290
291    #[test]
292    fn missing_binding_is_inconclusive_not_panic() {
293        // An action that omits a bound the spec needs — surface
294        // as Inconclusive so the caller can fix the gate harness
295        // rather than crash.
296        let spec = parse_spec("spec needs_x { forall x :: Int : x > 0 }").unwrap();
297        let v = evaluate_gate(&[spec], &b([]), "");
298        match v {
299            GateVerdict::Inconclusive { reason, .. } => {
300                assert!(reason.contains("unbound spec var"),
301                    "expected unbound-var error, got: {reason}");
302            }
303            other => panic!("expected Inconclusive, got {other:?}"),
304        }
305    }
306
307    #[test]
308    fn grid_budget_phase1_spec() {
309        // Headline soft Phase 1 spec: site grid load (active +
310        // scheduled + delta) must not exceed budget.
311        let spec = parse_spec(r#"
312            spec grid_budget {
313              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
314                (active + scheduled + delta) <= budget
315            }
316        "#).unwrap();
317        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
318            ("active", Value::Int(40)),
319            ("scheduled", Value::Int(20)),
320            ("delta", Value::Int(15)),
321            ("budget", Value::Int(100)),
322        ]), "");
323        assert_eq!(allow, GateVerdict::Allow);
324        let deny = evaluate_gate(&[spec], &b([
325            ("active", Value::Int(40)),
326            ("scheduled", Value::Int(20)),
327            ("delta", Value::Int(60)),
328            ("budget", Value::Int(100)),
329        ]), "");
330        assert!(matches!(deny, GateVerdict::Deny { .. }));
331    }
332
333    #[test]
334    fn soc_reserve_phase1_spec() {
335        // Second Phase 1 spec: vehicle projected SoC after
336        // proposed action must not drop below reserve.
337        let spec = parse_spec(r#"
338            spec soc_reserve {
339              forall soc :: Int, draw :: Int, reserve :: Int :
340                (soc - draw) >= reserve
341            }
342        "#).unwrap();
343        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
344            ("soc", Value::Int(80)),
345            ("draw", Value::Int(20)),
346            ("reserve", Value::Int(40)),
347        ]), "");
348        assert_eq!(allow, GateVerdict::Allow);
349        let deny = evaluate_gate(&[spec], &b([
350            ("soc", Value::Int(50)),
351            ("draw", Value::Int(20)),
352            ("reserve", Value::Int(40)),
353        ]), "");
354        assert!(matches!(deny, GateVerdict::Deny { .. }));
355    }
356
357    #[test]
358    fn gate_is_fast_enough_for_synchronous_use() {
359        // Issue calls for single-digit ms per verdict on Phase 1's
360        // small spec set. We measure 1k iterations and assert the
361        // average is comfortably under that — the headroom matters
362        // because CI runners are slower than local hardware.
363        let s1 = parse_spec(r#"
364            spec grid_budget {
365              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
366                (active + scheduled + delta) <= budget
367            }
368        "#).unwrap();
369        let s2 = parse_spec(r#"
370            spec soc_reserve {
371              forall soc :: Int, draw :: Int, reserve :: Int :
372                (soc - draw) >= reserve
373            }
374        "#).unwrap();
375        let bindings = b([
376            ("active", Value::Int(40)),
377            ("scheduled", Value::Int(20)),
378            ("delta", Value::Int(15)),
379            ("budget", Value::Int(100)),
380            ("soc", Value::Int(80)),
381            ("draw", Value::Int(20)),
382            ("reserve", Value::Int(40)),
383        ]);
384        let prog = parse_source("").unwrap();
385        let stages = lex_ast::canonicalize_program(&prog);
386        let bc = compile_program(&stages);
387
388        let n = 1000;
389        let start = std::time::Instant::now();
390        for _ in 0..n {
391            let v = evaluate_gate_compiled(&[s1.clone(), s2.clone()], &bindings, &bc);
392            assert_eq!(v, GateVerdict::Allow);
393        }
394        let elapsed = start.elapsed();
395        let per_call_us = elapsed.as_micros() / n as u128;
396        assert!(per_call_us < 5_000,
397            "per-gate verdict should be under 5ms; got {per_call_us}μs");
398    }
399
400    // ---- #199: optional tracer hook -----------------------------
401
402    /// Minimal Tracer that records every enter_call name into a
403    /// shared Vec. Avoids depending on lex-trace; soft-agent's
404    /// real wiring uses `lex_trace::Recorder` + `Handle::clone`.
405    struct CallRecorder {
406        captured: std::sync::Arc<std::sync::Mutex<Vec<String>>>,
407    }
408    impl lex_bytecode::vm::Tracer for CallRecorder {
409        fn enter_call(&mut self, _node_id: &str, name: &str, _args: &[Value]) {
410            self.captured.lock().unwrap().push(name.to_string());
411        }
412        fn enter_effect(&mut self, _: &str, _: &str, _: &str, _: &[Value]) {}
413        fn exit_ok(&mut self, _: &Value) {}
414        fn exit_err(&mut self, _: &str) {}
415        fn exit_call_tail(&mut self) {}
416        fn override_effect(&mut self, _: &str) -> Option<Value> { None }
417    }
418
419    #[test]
420    fn traced_gate_captures_nested_call_events() {
421        // Spec body calls `under_budget`, which itself calls
422        // `projected_load` and `budget_total`. Without the tracer
423        // hook, only the top-level Lex call appears in any
424        // recorder; with it, the nested helpers do too.
425        let host_src = r#"
426            fn projected_load(active :: Int, delta :: Int) -> Int {
427              active + delta
428            }
429            fn budget_total(budget :: Int, headroom :: Int) -> Int {
430              budget + headroom
431            }
432            fn under_budget(active :: Int, delta :: Int, budget :: Int, headroom :: Int) -> Bool {
433              projected_load(active, delta) <= budget_total(budget, headroom)
434            }
435        "#;
436        let prog = parse_source(host_src).unwrap();
437        let stages = lex_ast::canonicalize_program(&prog);
438        let bc = compile_program(&stages);
439
440        let spec = parse_spec(r#"
441            spec gated_budget {
442              forall active :: Int, delta :: Int, budget :: Int, headroom :: Int :
443                under_budget(active, delta, budget, headroom)
444            }
445        "#).unwrap();
446        let bindings = b([
447            ("active", Value::Int(40)),
448            ("delta", Value::Int(15)),
449            ("budget", Value::Int(60)),
450            ("headroom", Value::Int(0)),
451        ]);
452
453        let captured = std::sync::Arc::new(std::sync::Mutex::new(Vec::<String>::new()));
454        let captured_for_factory = std::sync::Arc::clone(&captured);
455        let v = evaluate_gate_compiled_traced(
456            std::slice::from_ref(&spec),
457            &bindings,
458            &bc,
459            move || Box::new(CallRecorder {
460                captured: std::sync::Arc::clone(&captured_for_factory),
461            }),
462        );
463        assert_eq!(v, GateVerdict::Allow);
464
465        let calls = captured.lock().unwrap();
466        // The Vm fires `enter_call` for sub-calls executed inside
467        // the entry function's body, not for the host-driven
468        // `Vm::call("under_budget", ...)` itself — that's the
469        // host's contract. The point of the tracer hook is that
470        // these *nested* helpers are visible at all; pre-#199
471        // they were entirely opaque to the gate's recorder.
472        for expected in ["projected_load", "budget_total"] {
473            assert!(calls.iter().any(|c| c == expected),
474                "expected `{expected}` in captured calls; got {:?}", *calls);
475        }
476    }
477
478    #[test]
479    fn untraced_gate_path_unchanged() {
480        // The existing `evaluate_gate_compiled` API stays unaffected
481        // by #199: same signature, same behavior. Pin this so future
482        // refactors of the inner factory threading don't quietly
483        // shift the public contract.
484        let spec = parse_spec("spec ok { forall x :: Int : x + 1 > x }").unwrap();
485        let v = evaluate_gate_compiled(
486            std::slice::from_ref(&spec),
487            &b([("x", Value::Int(5))]),
488            &compile_program(&lex_ast::canonicalize_program(&parse_source("").unwrap())),
489        );
490        assert_eq!(v, GateVerdict::Allow);
491    }
492}
spec_checker/gate.rs

spec_checker/
gate.rs