Skip to main content

spec_checker/
gate.rs

1//! Spec-checker as a runtime action gate (#186).
2//!
3//! `evaluate_gate` evaluates one or more specs against
4//! caller-supplied bindings (typically `state` + the proposed
5//! action) and returns `Allow` if every spec holds, `Deny` on
6//! the first violation.
7//!
8//! This is a separate evaluation mode from
9//! [`crate::check_spec`]: the latter quantifies over random
10//! inputs to discover counterexamples; the former takes the
11//! inputs *given* and answers a single deterministic verdict.
12//! Specs reuse their existing AST — quantifier names become the
13//! lookup keys for the supplied bindings — so the same spec
14//! that an offline checker proves over random inputs can also
15//! gate one specific action online.
16//!
17//! Trace integration is intentionally not wired here — the
18//! function returns the verdict, and the caller (e.g. an agent
19//! runtime in a downstream crate) records it. Keeps spec-checker
20//! free of a lex-trace dependency.
21
22use crate::ast::{Spec, SpecExpr, SpecOp};
23use indexmap::IndexMap;
24use lex_bytecode::{compile_program, vm::Vm, Value};
25use lex_runtime::{DefaultHandler, Policy};
26use lex_syntax::parse_source;
27use serde::{Deserialize, Serialize};
28
29/// Verdict returned by [`evaluate_gate`].
30#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
31#[serde(tag = "verdict", rename_all = "snake_case")]
32pub enum GateVerdict {
33    Allow,
34    /// Spec returned `false` (action violates an invariant).
35    /// `spec_name` is the name of the offending spec, `reason`
36    /// is human-readable detail (typically the spec name plus
37    /// the relevant bindings).
38    Deny { spec_name: String, reason: String },
39    /// Evaluation failed for a non-spec reason (e.g. the body
40    /// referenced a Lex function whose call errored). Surfaced
41    /// as a separate variant so callers can distinguish "spec
42    /// said no" from "we couldn't tell."
43    Inconclusive { spec_name: String, reason: String },
44}
45
46/// Evaluate every `spec` against `bindings` and return the
47/// first non-Allow verdict (or `Allow` if all pass). `lex_source`
48/// supplies the host program — any `SpecExpr::Call` in a spec's
49/// body resolves to a function in this program.
50///
51/// Designed for synchronous per-action use. The Lex program is
52/// type-checked and compiled on each call; callers that gate at
53/// high frequency should prefer [`evaluate_gate_compiled`].
54pub fn evaluate_gate(
55    specs: &[Spec],
56    bindings: &IndexMap<String, Value>,
57    lex_source: &str,
58) -> GateVerdict {
59    let prog = match parse_source(lex_source) {
60        Ok(p) => p,
61        Err(e) => return GateVerdict::Inconclusive {
62            spec_name: "<parse>".into(),
63            reason: format!("parse: {e}"),
64        },
65    };
66    let stages = lex_ast::canonicalize_program(&prog);
67    if let Err(errs) = lex_types::check_program(&stages) {
68        return GateVerdict::Inconclusive {
69            spec_name: "<typecheck>".into(),
70            reason: format!("typecheck: {errs:?}"),
71        };
72    }
73    let bc = compile_program(&stages);
74    evaluate_gate_compiled(specs, bindings, &bc)
75}
76
77/// Same as [`evaluate_gate`] but takes already-compiled
78/// bytecode. Use when gating at high frequency: compile the
79/// program once, evaluate many actions against it.
80pub fn evaluate_gate_compiled(
81    specs: &[Spec],
82    bindings: &IndexMap<String, Value>,
83    bc: &lex_bytecode::Program,
84) -> GateVerdict {
85    let policy = Policy::permissive();
86    for spec in specs {
87        match eval_body(&spec.body, bindings, bc, &policy) {
88            Ok(Value::Bool(true)) => continue,
89            Ok(Value::Bool(false)) => {
90                return GateVerdict::Deny {
91                    spec_name: spec.name.clone(),
92                    reason: format!(
93                        "spec `{}` returned false; bindings: {}",
94                        spec.name,
95                        format_bindings(bindings),
96                    ),
97                };
98            }
99            Ok(other) => return GateVerdict::Inconclusive {
100                spec_name: spec.name.clone(),
101                reason: format!("spec body returned non-bool: {other:?}"),
102            },
103            Err(e) => return GateVerdict::Inconclusive {
104                spec_name: spec.name.clone(),
105                reason: e,
106            },
107        }
108    }
109    GateVerdict::Allow
110}
111
112fn format_bindings(b: &IndexMap<String, Value>) -> String {
113    let mut parts: Vec<String> = Vec::with_capacity(b.len());
114    for (k, v) in b {
115        parts.push(format!("{k}={}", short_value(v)));
116    }
117    parts.join(", ")
118}
119
120fn short_value(v: &Value) -> String {
121    match v {
122        Value::Int(i) => format!("{i}"),
123        Value::Float(f) => format!("{f}"),
124        Value::Bool(b) => format!("{b}"),
125        Value::Str(s) => format!("\"{}\"", s.chars().take(40).collect::<String>()),
126        other => format!("{other:?}"),
127    }
128}
129
130/// Evaluate a `SpecExpr` against caller-supplied `bindings`.
131/// Mirrors `checker::eval` but kept separate so the gate path
132/// doesn't have to thread random-generation state.
133fn eval_body(
134    e: &SpecExpr,
135    bindings: &IndexMap<String, Value>,
136    bc: &lex_bytecode::Program,
137    policy: &Policy,
138) -> Result<Value, String> {
139    match e {
140        SpecExpr::IntLit { value } => Ok(Value::Int(*value)),
141        SpecExpr::FloatLit { value } => Ok(Value::Float(*value)),
142        SpecExpr::BoolLit { value } => Ok(Value::Bool(*value)),
143        SpecExpr::StrLit { value } => Ok(Value::Str(value.clone())),
144        SpecExpr::Var { name } => bindings.get(name).cloned()
145            .ok_or_else(|| format!("unbound spec var `{name}` (provide via gate bindings)")),
146        SpecExpr::Let { name, value, body } => {
147            let v = eval_body(value, bindings, bc, policy)?;
148            let mut next = bindings.clone();
149            next.insert(name.clone(), v);
150            eval_body(body, &next, bc, policy)
151        }
152        SpecExpr::Not { expr } => match eval_body(expr, bindings, bc, policy)? {
153            Value::Bool(b) => Ok(Value::Bool(!b)),
154            other => Err(format!("not on non-bool: {other:?}")),
155        },
156        SpecExpr::BinOp { op, lhs, rhs } => {
157            let a = eval_body(lhs, bindings, bc, policy)?;
158            let b = eval_body(rhs, bindings, bc, policy)?;
159            apply_binop(*op, a, b)
160        }
161        SpecExpr::Call { func, args } => {
162            let mut argv = Vec::new();
163            for a in args { argv.push(eval_body(a, bindings, bc, policy)?); }
164            let handler = DefaultHandler::new(policy.clone());
165            let mut vm = Vm::with_handler(bc, Box::new(handler));
166            vm.call(func, argv).map_err(|e| format!("call `{func}`: {e}"))
167        }
168    }
169}
170
171fn apply_binop(op: SpecOp, a: Value, b: Value) -> Result<Value, String> {
172    use SpecOp::*;
173    match (op, &a, &b) {
174        (Add, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x + y)),
175        (Sub, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x - y)),
176        (Mul, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x * y)),
177        (Div, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x / y)),
178        (Mod, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x % y)),
179        (Add, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x + y)),
180        (Sub, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x - y)),
181        (Mul, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x * y)),
182        (Div, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x / y)),
183        (Eq, x, y) => Ok(Value::Bool(x == y)),
184        (Neq, x, y) => Ok(Value::Bool(x != y)),
185        (Lt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x < y)),
186        (Le, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x <= y)),
187        (Gt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x > y)),
188        (Ge, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x >= y)),
189        (Lt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x < y)),
190        (Le, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x <= y)),
191        (Gt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x > y)),
192        (Ge, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x >= y)),
193        (And, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x && *y)),
194        (Or, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x || *y)),
195        _ => Err(format!("invalid binop {op:?} on {a:?}, {b:?}")),
196    }
197}
198
199#[cfg(test)]
200mod tests {
201    use super::*;
202    use crate::parser::parse_spec;
203
204    fn b<I: IntoIterator<Item = (&'static str, Value)>>(items: I) -> IndexMap<String, Value> {
205        items.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
206    }
207
208    #[test]
209    fn allow_when_spec_returns_true() {
210        let spec = parse_spec("spec ok { forall x :: Int : x + 1 > x }").unwrap();
211        let v = evaluate_gate(&[spec], &b([("x", Value::Int(5))]), "");
212        assert_eq!(v, GateVerdict::Allow);
213    }
214
215    #[test]
216    fn deny_when_spec_returns_false() {
217        let spec = parse_spec("spec budget { forall used :: Int, delta :: Int : (used + delta) <= 100 }").unwrap();
218        let v = evaluate_gate(
219            &[spec],
220            &b([("used", Value::Int(80)), ("delta", Value::Int(30))]),
221            "",
222        );
223        match v {
224            GateVerdict::Deny { spec_name, reason } => {
225                assert_eq!(spec_name, "budget");
226                assert!(reason.contains("used=80"), "reason should include bindings: {reason}");
227            }
228            other => panic!("expected Deny, got {other:?}"),
229        }
230    }
231
232    #[test]
233    fn first_failing_spec_is_reported() {
234        // Two specs, second one fails — verdict mentions the
235        // second one specifically.
236        let s1 = parse_spec("spec always { forall x :: Int : x == x }").unwrap();
237        let s2 = parse_spec("spec never { forall x :: Int : x != x }").unwrap();
238        let v = evaluate_gate(&[s1, s2], &b([("x", Value::Int(0))]), "");
239        match v {
240            GateVerdict::Deny { spec_name, .. } => assert_eq!(spec_name, "never"),
241            other => panic!("expected Deny on `never`, got {other:?}"),
242        }
243    }
244
245    #[test]
246    fn missing_binding_is_inconclusive_not_panic() {
247        // An action that omits a bound the spec needs — surface
248        // as Inconclusive so the caller can fix the gate harness
249        // rather than crash.
250        let spec = parse_spec("spec needs_x { forall x :: Int : x > 0 }").unwrap();
251        let v = evaluate_gate(&[spec], &b([]), "");
252        match v {
253            GateVerdict::Inconclusive { reason, .. } => {
254                assert!(reason.contains("unbound spec var"),
255                    "expected unbound-var error, got: {reason}");
256            }
257            other => panic!("expected Inconclusive, got {other:?}"),
258        }
259    }
260
261    #[test]
262    fn grid_budget_phase1_spec() {
263        // Headline soft Phase 1 spec: site grid load (active +
264        // scheduled + delta) must not exceed budget.
265        let spec = parse_spec(r#"
266            spec grid_budget {
267              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
268                (active + scheduled + delta) <= budget
269            }
270        "#).unwrap();
271        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
272            ("active", Value::Int(40)),
273            ("scheduled", Value::Int(20)),
274            ("delta", Value::Int(15)),
275            ("budget", Value::Int(100)),
276        ]), "");
277        assert_eq!(allow, GateVerdict::Allow);
278        let deny = evaluate_gate(&[spec], &b([
279            ("active", Value::Int(40)),
280            ("scheduled", Value::Int(20)),
281            ("delta", Value::Int(60)),
282            ("budget", Value::Int(100)),
283        ]), "");
284        assert!(matches!(deny, GateVerdict::Deny { .. }));
285    }
286
287    #[test]
288    fn soc_reserve_phase1_spec() {
289        // Second Phase 1 spec: vehicle projected SoC after
290        // proposed action must not drop below reserve.
291        let spec = parse_spec(r#"
292            spec soc_reserve {
293              forall soc :: Int, draw :: Int, reserve :: Int :
294                (soc - draw) >= reserve
295            }
296        "#).unwrap();
297        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
298            ("soc", Value::Int(80)),
299            ("draw", Value::Int(20)),
300            ("reserve", Value::Int(40)),
301        ]), "");
302        assert_eq!(allow, GateVerdict::Allow);
303        let deny = evaluate_gate(&[spec], &b([
304            ("soc", Value::Int(50)),
305            ("draw", Value::Int(20)),
306            ("reserve", Value::Int(40)),
307        ]), "");
308        assert!(matches!(deny, GateVerdict::Deny { .. }));
309    }
310
311    #[test]
312    fn gate_is_fast_enough_for_synchronous_use() {
313        // Issue calls for single-digit ms per verdict on Phase 1's
314        // small spec set. We measure 1k iterations and assert the
315        // average is comfortably under that — the headroom matters
316        // because CI runners are slower than local hardware.
317        let s1 = parse_spec(r#"
318            spec grid_budget {
319              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
320                (active + scheduled + delta) <= budget
321            }
322        "#).unwrap();
323        let s2 = parse_spec(r#"
324            spec soc_reserve {
325              forall soc :: Int, draw :: Int, reserve :: Int :
326                (soc - draw) >= reserve
327            }
328        "#).unwrap();
329        let bindings = b([
330            ("active", Value::Int(40)),
331            ("scheduled", Value::Int(20)),
332            ("delta", Value::Int(15)),
333            ("budget", Value::Int(100)),
334            ("soc", Value::Int(80)),
335            ("draw", Value::Int(20)),
336            ("reserve", Value::Int(40)),
337        ]);
338        let prog = parse_source("").unwrap();
339        let stages = lex_ast::canonicalize_program(&prog);
340        let bc = compile_program(&stages);
341
342        let n = 1000;
343        let start = std::time::Instant::now();
344        for _ in 0..n {
345            let v = evaluate_gate_compiled(&[s1.clone(), s2.clone()], &bindings, &bc);
346            assert_eq!(v, GateVerdict::Allow);
347        }
348        let elapsed = start.elapsed();
349        let per_call_us = elapsed.as_micros() / n as u128;
350        assert!(per_call_us < 5_000,
351            "per-gate verdict should be under 5ms; got {per_call_us}μs");
352    }
353}