spec-checker 0.2.2

Property-based and runtime-gate checker for Lex specs.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
//! Spec-checker as a runtime action gate (#186).
//!
//! `evaluate_gate` evaluates one or more specs against
//! caller-supplied bindings (typically `state` + the proposed
//! action) and returns `Allow` if every spec holds, `Deny` on
//! the first violation.
//!
//! This is a separate evaluation mode from
//! [`crate::check_spec`]: the latter quantifies over random
//! inputs to discover counterexamples; the former takes the
//! inputs *given* and answers a single deterministic verdict.
//! Specs reuse their existing AST — quantifier names become the
//! lookup keys for the supplied bindings — so the same spec
//! that an offline checker proves over random inputs can also
//! gate one specific action online.
//!
//! Trace integration is intentionally not wired here — the
//! function returns the verdict, and the caller (e.g. an agent
//! runtime in a downstream crate) records it. Keeps spec-checker
//! free of a lex-trace dependency.

use crate::ast::{Spec, SpecExpr, SpecOp};
use indexmap::IndexMap;
use lex_bytecode::{compile_program, vm::Vm, Value};
use lex_runtime::{DefaultHandler, Policy};
use lex_syntax::parse_source;
use serde::{Deserialize, Serialize};

/// Verdict returned by [`evaluate_gate`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "verdict", rename_all = "snake_case")]
pub enum GateVerdict {
    Allow,
    /// Spec returned `false` (action violates an invariant).
    /// `spec_name` is the name of the offending spec, `reason`
    /// is human-readable detail (typically the spec name plus
    /// the relevant bindings).
    Deny { spec_name: String, reason: String },
    /// Evaluation failed for a non-spec reason (e.g. the body
    /// referenced a Lex function whose call errored). Surfaced
    /// as a separate variant so callers can distinguish "spec
    /// said no" from "we couldn't tell."
    Inconclusive { spec_name: String, reason: String },
}

/// Evaluate every `spec` against `bindings` and return the
/// first non-Allow verdict (or `Allow` if all pass). `lex_source`
/// supplies the host program — any `SpecExpr::Call` in a spec's
/// body resolves to a function in this program.
///
/// Designed for synchronous per-action use. The Lex program is
/// type-checked and compiled on each call; callers that gate at
/// high frequency should prefer [`evaluate_gate_compiled`].
pub fn evaluate_gate(
    specs: &[Spec],
    bindings: &IndexMap<String, Value>,
    lex_source: &str,
) -> GateVerdict {
    let prog = match parse_source(lex_source) {
        Ok(p) => p,
        Err(e) => return GateVerdict::Inconclusive {
            spec_name: "<parse>".into(),
            reason: format!("parse: {e}"),
        },
    };
    let stages = lex_ast::canonicalize_program(&prog);
    if let Err(errs) = lex_types::check_program(&stages) {
        return GateVerdict::Inconclusive {
            spec_name: "<typecheck>".into(),
            reason: format!("typecheck: {errs:?}"),
        };
    }
    let bc = compile_program(&stages);
    evaluate_gate_compiled(specs, bindings, &bc)
}

/// Same as [`evaluate_gate`] but takes already-compiled
/// bytecode. Use when gating at high frequency: compile the
/// program once, evaluate many actions against it.
pub fn evaluate_gate_compiled(
    specs: &[Spec],
    bindings: &IndexMap<String, Value>,
    bc: &lex_bytecode::Program,
) -> GateVerdict {
    evaluate_gate_compiled_inner(specs, bindings, bc, None)
}

/// Like [`evaluate_gate_compiled`] but additionally threads a
/// caller-supplied tracer into every Vm the spec body spins up
/// for [`SpecExpr::Call`] (#199).
///
/// `new_tracer` is called once per host-helper invocation and
/// must produce a fresh `Box<dyn Tracer>` for each new `Vm`.
/// Multiple tracers can share state — typically by closing over
/// a [`lex_trace::Handle`] and cloning it inside the closure —
/// so the resulting trace tree captures the spec body's call
/// graph (e.g. `under_budget → projected_load + budget_total`)
/// alongside the rest of the agent's run.
///
/// Existing callers of [`evaluate_gate`] / [`evaluate_gate_compiled`]
/// stay unchanged; this is purely additive.
pub fn evaluate_gate_compiled_traced<F>(
    specs: &[Spec],
    bindings: &IndexMap<String, Value>,
    bc: &lex_bytecode::Program,
    new_tracer: F,
) -> GateVerdict
where
    F: Fn() -> Box<dyn lex_bytecode::vm::Tracer>,
{
    evaluate_gate_compiled_inner(specs, bindings, bc, Some(&new_tracer))
}

fn evaluate_gate_compiled_inner(
    specs: &[Spec],
    bindings: &IndexMap<String, Value>,
    bc: &lex_bytecode::Program,
    new_tracer: Option<&dyn Fn() -> Box<dyn lex_bytecode::vm::Tracer>>,
) -> GateVerdict {
    let policy = Policy::permissive();
    for spec in specs {
        match eval_body(&spec.body, bindings, bc, &policy, new_tracer) {
            Ok(Value::Bool(true)) => continue,
            Ok(Value::Bool(false)) => {
                return GateVerdict::Deny {
                    spec_name: spec.name.clone(),
                    reason: format!(
                        "spec `{}` returned false; bindings: {}",
                        spec.name,
                        format_bindings(bindings),
                    ),
                };
            }
            Ok(other) => return GateVerdict::Inconclusive {
                spec_name: spec.name.clone(),
                reason: format!("spec body returned non-bool: {other:?}"),
            },
            Err(e) => return GateVerdict::Inconclusive {
                spec_name: spec.name.clone(),
                reason: e,
            },
        }
    }
    GateVerdict::Allow
}

fn format_bindings(b: &IndexMap<String, Value>) -> String {
    let mut parts: Vec<String> = Vec::with_capacity(b.len());
    for (k, v) in b {
        parts.push(format!("{k}={}", short_value(v)));
    }
    parts.join(", ")
}

fn short_value(v: &Value) -> String {
    match v {
        Value::Int(i) => format!("{i}"),
        Value::Float(f) => format!("{f}"),
        Value::Bool(b) => format!("{b}"),
        Value::Str(s) => format!("\"{}\"", s.chars().take(40).collect::<String>()),
        other => format!("{other:?}"),
    }
}

/// Evaluate a `SpecExpr` against caller-supplied `bindings`.
/// Mirrors `checker::eval` but kept separate so the gate path
/// doesn't have to thread random-generation state.
///
/// `new_tracer`, when present, is invoked once per
/// `SpecExpr::Call` and the resulting Tracer is attached to
/// the Vm before running the host helper. The factory shape
/// (rather than a single `Box<dyn Tracer>`) is what lets
/// multiple sibling calls all flow into the same caller-side
/// recorder via cloned `Handle`s.
fn eval_body(
    e: &SpecExpr,
    bindings: &IndexMap<String, Value>,
    bc: &lex_bytecode::Program,
    policy: &Policy,
    new_tracer: Option<&dyn Fn() -> Box<dyn lex_bytecode::vm::Tracer>>,
) -> Result<Value, String> {
    match e {
        SpecExpr::IntLit { value } => Ok(Value::Int(*value)),
        SpecExpr::FloatLit { value } => Ok(Value::Float(*value)),
        SpecExpr::BoolLit { value } => Ok(Value::Bool(*value)),
        SpecExpr::StrLit { value } => Ok(Value::Str(value.clone())),
        SpecExpr::Var { name } => bindings.get(name).cloned()
            .ok_or_else(|| format!("unbound spec var `{name}` (provide via gate bindings)")),
        SpecExpr::Let { name, value, body } => {
            let v = eval_body(value, bindings, bc, policy, new_tracer)?;
            let mut next = bindings.clone();
            next.insert(name.clone(), v);
            eval_body(body, &next, bc, policy, new_tracer)
        }
        SpecExpr::Not { expr } => match eval_body(expr, bindings, bc, policy, new_tracer)? {
            Value::Bool(b) => Ok(Value::Bool(!b)),
            other => Err(format!("not on non-bool: {other:?}")),
        },
        SpecExpr::BinOp { op, lhs, rhs } => {
            let a = eval_body(lhs, bindings, bc, policy, new_tracer)?;
            let b = eval_body(rhs, bindings, bc, policy, new_tracer)?;
            apply_binop(*op, a, b)
        }
        SpecExpr::Call { func, args } => {
            let mut argv = Vec::new();
            for a in args { argv.push(eval_body(a, bindings, bc, policy, new_tracer)?); }
            let handler = DefaultHandler::new(policy.clone());
            let mut vm = Vm::with_handler(bc, Box::new(handler));
            if let Some(make_tracer) = new_tracer {
                vm.set_tracer(make_tracer());
            }
            vm.call(func, argv).map_err(|e| format!("call `{func}`: {e}"))
        }
    }
}

fn apply_binop(op: SpecOp, a: Value, b: Value) -> Result<Value, String> {
    use SpecOp::*;
    match (op, &a, &b) {
        (Add, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x + y)),
        (Sub, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x - y)),
        (Mul, Value::Int(x), Value::Int(y)) => Ok(Value::Int(x * y)),
        (Div, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x / y)),
        (Mod, Value::Int(x), Value::Int(y)) if *y != 0 => Ok(Value::Int(x % y)),
        (Add, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x + y)),
        (Sub, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x - y)),
        (Mul, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x * y)),
        (Div, Value::Float(x), Value::Float(y)) => Ok(Value::Float(x / y)),
        (Eq, x, y) => Ok(Value::Bool(x == y)),
        (Neq, x, y) => Ok(Value::Bool(x != y)),
        (Lt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x < y)),
        (Le, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x <= y)),
        (Gt, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x > y)),
        (Ge, Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x >= y)),
        (Lt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x < y)),
        (Le, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x <= y)),
        (Gt, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x > y)),
        (Ge, Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x >= y)),
        (And, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x && *y)),
        (Or, Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(*x || *y)),
        _ => Err(format!("invalid binop {op:?} on {a:?}, {b:?}")),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_spec;

    fn b<I: IntoIterator<Item = (&'static str, Value)>>(items: I) -> IndexMap<String, Value> {
        items.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
    }

    #[test]
    fn allow_when_spec_returns_true() {
        let spec = parse_spec("spec ok { forall x :: Int : x + 1 > x }").unwrap();
        let v = evaluate_gate(&[spec], &b([("x", Value::Int(5))]), "");
        assert_eq!(v, GateVerdict::Allow);
    }

    #[test]
    fn deny_when_spec_returns_false() {
        let spec = parse_spec("spec budget { forall used :: Int, delta :: Int : (used + delta) <= 100 }").unwrap();
        let v = evaluate_gate(
            &[spec],
            &b([("used", Value::Int(80)), ("delta", Value::Int(30))]),
            "",
        );
        match v {
            GateVerdict::Deny { spec_name, reason } => {
                assert_eq!(spec_name, "budget");
                assert!(reason.contains("used=80"), "reason should include bindings: {reason}");
            }
            other => panic!("expected Deny, got {other:?}"),
        }
    }

    #[test]
    fn first_failing_spec_is_reported() {
        // Two specs, second one fails — verdict mentions the
        // second one specifically.
        let s1 = parse_spec("spec always { forall x :: Int : x == x }").unwrap();
        let s2 = parse_spec("spec never { forall x :: Int : x != x }").unwrap();
        let v = evaluate_gate(&[s1, s2], &b([("x", Value::Int(0))]), "");
        match v {
            GateVerdict::Deny { spec_name, .. } => assert_eq!(spec_name, "never"),
            other => panic!("expected Deny on `never`, got {other:?}"),
        }
    }

    #[test]
    fn missing_binding_is_inconclusive_not_panic() {
        // An action that omits a bound the spec needs — surface
        // as Inconclusive so the caller can fix the gate harness
        // rather than crash.
        let spec = parse_spec("spec needs_x { forall x :: Int : x > 0 }").unwrap();
        let v = evaluate_gate(&[spec], &b([]), "");
        match v {
            GateVerdict::Inconclusive { reason, .. } => {
                assert!(reason.contains("unbound spec var"),
                    "expected unbound-var error, got: {reason}");
            }
            other => panic!("expected Inconclusive, got {other:?}"),
        }
    }

    #[test]
    fn grid_budget_phase1_spec() {
        // Headline soft Phase 1 spec: site grid load (active +
        // scheduled + delta) must not exceed budget.
        let spec = parse_spec(r#"
            spec grid_budget {
              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
                (active + scheduled + delta) <= budget
            }
        "#).unwrap();
        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
            ("active", Value::Int(40)),
            ("scheduled", Value::Int(20)),
            ("delta", Value::Int(15)),
            ("budget", Value::Int(100)),
        ]), "");
        assert_eq!(allow, GateVerdict::Allow);
        let deny = evaluate_gate(&[spec], &b([
            ("active", Value::Int(40)),
            ("scheduled", Value::Int(20)),
            ("delta", Value::Int(60)),
            ("budget", Value::Int(100)),
        ]), "");
        assert!(matches!(deny, GateVerdict::Deny { .. }));
    }

    #[test]
    fn soc_reserve_phase1_spec() {
        // Second Phase 1 spec: vehicle projected SoC after
        // proposed action must not drop below reserve.
        let spec = parse_spec(r#"
            spec soc_reserve {
              forall soc :: Int, draw :: Int, reserve :: Int :
                (soc - draw) >= reserve
            }
        "#).unwrap();
        let allow = evaluate_gate(std::slice::from_ref(&spec), &b([
            ("soc", Value::Int(80)),
            ("draw", Value::Int(20)),
            ("reserve", Value::Int(40)),
        ]), "");
        assert_eq!(allow, GateVerdict::Allow);
        let deny = evaluate_gate(&[spec], &b([
            ("soc", Value::Int(50)),
            ("draw", Value::Int(20)),
            ("reserve", Value::Int(40)),
        ]), "");
        assert!(matches!(deny, GateVerdict::Deny { .. }));
    }

    #[test]
    fn gate_is_fast_enough_for_synchronous_use() {
        // Issue calls for single-digit ms per verdict on Phase 1's
        // small spec set. We measure 1k iterations and assert the
        // average is comfortably under that — the headroom matters
        // because CI runners are slower than local hardware.
        let s1 = parse_spec(r#"
            spec grid_budget {
              forall active :: Int, scheduled :: Int, delta :: Int, budget :: Int :
                (active + scheduled + delta) <= budget
            }
        "#).unwrap();
        let s2 = parse_spec(r#"
            spec soc_reserve {
              forall soc :: Int, draw :: Int, reserve :: Int :
                (soc - draw) >= reserve
            }
        "#).unwrap();
        let bindings = b([
            ("active", Value::Int(40)),
            ("scheduled", Value::Int(20)),
            ("delta", Value::Int(15)),
            ("budget", Value::Int(100)),
            ("soc", Value::Int(80)),
            ("draw", Value::Int(20)),
            ("reserve", Value::Int(40)),
        ]);
        let prog = parse_source("").unwrap();
        let stages = lex_ast::canonicalize_program(&prog);
        let bc = compile_program(&stages);

        let n = 1000;
        let start = std::time::Instant::now();
        for _ in 0..n {
            let v = evaluate_gate_compiled(&[s1.clone(), s2.clone()], &bindings, &bc);
            assert_eq!(v, GateVerdict::Allow);
        }
        let elapsed = start.elapsed();
        let per_call_us = elapsed.as_micros() / n as u128;
        assert!(per_call_us < 5_000,
            "per-gate verdict should be under 5ms; got {per_call_us}μs");
    }

    // ---- #199: optional tracer hook -----------------------------

    /// Minimal Tracer that records every enter_call name into a
    /// shared Vec. Avoids depending on lex-trace; soft-agent's
    /// real wiring uses `lex_trace::Recorder` + `Handle::clone`.
    struct CallRecorder {
        captured: std::sync::Arc<std::sync::Mutex<Vec<String>>>,
    }
    impl lex_bytecode::vm::Tracer for CallRecorder {
        fn enter_call(&mut self, _node_id: &str, name: &str, _args: &[Value]) {
            self.captured.lock().unwrap().push(name.to_string());
        }
        fn enter_effect(&mut self, _: &str, _: &str, _: &str, _: &[Value]) {}
        fn exit_ok(&mut self, _: &Value) {}
        fn exit_err(&mut self, _: &str) {}
        fn exit_call_tail(&mut self) {}
        fn override_effect(&mut self, _: &str) -> Option<Value> { None }
    }

    #[test]
    fn traced_gate_captures_nested_call_events() {
        // Spec body calls `under_budget`, which itself calls
        // `projected_load` and `budget_total`. Without the tracer
        // hook, only the top-level Lex call appears in any
        // recorder; with it, the nested helpers do too.
        let host_src = r#"
            fn projected_load(active :: Int, delta :: Int) -> Int {
              active + delta
            }
            fn budget_total(budget :: Int, headroom :: Int) -> Int {
              budget + headroom
            }
            fn under_budget(active :: Int, delta :: Int, budget :: Int, headroom :: Int) -> Bool {
              projected_load(active, delta) <= budget_total(budget, headroom)
            }
        "#;
        let prog = parse_source(host_src).unwrap();
        let stages = lex_ast::canonicalize_program(&prog);
        let bc = compile_program(&stages);

        let spec = parse_spec(r#"
            spec gated_budget {
              forall active :: Int, delta :: Int, budget :: Int, headroom :: Int :
                under_budget(active, delta, budget, headroom)
            }
        "#).unwrap();
        let bindings = b([
            ("active", Value::Int(40)),
            ("delta", Value::Int(15)),
            ("budget", Value::Int(60)),
            ("headroom", Value::Int(0)),
        ]);

        let captured = std::sync::Arc::new(std::sync::Mutex::new(Vec::<String>::new()));
        let captured_for_factory = std::sync::Arc::clone(&captured);
        let v = evaluate_gate_compiled_traced(
            std::slice::from_ref(&spec),
            &bindings,
            &bc,
            move || Box::new(CallRecorder {
                captured: std::sync::Arc::clone(&captured_for_factory),
            }),
        );
        assert_eq!(v, GateVerdict::Allow);

        let calls = captured.lock().unwrap();
        // The Vm fires `enter_call` for sub-calls executed inside
        // the entry function's body, not for the host-driven
        // `Vm::call("under_budget", ...)` itself — that's the
        // host's contract. The point of the tracer hook is that
        // these *nested* helpers are visible at all; pre-#199
        // they were entirely opaque to the gate's recorder.
        for expected in ["projected_load", "budget_total"] {
            assert!(calls.iter().any(|c| c == expected),
                "expected `{expected}` in captured calls; got {:?}", *calls);
        }
    }

    #[test]
    fn untraced_gate_path_unchanged() {
        // The existing `evaluate_gate_compiled` API stays unaffected
        // by #199: same signature, same behavior. Pin this so future
        // refactors of the inner factory threading don't quietly
        // shift the public contract.
        let spec = parse_spec("spec ok { forall x :: Int : x + 1 > x }").unwrap();
        let v = evaluate_gate_compiled(
            std::slice::from_ref(&spec),
            &b([("x", Value::Int(5))]),
            &compile_program(&lex_ast::canonicalize_program(&parse_source("").unwrap())),
        );
        assert_eq!(v, GateVerdict::Allow);
    }
}