assay-core 3.0.0

High-performance evaluation framework for LLM agents (Core)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
//! Tool decision events and always-emit guard (SPEC-Mandate-v1.0.4 §7.9).
//!
//! This module implements the "always emit decision" invariant (I1):
//! Every tool call attempt MUST emit exactly one decision event.

use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::io::Write;
use std::sync::Arc;

/// Reason codes for tool decisions (SPEC-Mandate-v1.0.4 §7.10).
///
/// Each constant's value is the constant's own name as a string, e.g.
/// `reason_codes::M_EXPIRED == "M_EXPIRED"`.
pub mod reason_codes {
    // Expands every identifier into `pub const NAME: &str = "NAME";` so the
    // identifier and its string value cannot drift apart.
    macro_rules! define_reason_codes {
        ($($code:ident),+ $(,)?) => {
            $(pub const $code: &str = stringify!($code);)+
        };
    }

    // Policy decisions (P_*)
    define_reason_codes! {
        P_POLICY_ALLOW,
        P_POLICY_DENY,
        P_TOOL_DENIED,
        P_TOOL_NOT_ALLOWED,
        P_ARG_SCHEMA,
        P_RATE_LIMIT,
        P_TOOL_DRIFT,
        P_MANDATE_REQUIRED,
        P_MANDATE_VALID,
    }

    // Mandate failures (M_*)
    define_reason_codes! {
        M_EXPIRED,
        M_NOT_YET_VALID,
        M_NONCE_REPLAY,
        M_ALREADY_USED,
        M_MAX_USES_EXCEEDED,
        M_TOOL_NOT_IN_SCOPE,
        M_KIND_MISMATCH,
        M_AUDIENCE_MISMATCH,
        M_ISSUER_NOT_TRUSTED,
        M_TRANSACTION_REF_MISMATCH,
        M_NOT_FOUND,
        M_REVOKED,
    }

    // Store/system errors (S_*)
    define_reason_codes! {
        S_DB_ERROR,
        S_INTERNAL_ERROR,
    }

    // Timeout/execution errors (T_*)
    define_reason_codes! {
        T_TIMEOUT,
        T_EXEC_ERROR,
    }
}

/// Decision outcome for a tool call.
///
/// Serialized in snake_case, i.e. as `"allow"`, `"deny"` or `"error"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Decision {
    /// The tool call was permitted.
    Allow,
    /// The tool call was rejected.
    Deny,
    /// No allow/deny verdict was reached. This is also the placeholder value
    /// that `DecisionEvent::new` assigns before a verdict is set.
    Error,
}

/// A tool decision event (CloudEvents compliant).
///
/// The envelope fields (`specversion`, `id`, `type`, `source`, `time`) wrap the
/// decision payload carried in `data`.
///
/// NOTE(review): `specversion` and `event_type` are `&'static str`, so the
/// derived `Deserialize` impl presumably only accepts input that lives for
/// `'static` (it would not satisfy `DeserializeOwned`) — confirm before
/// relying on reading events back from non-static buffers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DecisionEvent {
    /// CloudEvents specversion (`DecisionEvent::new` sets this to `"1.0"`)
    pub specversion: &'static str,
    /// Unique event ID (`DecisionEvent::new` generates `evt_decision_<uuid v4>`)
    pub id: String,
    /// Event type: assay.tool.decision (serialized under the key `type`)
    #[serde(rename = "type")]
    pub event_type: &'static str,
    /// Event source (configured, not dynamic)
    pub source: String,
    /// Event timestamp (ISO 8601; `DecisionEvent::new` uses the current UTC
    /// time formatted as RFC 3339)
    pub time: String,
    /// Event data
    pub data: DecisionData,
}

/// Data payload for a decision event.
///
/// `tool`, `decision`, `reason_code` and `tool_call_id` are always serialized.
/// Every `Option` field is omitted from the output when it is `None`, and the
/// two tool-class vectors are omitted when empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DecisionData {
    /// Tool name
    pub tool: String,
    /// Tool classes observed at decision time (sorted)
    ///
    /// NOTE(review): the setters in this file store the vector as given, so
    /// the sorting is presumably the caller's responsibility — confirm.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tool_classes: Vec<String>,
    /// Tool classes that matched the policy decision (sorted)
    ///
    /// NOTE(review): stored as given by the setters in this file; sorting is
    /// presumably done by the caller — confirm.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub matched_tool_classes: Vec<String>,
    /// Match basis for policy evaluation: name, class, or name+class
    #[serde(skip_serializing_if = "Option::is_none")]
    pub match_basis: Option<String>,
    /// Rule or policy field that matched
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_rule: Option<String>,
    /// Decision outcome
    pub decision: Decision,
    /// Machine-parseable reason code (MUST); see the `reason_codes` module
    pub reason_code: String,
    /// Human-readable reason (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    /// Unique tool call identifier (MUST for idempotency)
    pub tool_call_id: String,
    /// Request ID from JSON-RPC (kept as an arbitrary JSON value)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub request_id: Option<Value>,
    /// Mandate ID if present
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mandate_id: Option<String>,
    /// Use ID from consumption (if consumed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub use_id: Option<String>,
    /// Use count at time of decision
    #[serde(skip_serializing_if = "Option::is_none")]
    pub use_count: Option<u32>,
    /// Whether tool matched mandate scope
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mandate_scope_match: Option<bool>,
    /// Whether mandate kind allows operation class
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mandate_kind_match: Option<bool>,
    /// Whether transaction_ref matched
    #[serde(skip_serializing_if = "Option::is_none")]
    pub transaction_ref_match: Option<bool>,
    /// Authorization latency in milliseconds
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authz_latency_ms: Option<u64>,
    /// Store latency in milliseconds
    #[serde(skip_serializing_if = "Option::is_none")]
    pub store_latency_ms: Option<u64>,
}

impl DecisionEvent {
    /// Create a new decision event.
    pub fn new(source: String, tool_call_id: String, tool: String) -> Self {
        Self {
            specversion: "1.0",
            id: format!("evt_decision_{}", uuid::Uuid::new_v4()),
            event_type: "assay.tool.decision",
            source,
            time: chrono::Utc::now().to_rfc3339(),
            data: DecisionData {
                tool,
                tool_classes: Vec::new(),
                matched_tool_classes: Vec::new(),
                match_basis: None,
                matched_rule: None,
                decision: Decision::Error, // Default to error, will be set
                reason_code: reason_codes::S_INTERNAL_ERROR.to_string(),
                reason: Some("Decision not finalized (guard dropped without emit)".to_string()),
                tool_call_id,
                request_id: None,
                mandate_id: None,
                use_id: None,
                use_count: None,
                mandate_scope_match: None,
                mandate_kind_match: None,
                transaction_ref_match: None,
                authz_latency_ms: None,
                store_latency_ms: None,
            },
        }
    }

    /// Set allow decision.
    pub fn allow(mut self, reason_code: &str) -> Self {
        self.data.decision = Decision::Allow;
        self.data.reason_code = reason_code.to_string();
        self.data.reason = None;
        self
    }

    /// Set deny decision.
    pub fn deny(mut self, reason_code: &str, reason: Option<String>) -> Self {
        self.data.decision = Decision::Deny;
        self.data.reason_code = reason_code.to_string();
        self.data.reason = reason;
        self
    }

    /// Set error decision.
    pub fn error(mut self, reason_code: &str, reason: Option<String>) -> Self {
        self.data.decision = Decision::Error;
        self.data.reason_code = reason_code.to_string();
        self.data.reason = reason;
        self
    }

    /// Set request ID.
    pub fn with_request_id(mut self, id: Option<Value>) -> Self {
        self.data.request_id = id;
        self
    }

    /// Set mandate info.
    pub fn with_mandate(
        mut self,
        mandate_id: Option<String>,
        use_id: Option<String>,
        use_count: Option<u32>,
    ) -> Self {
        self.data.mandate_id = mandate_id;
        self.data.use_id = use_id;
        self.data.use_count = use_count;
        self
    }

    /// Set mandate match flags.
    pub fn with_mandate_matches(
        mut self,
        scope_match: Option<bool>,
        kind_match: Option<bool>,
        tx_ref_match: Option<bool>,
    ) -> Self {
        self.data.mandate_scope_match = scope_match;
        self.data.mandate_kind_match = kind_match;
        self.data.transaction_ref_match = tx_ref_match;
        self
    }

    /// Set latencies.
    pub fn with_latencies(mut self, authz_ms: Option<u64>, store_ms: Option<u64>) -> Self {
        self.data.authz_latency_ms = authz_ms;
        self.data.store_latency_ms = store_ms;
        self
    }

    /// Set tool match metadata.
    pub fn with_tool_match(
        mut self,
        tool_classes: Vec<String>,
        matched_tool_classes: Vec<String>,
        match_basis: Option<String>,
        matched_rule: Option<String>,
    ) -> Self {
        self.data.tool_classes = tool_classes;
        self.data.matched_tool_classes = matched_tool_classes;
        self.data.match_basis = match_basis;
        self.data.matched_rule = matched_rule;
        self
    }
}

/// Trait for emitting decision events.
///
/// Implementors must be `Send + Sync`. `emit` takes `&self` and returns
/// nothing, so an implementation has no way to report a failure to the caller
/// through this interface.
pub trait DecisionEmitter: Send + Sync {
    /// Emit a decision event.
    fn emit(&self, event: &DecisionEvent);
}

/// Decision emitter that appends NDJSON lines to a file.
pub struct FileDecisionEmitter {
    /// Output file handle; the mutex lets `&self` callers write to it.
    file: std::sync::Mutex<std::fs::File>,
}

impl FileDecisionEmitter {
    /// Open `path` for appending, creating the file if it does not exist.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if the file cannot be opened.
    pub fn new(path: &std::path::Path) -> std::io::Result<Self> {
        let mut options = std::fs::OpenOptions::new();
        options.append(true).create(true);
        options.open(path).map(|handle| Self {
            file: std::sync::Mutex::new(handle),
        })
    }
}

impl DecisionEmitter for FileDecisionEmitter {
    /// Append `event` to the file as one JSON line.
    ///
    /// Emission is best-effort: serialization and I/O failures are ignored
    /// because the trait gives no channel to report them.
    fn emit(&self, event: &DecisionEvent) {
        let mut line = match serde_json::to_string(event) {
            Ok(json) => json,
            Err(_) => return,
        };
        line.push('\n');

        // A poisoned mutex only means another thread panicked while holding
        // the lock; the file handle is still usable. Recover it instead of
        // silently dropping every later event.
        let mut file = self
            .file
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);

        // Hand the payload and its newline to the file in a single
        // `write_all` call, rather than the separate writes `writeln!` makes
        // on an unbuffered file, so concurrent appenders are less likely to
        // interleave inside a line.
        let _ = file.write_all(line.as_bytes());
    }
}

/// Null emitter for testing.
///
/// Every event passed to `emit` is discarded.
pub struct NullDecisionEmitter;

impl DecisionEmitter for NullDecisionEmitter {
    /// Intentionally does nothing with the event.
    fn emit(&self, _event: &DecisionEvent) {}
}

/// RAII guard that ensures a decision event is ALWAYS emitted.
///
/// This implements invariant I1: Every tool call attempt MUST emit exactly one
/// decision event, even on panics or early returns. The fallback relies on
/// `Drop`, so it does not run when the process aborts instead of unwinding.
///
/// Bind the guard to a named variable. A guard that is not bound (or is bound
/// to `_`) is dropped immediately and emits the fallback error decision.
///
/// Usage:
/// ```ignore
/// let guard = DecisionEmitterGuard::new(emitter, source, tool_call_id, tool);
/// // ... do authorization work ...
/// guard.emit_allow(reason_codes::P_MANDATE_VALID); // Consumes guard
/// // OR
/// guard.emit_deny(reason_codes::M_EXPIRED, Some("Mandate expired".to_string())); // Consumes guard
/// // If guard is dropped without explicit emit, emits error decision
/// ```
#[must_use = "dropping the guard without calling an emit_* method emits an S_INTERNAL_ERROR decision"]
pub struct DecisionEmitterGuard {
    /// Sink that receives the single decision event.
    emitter: Arc<dyn DecisionEmitter>,
    /// Pending event; `None` once it has been emitted.
    event: Option<DecisionEvent>,
}

impl DecisionEmitterGuard {
    /// Create a guard holding a pending event for this tool call.
    ///
    /// If none of the `emit_*` methods is called, the pending event is
    /// emitted as an error decision when the guard is dropped.
    pub fn new(
        emitter: Arc<dyn DecisionEmitter>,
        source: String,
        tool_call_id: String,
        tool: String,
    ) -> Self {
        let pending = DecisionEvent::new(source, tool_call_id, tool);
        Self {
            emitter,
            event: Some(pending),
        }
    }

    /// Take the pending event (if any), apply `decide` to it and emit the
    /// result. Does nothing when the event has already been taken.
    fn finish_with(&mut self, decide: impl FnOnce(DecisionEvent) -> DecisionEvent) {
        if let Some(pending) = self.event.take() {
            self.emitter.emit(&decide(pending));
        }
    }

    /// Record the JSON-RPC request ID on the pending event.
    pub fn set_request_id(&mut self, id: Option<Value>) {
        if let Some(pending) = self.event.as_mut() {
            pending.data.request_id = id;
        }
    }

    /// Record mandate ID, use ID and use count on the pending event.
    pub fn set_mandate_info(
        &mut self,
        mandate_id: Option<String>,
        use_id: Option<String>,
        use_count: Option<u32>,
    ) {
        if let Some(pending) = self.event.as_mut() {
            let data = &mut pending.data;
            data.mandate_id = mandate_id;
            data.use_id = use_id;
            data.use_count = use_count;
        }
    }

    /// Record the scope / kind / transaction-ref match flags.
    pub fn set_mandate_matches(
        &mut self,
        scope_match: Option<bool>,
        kind_match: Option<bool>,
        tx_ref_match: Option<bool>,
    ) {
        if let Some(pending) = self.event.as_mut() {
            let data = &mut pending.data;
            data.mandate_scope_match = scope_match;
            data.mandate_kind_match = kind_match;
            data.transaction_ref_match = tx_ref_match;
        }
    }

    /// Record authorization and store latencies (milliseconds).
    pub fn set_latencies(&mut self, authz_ms: Option<u64>, store_ms: Option<u64>) {
        if let Some(pending) = self.event.as_mut() {
            let data = &mut pending.data;
            data.authz_latency_ms = authz_ms;
            data.store_latency_ms = store_ms;
        }
    }

    /// Record tool match metadata on the pending event.
    pub fn set_tool_match(
        &mut self,
        tool_classes: Vec<String>,
        matched_tool_classes: Vec<String>,
        match_basis: Option<String>,
        matched_rule: Option<String>,
    ) {
        if let Some(pending) = self.event.as_mut() {
            let data = &mut pending.data;
            data.tool_classes = tool_classes;
            data.matched_tool_classes = matched_tool_classes;
            data.match_basis = match_basis;
            data.matched_rule = matched_rule;
        }
    }

    /// Emit the pending event as an allow decision, consuming the guard.
    pub fn emit_allow(mut self, reason_code: &str) {
        self.finish_with(|pending| pending.allow(reason_code));
    }

    /// Emit the pending event as a deny decision, consuming the guard.
    pub fn emit_deny(mut self, reason_code: &str, reason: Option<String>) {
        self.finish_with(|pending| pending.deny(reason_code, reason));
    }

    /// Emit the pending event as an error decision, consuming the guard.
    pub fn emit_error(mut self, reason_code: &str, reason: Option<String>) {
        self.finish_with(|pending| pending.error(reason_code, reason));
    }

    /// Emit a caller-built event in place of the pending one (advanced use).
    pub fn emit_event(mut self, event: DecisionEvent) {
        // Discard the pending event first so the drop handler stays silent.
        drop(self.event.take());
        self.emitter.emit(&event);
    }
}

impl Drop for DecisionEmitterGuard {
    /// Safety net: emit an error decision if no explicit emit happened.
    fn drop(&mut self) {
        // The explicit emit paths take the event, so `None` means the one
        // decision for this call has already gone out.
        let pending = match self.event.take() {
            Some(pending) => pending,
            None => return,
        };

        let fallback = pending.error(
            reason_codes::S_INTERNAL_ERROR,
            Some(String::from(
                "Decision guard dropped without explicit emit (possible panic or early return)",
            )),
        );
        self.emitter.emit(&fallback);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Test emitter that counts events and remembers the last decision and
    /// reason code it received.
    struct CountingEmitter {
        count: AtomicUsize,
        last_decision: std::sync::Mutex<Option<Decision>>,
        last_reason_code: std::sync::Mutex<Option<String>>,
    }

    impl CountingEmitter {
        fn new() -> Self {
            Self {
                count: AtomicUsize::new(0),
                last_decision: std::sync::Mutex::new(None),
                last_reason_code: std::sync::Mutex::new(None),
            }
        }
    }

    impl DecisionEmitter for CountingEmitter {
        fn emit(&self, event: &DecisionEvent) {
            self.count.fetch_add(1, Ordering::SeqCst);
            *self.last_decision.lock().unwrap() = Some(event.data.decision);
            *self.last_reason_code.lock().unwrap() = Some(event.data.reason_code.clone());
        }
    }

    /// Build a guard wired to `emitter` for the given tool call id.
    fn guard_for(emitter: &Arc<CountingEmitter>, tool_call_id: &str) -> DecisionEmitterGuard {
        DecisionEmitterGuard::new(
            emitter.clone(),
            "assay://test".to_string(),
            tool_call_id.to_string(),
            "test_tool".to_string(),
        )
    }

    #[test]
    fn test_guard_explicit_allow_emits_once() {
        let emitter = Arc::new(CountingEmitter::new());
        let guard = guard_for(&emitter, "tc_001");

        guard.emit_allow(reason_codes::P_MANDATE_VALID);

        assert_eq!(emitter.count.load(Ordering::SeqCst), 1);
        assert_eq!(
            *emitter.last_decision.lock().unwrap(),
            Some(Decision::Allow)
        );
    }

    #[test]
    fn test_guard_explicit_deny_emits_once() {
        let emitter = Arc::new(CountingEmitter::new());
        let guard = guard_for(&emitter, "tc_002");

        guard.emit_deny(reason_codes::M_EXPIRED, Some("Mandate expired".to_string()));

        assert_eq!(emitter.count.load(Ordering::SeqCst), 1);
        assert_eq!(*emitter.last_decision.lock().unwrap(), Some(Decision::Deny));
        assert_eq!(
            *emitter.last_reason_code.lock().unwrap(),
            Some(reason_codes::M_EXPIRED.to_string())
        );
    }

    #[test]
    fn test_guard_drop_emits_error() {
        let emitter = Arc::new(CountingEmitter::new());
        {
            let _guard = guard_for(&emitter, "tc_003");
            // Guard dropped without explicit emit
        }

        assert_eq!(emitter.count.load(Ordering::SeqCst), 1);
        assert_eq!(
            *emitter.last_decision.lock().unwrap(),
            Some(Decision::Error)
        );
        assert_eq!(
            *emitter.last_reason_code.lock().unwrap(),
            Some(reason_codes::S_INTERNAL_ERROR.to_string())
        );
    }

    #[test]
    fn test_guard_no_double_emit() {
        let emitter = Arc::new(CountingEmitter::new());
        {
            let guard = guard_for(&emitter, "tc_004");
            // An allow decision must carry an allow reason code.
            guard.emit_allow(reason_codes::P_POLICY_ALLOW);
            // Guard dropped after explicit emit
        }

        // Should only emit once, and the drop handler must not overwrite it.
        assert_eq!(emitter.count.load(Ordering::SeqCst), 1);
        assert_eq!(
            *emitter.last_decision.lock().unwrap(),
            Some(Decision::Allow)
        );
        assert_eq!(
            *emitter.last_reason_code.lock().unwrap(),
            Some(reason_codes::P_POLICY_ALLOW.to_string())
        );
    }

    #[test]
    fn test_guard_emit_event_emits_once() {
        let emitter = Arc::new(CountingEmitter::new());
        let guard = guard_for(&emitter, "tc_006");

        let prebuilt = DecisionEvent::new(
            "assay://test".to_string(),
            "tc_006".to_string(),
            "test_tool".to_string(),
        )
        .deny(reason_codes::P_TOOL_DENIED, None);
        guard.emit_event(prebuilt);

        // Only the pre-built event is emitted; the pending one is discarded.
        assert_eq!(emitter.count.load(Ordering::SeqCst), 1);
        assert_eq!(*emitter.last_decision.lock().unwrap(), Some(Decision::Deny));
        assert_eq!(
            *emitter.last_reason_code.lock().unwrap(),
            Some(reason_codes::P_TOOL_DENIED.to_string())
        );
    }

    #[test]
    fn test_event_serialization() {
        let event = DecisionEvent::new(
            "assay://test".to_string(),
            "tc_005".to_string(),
            "test_tool".to_string(),
        )
        .allow(reason_codes::P_MANDATE_VALID)
        .with_mandate(
            Some("sha256:abc".to_string()),
            Some("sha256:use".to_string()),
            Some(1),
        )
        .with_mandate_matches(Some(true), Some(true), Some(true));

        let json = serde_json::to_string(&event).unwrap();
        assert!(json.contains("assay.tool.decision"));
        assert!(json.contains("tc_005"));
        assert!(json.contains("allow"));

        // Check the actual fields rather than substrings only.
        let parsed: Value = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed["specversion"].as_str(), Some("1.0"));
        assert_eq!(parsed["type"].as_str(), Some("assay.tool.decision"));
        assert_eq!(parsed["source"].as_str(), Some("assay://test"));

        let data = &parsed["data"];
        assert_eq!(data["tool"].as_str(), Some("test_tool"));
        assert_eq!(data["tool_call_id"].as_str(), Some("tc_005"));
        assert_eq!(data["decision"].as_str(), Some("allow"));
        assert_eq!(data["reason_code"].as_str(), Some("P_MANDATE_VALID"));
        assert_eq!(data["mandate_id"].as_str(), Some("sha256:abc"));
        assert_eq!(data["use_id"].as_str(), Some("sha256:use"));
        assert_eq!(data["use_count"].as_u64(), Some(1));
        assert_eq!(data["mandate_scope_match"].as_bool(), Some(true));
        // `allow` clears the reason, and unset/empty fields are skipped.
        assert!(data.get("reason").is_none());
        assert!(data.get("request_id").is_none());
        assert!(data.get("tool_classes").is_none());
    }

    #[test]
    fn test_reason_codes_are_string_constants() {
        // Ensure reason codes are stable strings
        assert_eq!(reason_codes::P_POLICY_ALLOW, "P_POLICY_ALLOW");
        assert_eq!(reason_codes::P_POLICY_DENY, "P_POLICY_DENY");
        assert_eq!(reason_codes::M_EXPIRED, "M_EXPIRED");
        assert_eq!(reason_codes::S_DB_ERROR, "S_DB_ERROR");
        assert_eq!(reason_codes::T_TIMEOUT, "T_TIMEOUT");
    }
}