Skip to main content

chio_kernel/
execution_nonce.rs

1//! Phase 1.1: Execution Nonces (TOCTOU fix).
2//!
3//! An `ExecutionNonce` is a short-lived, single-use token that the kernel
4//! attaches to every `Verdict::Allow` response. Tool servers MUST present
5//! the nonce before executing; the kernel rejects stale (>`nonce_ttl_secs`,
6//! default 30s) or replayed nonces. This closes the time-of-check /
7//! time-of-use window between `evaluate()` and tool-server execution that
8//! DPoP alone cannot close.
9//!
10//! # Design
11//!
12//! * The nonce body is an opaque `nonce_id` plus a `NonceBinding` that
13//!   binds the nonce to the exact `(subject, capability, server, tool,
14//!   parameter_hash)` tuple. Substituting a nonce between unrelated tool
15//!   calls therefore fails the binding check.
16//! * The kernel signs the full body (nonce id + binding + expires_at)
17//!   with its receipt-signing key, so downstream tool servers can
18//!   cryptographically verify authenticity without a round trip.
19//! * Replay is prevented by an `ExecutionNonceStore`: the first
20//!   `reserve(nonce_id)` returns true and consumes the nonce; any
21//!   subsequent reservation returns false and the verify path rejects.
22//!
23//! # Backward compatibility
24//!
25//! The whole feature is opt-in. When `ExecutionNonceConfig::require_nonce`
26//! is `false` (the default), no nonce is minted and the verify path is a
27//! no-op. Existing non-nonce deployments keep working; new tool servers
28//! opt in by flipping `require_nonce` on the kernel's config.
29
30use std::num::NonZeroUsize;
31use std::sync::Mutex;
32use std::time::{Duration, Instant};
33
34use chio_core::canonical::canonical_json_bytes;
35use chio_core::crypto::{Keypair, PublicKey, Signature};
36use lru::LruCache;
37use serde::{Deserialize, Serialize};
38use tracing::{error, warn};
39use uuid::Uuid;
40
41use crate::KernelError;
42
/// Schema tag stamped into every Chio execution nonce body.
pub const EXECUTION_NONCE_SCHEMA: &str = "chio.execution_nonce.v1";

/// Lifetime, in seconds, used for a nonce when the default configuration applies.
pub const DEFAULT_EXECUTION_NONCE_TTL_SECS: u64 = 30;

/// Number of entries the in-memory replay-prevention LRU cache holds by default.
pub const DEFAULT_EXECUTION_NONCE_STORE_CAPACITY: usize = 16_384;

/// Returns `true` only when `schema` is exactly [`EXECUTION_NONCE_SCHEMA`].
///
/// The comparison is an exact, case-sensitive string match.
#[must_use]
pub fn is_supported_execution_nonce_schema(schema: &str) -> bool {
    matches!(schema, EXECUTION_NONCE_SCHEMA)
}
56
57// ---------------------------------------------------------------------------
58// NonceBinding
59// ---------------------------------------------------------------------------
60
61/// Fields that tie a nonce to one specific tool invocation.
62///
63/// All five fields are in the signed body, so any mismatch during verify
64/// means either the nonce was minted for a different call or the nonce was
65/// tampered with after issuance.
66#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
67pub struct NonceBinding {
68    /// Hex-encoded subject (agent) public key, taken from `capability.subject`.
69    pub subject_id: String,
70    /// ID of the capability that authorized this invocation.
71    pub capability_id: String,
72    /// Tool server that is expected to execute the call.
73    pub tool_server: String,
74    /// Tool name that is expected to execute.
75    pub tool_name: String,
76    /// SHA-256 hex of the canonical JSON of the evaluated arguments. Taken
77    /// directly from the `ToolCallAction::parameter_hash` that the kernel
78    /// embedded in the allow receipt.
79    pub parameter_hash: String,
80}
81
82// ---------------------------------------------------------------------------
83// ExecutionNonce (signable body)
84// ---------------------------------------------------------------------------
85
86/// The signable body of an execution nonce.
87///
88/// This is the canonical-JSON-serialized message the kernel signs. Every
89/// field is covered by the signature; none are mutable after issuance.
90#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
91pub struct ExecutionNonce {
92    /// Schema identifier. Must equal `EXECUTION_NONCE_SCHEMA`.
93    pub schema: String,
94    /// Unique nonce identifier (UUIDv7 hex).
95    pub nonce_id: String,
96    /// Unix timestamp (seconds) when the kernel issued this nonce.
97    pub issued_at: i64,
98    /// Unix timestamp (seconds) when this nonce expires.
99    /// Default: `issued_at + 30`. Configurable via `ExecutionNonceConfig`.
100    pub expires_at: i64,
101    /// Invocation binding: subject, capability, server, tool, parameter hash.
102    pub bound_to: NonceBinding,
103}
104
105// ---------------------------------------------------------------------------
106// SignedExecutionNonce
107// ---------------------------------------------------------------------------
108
109/// A kernel-signed execution nonce ready for transmission on an allow verdict.
110#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
111pub struct SignedExecutionNonce {
112    /// The nonce body that was signed.
113    pub nonce: ExecutionNonce,
114    /// Ed25519 signature over `canonical_json_bytes(&nonce)` produced by the
115    /// kernel's receipt-signing key.
116    pub signature: Signature,
117}
118
119impl SignedExecutionNonce {
120    /// Convenience accessor for the nonce identifier.
121    #[must_use]
122    pub fn nonce_id(&self) -> &str {
123        &self.nonce.nonce_id
124    }
125
126    /// Convenience accessor for the expiry.
127    #[must_use]
128    pub fn expires_at(&self) -> i64 {
129        self.nonce.expires_at
130    }
131}
132
133// ---------------------------------------------------------------------------
134// ExecutionNonceConfig
135// ---------------------------------------------------------------------------
136
/// Tunables for issuing and verifying execution nonces.
#[derive(Debug, Clone)]
pub struct ExecutionNonceConfig {
    /// Validity window of a freshly issued nonce, in seconds. Default: 30.
    pub nonce_ttl_secs: u64,
    /// Upper bound on entries kept by the replay-prevention LRU cache.
    /// Default: 16_384.
    pub nonce_store_capacity: usize,
    /// Opt-in switch: when `true`, the kernel's strict-mode verify paths
    /// reject any call that arrives without a signed nonce.
    /// Default: `false`.
    pub require_nonce: bool,
}
148
149impl Default for ExecutionNonceConfig {
150    fn default() -> Self {
151        Self {
152            nonce_ttl_secs: DEFAULT_EXECUTION_NONCE_TTL_SECS,
153            nonce_store_capacity: DEFAULT_EXECUTION_NONCE_STORE_CAPACITY,
154            require_nonce: false,
155        }
156    }
157}
158
159// ---------------------------------------------------------------------------
160// ExecutionNonceStore trait
161// ---------------------------------------------------------------------------
162
163/// Persistence boundary for replay-prevention of execution nonces.
164///
165/// Implementations MUST ensure that `reserve(nonce_id)` returns `true`
166/// exactly once per nonce identifier. All subsequent calls for the same
167/// identifier return `false`. Fail-closed: any internal error is returned
168/// via `KernelError` so the caller can deny the request.
169pub trait ExecutionNonceStore: Send + Sync {
170    /// Attempt to reserve (consume) the given nonce identifier.
171    ///
172    /// * `Ok(true)`  -- nonce was fresh; it is now marked consumed.
173    /// * `Ok(false)` -- nonce has already been consumed (replay detected).
174    /// * `Err(_)`    -- the store is unreachable or corrupted; fail-closed.
175    ///
176    /// Prefer [`Self::reserve_until`] when the caller knows the signed
177    /// expiry of the nonce: durable stores need to retain the consumed
178    /// marker at least as long as the signed nonce is valid, otherwise
179    /// the row may be pruned and the nonce can be replayed within its
180    /// remaining validity window.
181    fn reserve(&self, nonce_id: &str) -> Result<bool, KernelError>;
182
183    /// Reserve a nonce while telling the store when the nonce stops
184    /// being cryptographically valid. Durable implementations (SQLite,
185    /// remote KV stores) MUST retain the consumed marker until at least
186    /// `nonce_expires_at` so replay protection covers the nonce's full
187    /// validity window.
188    ///
189    /// The default implementation falls back to [`Self::reserve`] for
190    /// in-memory / best-effort stores that already track retention
191    /// internally. `nonce_expires_at` is wall-clock unix seconds.
192    fn reserve_until(&self, nonce_id: &str, _nonce_expires_at: i64) -> Result<bool, KernelError> {
193        self.reserve(nonce_id)
194    }
195}
196
197// ---------------------------------------------------------------------------
198// InMemoryExecutionNonceStore
199// ---------------------------------------------------------------------------
200
201/// In-memory LRU-backed execution nonce store.
202///
203/// Mirrors the shape of `dpop::DpopNonceStore` but keys on the nonce_id
204/// alone because the full binding lives inside the signed body and is
205/// checked separately by `verify_execution_nonce`.
206pub struct InMemoryExecutionNonceStore {
207    inner: Mutex<LruCache<String, Instant>>,
208    ttl: Duration,
209}
210
211impl InMemoryExecutionNonceStore {
212    /// Create a new in-memory store.
213    ///
214    /// `capacity` is the maximum number of recently consumed nonces to
215    /// remember. `ttl` is how long a nonce entry is retained. After `ttl`
216    /// elapses the slot can be recycled (which matters only for long-lived
217    /// kernels -- the signed body's `expires_at` still prevents actual
218    /// replay because verify will have already rejected on expiry).
219    #[must_use]
220    pub fn new(capacity: usize, ttl: Duration) -> Self {
221        let nz = NonZeroUsize::new(capacity).unwrap_or_else(|| {
222            NonZeroUsize::new(DEFAULT_EXECUTION_NONCE_STORE_CAPACITY).unwrap_or(NonZeroUsize::MIN)
223        });
224        Self {
225            inner: Mutex::new(LruCache::new(nz)),
226            ttl,
227        }
228    }
229
230    /// Build a store with the TTL and capacity from `config`.
231    #[must_use]
232    pub fn from_config(config: &ExecutionNonceConfig) -> Self {
233        Self::new(
234            config.nonce_store_capacity,
235            Duration::from_secs(config.nonce_ttl_secs),
236        )
237    }
238}
239
240impl Default for InMemoryExecutionNonceStore {
241    fn default() -> Self {
242        Self::new(
243            DEFAULT_EXECUTION_NONCE_STORE_CAPACITY,
244            Duration::from_secs(DEFAULT_EXECUTION_NONCE_TTL_SECS),
245        )
246    }
247}
248
249impl ExecutionNonceStore for InMemoryExecutionNonceStore {
250    fn reserve(&self, nonce_id: &str) -> Result<bool, KernelError> {
251        let mut cache = self.inner.lock().map_err(|_| {
252            error!("execution nonce store mutex poisoned; denying fail-closed");
253            KernelError::Internal("execution nonce store mutex poisoned; fail-closed".to_string())
254        })?;
255
256        let key = nonce_id.to_string();
257        if let Some(consumed_at) = cache.peek(&key) {
258            if consumed_at.elapsed() < self.ttl {
259                return Ok(false);
260            }
261            cache.pop(&key);
262        }
263        cache.put(key, Instant::now());
264        Ok(true)
265    }
266}
267
268// ---------------------------------------------------------------------------
269// Minting
270// ---------------------------------------------------------------------------
271
272/// Mint a fresh signed execution nonce.
273///
274/// The kernel calls this on every `Verdict::Allow` so tool servers can
275/// verify that a call was authorized by the kernel at a known, recent
276/// time. The returned nonce is signed by `kernel_keypair`; downstream
277/// verifiers check the signature with the kernel's public key.
278pub fn mint_execution_nonce(
279    kernel_keypair: &Keypair,
280    binding: NonceBinding,
281    config: &ExecutionNonceConfig,
282    now: i64,
283) -> Result<SignedExecutionNonce, KernelError> {
284    let ttl = i64::try_from(config.nonce_ttl_secs).unwrap_or(i64::MAX);
285    let expires_at = now.saturating_add(ttl);
286    let nonce = ExecutionNonce {
287        schema: EXECUTION_NONCE_SCHEMA.to_string(),
288        nonce_id: Uuid::now_v7().as_hyphenated().to_string(),
289        issued_at: now,
290        expires_at,
291        bound_to: binding,
292    };
293    let (signature, _bytes) = kernel_keypair.sign_canonical(&nonce).map_err(|e| {
294        KernelError::ReceiptSigningFailed(format!("failed to sign execution nonce: {e}"))
295    })?;
296    Ok(SignedExecutionNonce { nonce, signature })
297}
298
299// ---------------------------------------------------------------------------
300// Verification
301// ---------------------------------------------------------------------------
302
/// Why a presented execution nonce was refused.
///
/// The nonce flow is fail-closed and each variant is a hard deny on the
/// kernel side. `verify_execution_nonce` runs the schema, expiry, binding,
/// signature, and replay checks in that sequence and returns the error of
/// the first check that fails.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExecutionNonceError {
    /// The `schema` field was not `EXECUTION_NONCE_SCHEMA`; `got` carries
    /// the value that was presented.
    BadSchema { got: String },
    /// The nonce is no longer valid (`now >= expires_at`).
    Expired { now: i64, expires_at: i64 },
    /// The named binding field differs from the invocation being executed.
    BindingMismatch { field: &'static str },
    /// The Ed25519 signature does not verify under the kernel's public key.
    InvalidSignature,
    /// The nonce has been consumed before; nonces are single-use.
    Replayed,
    /// The nonce body could not be canonical-JSON encoded for verification.
    Encoding(String),
    /// The replay store returned an error; treated as a deny (fail-closed).
    Store(String),
}
325
326impl std::fmt::Display for ExecutionNonceError {
327    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
328        match self {
329            Self::BadSchema { got } => write!(
330                f,
331                "execution nonce has unsupported schema: expected {EXECUTION_NONCE_SCHEMA}, got {got}"
332            ),
333            Self::Expired { now, expires_at } => write!(
334                f,
335                "execution nonce expired (now={now}, expires_at={expires_at})"
336            ),
337            Self::BindingMismatch { field } => {
338                write!(f, "execution nonce binding mismatch on field {field}")
339            }
340            Self::InvalidSignature => write!(f, "execution nonce signature is invalid"),
341            Self::Replayed => write!(f, "execution nonce has already been consumed"),
342            Self::Encoding(e) => write!(f, "execution nonce canonical encoding failed: {e}"),
343            Self::Store(e) => write!(f, "execution nonce store error: {e}"),
344        }
345    }
346}
347
348impl std::error::Error for ExecutionNonceError {}
349
350impl From<ExecutionNonceError> for KernelError {
351    fn from(err: ExecutionNonceError) -> Self {
352        KernelError::Internal(format!("execution nonce verification failed: {err}"))
353    }
354}
355
356/// Verify a signed execution nonce against the expected binding.
357///
358/// Steps, in order:
359/// 1. Schema check.
360/// 2. Expiry check -- `now < nonce.expires_at`.
361/// 3. Binding check -- subject, capability, server, tool, parameter_hash.
362/// 4. Signature check -- canonical JSON under the kernel's pubkey.
363/// 5. Replay check -- `nonce_store.reserve(nonce_id)` must return `true`.
364pub fn verify_execution_nonce(
365    presented: &SignedExecutionNonce,
366    kernel_pubkey: &PublicKey,
367    expected: &NonceBinding,
368    now: i64,
369    nonce_store: &dyn ExecutionNonceStore,
370) -> Result<(), ExecutionNonceError> {
371    if !is_supported_execution_nonce_schema(&presented.nonce.schema) {
372        warn!(
373            schema = %presented.nonce.schema,
374            "rejecting execution nonce with unsupported schema"
375        );
376        return Err(ExecutionNonceError::BadSchema {
377            got: presented.nonce.schema.clone(),
378        });
379    }
380
381    if now >= presented.nonce.expires_at {
382        warn!(
383            nonce_id = %presented.nonce.nonce_id,
384            now,
385            expires_at = presented.nonce.expires_at,
386            "rejecting stale execution nonce"
387        );
388        return Err(ExecutionNonceError::Expired {
389            now,
390            expires_at: presented.nonce.expires_at,
391        });
392    }
393
394    let bound = &presented.nonce.bound_to;
395    if bound.subject_id != expected.subject_id {
396        return Err(ExecutionNonceError::BindingMismatch {
397            field: "subject_id",
398        });
399    }
400    if bound.capability_id != expected.capability_id {
401        return Err(ExecutionNonceError::BindingMismatch {
402            field: "capability_id",
403        });
404    }
405    if bound.tool_server != expected.tool_server {
406        return Err(ExecutionNonceError::BindingMismatch {
407            field: "tool_server",
408        });
409    }
410    if bound.tool_name != expected.tool_name {
411        return Err(ExecutionNonceError::BindingMismatch { field: "tool_name" });
412    }
413    if bound.parameter_hash != expected.parameter_hash {
414        return Err(ExecutionNonceError::BindingMismatch {
415            field: "parameter_hash",
416        });
417    }
418
419    let signed_bytes = canonical_json_bytes(&presented.nonce)
420        .map_err(|e| ExecutionNonceError::Encoding(e.to_string()))?;
421    if !kernel_pubkey.verify(&signed_bytes, &presented.signature) {
422        warn!(
423            nonce_id = %presented.nonce.nonce_id,
424            "execution nonce signature verification failed"
425        );
426        return Err(ExecutionNonceError::InvalidSignature);
427    }
428
429    // Pass the nonce's signed expiry so durable stores retain the
430    // consumed marker for the full validity window - otherwise the row
431    // can be pruned while the nonce is still cryptographically valid,
432    // allowing replay within the remaining window.
433    match nonce_store.reserve_until(&presented.nonce.nonce_id, presented.nonce.expires_at) {
434        Ok(true) => Ok(()),
435        Ok(false) => {
436            warn!(
437                nonce_id = %presented.nonce.nonce_id,
438                "rejecting replayed execution nonce"
439            );
440            Err(ExecutionNonceError::Replayed)
441        }
442        Err(e) => Err(ExecutionNonceError::Store(e.to_string())),
443    }
444}
445
#[cfg(test)]
// Tests deliberately unwrap: a failed unwrap is the test failure.
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::thread;

    /// Fixed "current time" (unix seconds) used as the issuance instant.
    const NOW: i64 = 1_000_000;

    /// A binding with arbitrary but well-formed values.
    fn sample_binding() -> NonceBinding {
        NonceBinding {
            subject_id: "subject-abc".to_string(),
            capability_id: "cap-123".to_string(),
            tool_server: "fs".to_string(),
            tool_name: "read_file".to_string(),
            parameter_hash: "0000000000000000000000000000000000000000000000000000000000000000"
                .to_string(),
        }
    }

    /// The configured TTL as `i64`, converted without a truncating cast.
    fn ttl_secs(cfg: &ExecutionNonceConfig) -> i64 {
        i64::try_from(cfg.nonce_ttl_secs).unwrap()
    }

    #[test]
    fn mint_then_verify_roundtrip() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let binding = sample_binding();

        let signed = mint_execution_nonce(&kp, binding.clone(), &cfg, NOW).unwrap();
        assert_eq!(signed.nonce.schema, EXECUTION_NONCE_SCHEMA);
        assert_eq!(signed.nonce.issued_at, NOW);
        assert_eq!(signed.nonce.expires_at, NOW + ttl_secs(&cfg));

        verify_execution_nonce(&signed, &kp.public_key(), &binding, NOW + 1, &store).unwrap();
    }

    #[test]
    fn stale_nonce_is_rejected() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let binding = sample_binding();

        let signed = mint_execution_nonce(&kp, binding.clone(), &cfg, NOW).unwrap();
        let err = verify_execution_nonce(
            &signed,
            &kp.public_key(),
            &binding,
            NOW + ttl_secs(&cfg) + 1,
            &store,
        )
        .unwrap_err();
        assert!(matches!(err, ExecutionNonceError::Expired { .. }));
    }

    /// Boundary of the expiry check: `now == expires_at` is already stale,
    /// one second earlier is still accepted.
    #[test]
    fn nonce_is_rejected_exactly_at_expiry() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let binding = sample_binding();

        let signed = mint_execution_nonce(&kp, binding.clone(), &cfg, NOW).unwrap();
        let expires_at = signed.expires_at();

        let err = verify_execution_nonce(&signed, &kp.public_key(), &binding, expires_at, &store)
            .unwrap_err();
        assert_eq!(
            err,
            ExecutionNonceError::Expired {
                now: expires_at,
                expires_at,
            }
        );

        // The expiry rejection happens before the replay store is touched,
        // so the nonce is still usable inside its window.
        verify_execution_nonce(&signed, &kp.public_key(), &binding, expires_at - 1, &store)
            .unwrap();
    }

    #[test]
    fn unsupported_schema_is_rejected() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let binding = sample_binding();

        let mut signed = mint_execution_nonce(&kp, binding.clone(), &cfg, NOW).unwrap();
        signed.nonce.schema = "chio.execution_nonce.v0".to_string();

        let err = verify_execution_nonce(&signed, &kp.public_key(), &binding, NOW + 1, &store)
            .unwrap_err();
        assert_eq!(
            err,
            ExecutionNonceError::BadSchema {
                got: "chio.execution_nonce.v0".to_string(),
            }
        );
    }

    #[test]
    fn replayed_nonce_is_rejected() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let binding = sample_binding();

        let signed = mint_execution_nonce(&kp, binding.clone(), &cfg, NOW).unwrap();
        verify_execution_nonce(&signed, &kp.public_key(), &binding, NOW + 1, &store).unwrap();
        let err = verify_execution_nonce(&signed, &kp.public_key(), &binding, NOW + 2, &store)
            .unwrap_err();
        assert!(matches!(err, ExecutionNonceError::Replayed));
    }

    #[test]
    fn mismatched_binding_is_rejected() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let minted_binding = sample_binding();

        let signed = mint_execution_nonce(&kp, minted_binding.clone(), &cfg, NOW).unwrap();
        let mut wrong = minted_binding;
        wrong.tool_name = "write_file".to_string();

        let err =
            verify_execution_nonce(&signed, &kp.public_key(), &wrong, NOW + 1, &store).unwrap_err();
        assert!(matches!(
            err,
            ExecutionNonceError::BindingMismatch { field: "tool_name" }
        ));
    }

    /// A verification that fails before the replay step must leave the
    /// nonce unconsumed, otherwise a bad request could burn a valid nonce.
    #[test]
    fn failed_verification_does_not_consume_nonce() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let binding = sample_binding();

        let signed = mint_execution_nonce(&kp, binding.clone(), &cfg, NOW).unwrap();
        let mut wrong = binding.clone();
        wrong.parameter_hash = "ff".repeat(32);

        let err =
            verify_execution_nonce(&signed, &kp.public_key(), &wrong, NOW + 1, &store).unwrap_err();
        assert_eq!(
            err,
            ExecutionNonceError::BindingMismatch {
                field: "parameter_hash",
            }
        );

        verify_execution_nonce(&signed, &kp.public_key(), &binding, NOW + 2, &store).unwrap();
    }

    #[test]
    fn tampered_signature_is_rejected() {
        let kp = Keypair::generate();
        let store = InMemoryExecutionNonceStore::default();
        let cfg = ExecutionNonceConfig::default();
        let binding = sample_binding();

        let mut signed = mint_execution_nonce(&kp, binding.clone(), &cfg, NOW).unwrap();
        // Mutate a signed field without re-signing: signature must no longer verify.
        signed.nonce.bound_to.tool_name = "write_file".to_string();
        // Make the expected binding agree with the tampered body so the
        // binding check passes and the signature check is what rejects.
        let mut expected = binding;
        expected.tool_name = "write_file".to_string();

        let err = verify_execution_nonce(&signed, &kp.public_key(), &expected, NOW + 1, &store)
            .unwrap_err();
        assert!(matches!(err, ExecutionNonceError::InvalidSignature));
    }

    #[test]
    fn store_reserves_each_nonce_exactly_once() {
        let store = InMemoryExecutionNonceStore::default();
        assert!(store.reserve("a").unwrap());
        assert!(!store.reserve("a").unwrap());
        assert!(store.reserve("b").unwrap());
    }

    #[test]
    fn store_does_not_stall_between_threads() {
        let store = std::sync::Arc::new(InMemoryExecutionNonceStore::default());
        let mut handles = Vec::new();
        for i in 0..4 {
            let store = std::sync::Arc::clone(&store);
            handles.push(thread::spawn(move || {
                let id = format!("t-{i}");
                store.reserve(&id).unwrap()
            }));
        }
        for h in handles {
            assert!(h.join().unwrap());
        }
    }
}