Skip to main content

chio_guards/external/
mod.rs

1//! External guard adapter infrastructure.
2//!
3//! The building blocks in this module let you wrap a synchronous external
4//! API (cloud guardrails, threat intel feeds, ML classifiers) as an async
5//! Chio guard without leaking I/O concerns into the sync [`chio_kernel::Guard`]
6//! trait.
7//!
8//! The pieces are:
9//!
10//! * [`ExternalGuard`] -- the async trait a concrete external adapter
11//!   implements. It describes the one operation we actually want to make
12//!   resilient: `eval(ctx) -> Result<Verdict, _>`.
13//! * [`AsyncGuardAdapter`] -- composes a [`circuit_breaker::CircuitBreaker`],
14//!   [`token_bucket::TokenBucket`], [`cache::TtlCache`], and
15//!   [`retry_with_jitter`] around an [`ExternalGuard`].
16//! * [`CircuitOpenVerdict`] -- what the adapter returns when the breaker is
17//!   open. Default is [`CircuitOpenVerdict::Deny`] (fail-closed).
18//! * [`RateLimitedVerdict`] -- what the adapter returns when the rate
19//!   limiter rejects a call. Default is [`RateLimitedVerdict::Deny`]
20//!   (fail-closed, per the phase-13.1 acceptance criteria).
21//!
22//! # Example
23//!
24//! ```ignore
25//! use std::sync::Arc;
26//! use std::time::Duration;
27//! use async_trait::async_trait;
28//! use chio_guards::external::{
29//!     AsyncGuardAdapter, ExternalGuard, ExternalGuardError, GuardCallContext,
30//! };
31//! use chio_kernel::Verdict;
32//!
33//! struct HelloGuard;
34//!
35//! #[async_trait]
36//! impl ExternalGuard for HelloGuard {
37//!     fn name(&self) -> &str { "hello" }
38//!     fn cache_key(&self, ctx: &GuardCallContext) -> Option<String> {
39//!         Some(ctx.tool_name.clone())
40//!     }
41//!     async fn eval(&self, _ctx: &GuardCallContext) -> Result<Verdict, ExternalGuardError> {
42//!         Ok(Verdict::Allow)
43//!     }
44//! }
45//!
46//! let adapter = AsyncGuardAdapter::builder(Arc::new(HelloGuard))
47//!     .cache_ttl(Duration::from_secs(30))
48//!     .build();
49//! ```
50
51pub mod cache;
52pub mod circuit_breaker;
53pub mod retry;
54pub mod token_bucket;
55
56use std::num::NonZeroUsize;
57use std::sync::Arc;
58use std::time::Duration;
59
60use async_trait::async_trait;
61use chio_kernel::Verdict;
62use thiserror::Error;
63
64pub use cache::{Clock, TokioClock, TtlCache};
65pub use circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState};
66pub use retry::{retry_with_jitter, retry_with_jitter_rng, BackoffStrategy, RetryConfig};
67pub use token_bucket::TokenBucket;
68
/// Owned, minimal description of a guard request handed to an
/// [`ExternalGuard`].
///
/// The structure is deliberately thin: an external guard usually only needs
/// to cache or hash a small summary of the request, not the full kernel
/// `GuardContext`. An adapter that needs more can be wrapped in a
/// kernel-level [`chio_kernel::Guard`] that builds a richer
/// `GuardCallContext` from the real `GuardContext`.
#[derive(Clone, Debug, Default)]
pub struct GuardCallContext {
    /// Name of the tool being invoked.
    pub tool_name: String,
    /// Identifier of the calling agent.
    pub agent_id: String,
    /// Identifier of the target server.
    pub server_id: String,
    /// Tool arguments, serialized as JSON. Stored as a plain `String` so a
    /// cache key can hash it cheaply without committing to a fixed schema.
    pub arguments_json: String,
}
88
89/// Errors surfaced from an [`ExternalGuard`] call.
90#[derive(Debug, Error)]
91pub enum ExternalGuardError {
92    /// The downstream service timed out.
93    #[error("external guard timeout")]
94    Timeout,
95    /// The downstream service returned a retryable failure (5xx, connection
96    /// reset, etc.). Retryable errors are counted towards the circuit
97    /// breaker and may trigger a retry.
98    #[error("transient external error: {0}")]
99    Transient(String),
100    /// A permanent error that should not be retried (e.g. malformed request,
101    /// 4xx auth failure).
102    #[error("permanent external error: {0}")]
103    Permanent(String),
104}
105
106impl ExternalGuardError {
107    /// Returns true for errors that should count as a circuit-breaker
108    /// failure and be retried.
109    pub fn is_retryable(&self) -> bool {
110        matches!(self, Self::Timeout | Self::Transient(_))
111    }
112}
113
114/// Trait implemented by guards that call external services.
115///
116/// Keep implementations free of retry / caching / rate-limiting concerns
117/// -- those are handled by [`AsyncGuardAdapter`]. The `eval` method should
118/// describe a single attempt: one HTTP call (or equivalent), one decision.
119#[async_trait]
120pub trait ExternalGuard: Send + Sync {
121    /// Human-readable guard name (e.g. `"bedrock-guardrail"`).
122    fn name(&self) -> &str;
123
124    /// Return a cache key for this request, or `None` to skip caching.
125    fn cache_key(&self, ctx: &GuardCallContext) -> Option<String>;
126
127    /// Evaluate the request against the external service.
128    async fn eval(&self, ctx: &GuardCallContext) -> Result<Verdict, ExternalGuardError>;
129}
130
/// Policy for the verdict handed back while the circuit breaker is open.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CircuitOpenVerdict {
    /// Fail-closed (the default): requests are denied while the breaker is
    /// open.
    #[default]
    Deny,
    /// Fail-open: requests are allowed while the breaker is open. Intended
    /// only for advisory guards whose unavailability must not block
    /// traffic.
    Allow,
}
141
142impl CircuitOpenVerdict {
143    fn to_verdict(self) -> Verdict {
144        match self {
145            Self::Deny => Verdict::Deny,
146            Self::Allow => Verdict::Allow,
147        }
148    }
149}
150
/// Policy for the verdict handed back when the token bucket rejects a call.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum RateLimitedVerdict {
    /// Fail-closed (the default for phase 13.1): requests that exceed the
    /// guard's QPS budget are denied.
    #[default]
    Deny,
    /// Fail-open: rate-limited requests are allowed. Useful for advisory
    /// guards where overloading the external service is acceptable.
    Allow,
}
162
163impl RateLimitedVerdict {
164    fn to_verdict(self) -> Verdict {
165        match self {
166            Self::Deny => Verdict::Deny,
167            Self::Allow => Verdict::Allow,
168        }
169    }
170}
171
172/// Configuration for [`AsyncGuardAdapter`]. Built via
173/// [`AsyncGuardAdapter::builder`].
174#[derive(Debug, Clone)]
175pub struct AsyncGuardAdapterConfig {
176    /// Circuit breaker tuning.
177    pub circuit: CircuitBreakerConfig,
178    /// Retry configuration applied inside the breaker.
179    pub retry: RetryConfig,
180    /// Maximum number of cached verdicts.
181    pub cache_capacity: NonZeroUsize,
182    /// TTL applied to every cached verdict.
183    pub cache_ttl: Duration,
184    /// Rate limit (calls per second).
185    pub rate_per_second: f64,
186    /// Burst capacity for the rate limiter.
187    pub rate_burst: u32,
188    /// Verdict returned when the breaker is open.
189    pub circuit_open_verdict: CircuitOpenVerdict,
190    /// Verdict returned when the rate limiter rejects a call.
191    pub rate_limited_verdict: RateLimitedVerdict,
192}
193
194impl Default for AsyncGuardAdapterConfig {
195    fn default() -> Self {
196        Self {
197            circuit: CircuitBreakerConfig::default(),
198            retry: RetryConfig::default(),
199            cache_capacity: NonZeroUsize::new(1024).unwrap_or(NonZeroUsize::MIN),
200            cache_ttl: Duration::from_secs(60),
201            rate_per_second: 20.0,
202            rate_burst: 20,
203            circuit_open_verdict: CircuitOpenVerdict::Deny,
204            rate_limited_verdict: RateLimitedVerdict::Deny,
205        }
206    }
207}
208
209/// Fluent builder for [`AsyncGuardAdapter`].
210pub struct AsyncGuardAdapterBuilder<E: ExternalGuard + ?Sized> {
211    inner: Arc<E>,
212    config: AsyncGuardAdapterConfig,
213    clock: Arc<dyn Clock>,
214}
215
216impl<E: ExternalGuard + ?Sized> AsyncGuardAdapterBuilder<E> {
217    /// Start from the given guard and default configuration.
218    pub fn new(inner: Arc<E>) -> Self {
219        Self {
220            inner,
221            config: AsyncGuardAdapterConfig::default(),
222            clock: Arc::new(TokioClock),
223        }
224    }
225
226    /// Override the circuit breaker configuration.
227    pub fn circuit(mut self, circuit: CircuitBreakerConfig) -> Self {
228        self.config.circuit = circuit;
229        self
230    }
231
232    /// Override retry configuration.
233    pub fn retry(mut self, retry: RetryConfig) -> Self {
234        self.config.retry = retry;
235        self
236    }
237
238    /// Set the cache capacity (non-zero).
239    pub fn cache_capacity(mut self, capacity: NonZeroUsize) -> Self {
240        self.config.cache_capacity = capacity;
241        self
242    }
243
244    /// Set the cache TTL.
245    pub fn cache_ttl(mut self, ttl: Duration) -> Self {
246        self.config.cache_ttl = ttl;
247        self
248    }
249
250    /// Set the rate limiter (calls per second + burst).
251    pub fn rate_limit(mut self, rate_per_second: f64, burst: u32) -> Self {
252        self.config.rate_per_second = rate_per_second;
253        self.config.rate_burst = burst;
254        self
255    }
256
257    /// Set the verdict returned while the breaker is open.
258    pub fn circuit_open_verdict(mut self, verdict: CircuitOpenVerdict) -> Self {
259        self.config.circuit_open_verdict = verdict;
260        self
261    }
262
263    /// Set the verdict returned when the rate limiter rejects a call.
264    pub fn rate_limited_verdict(mut self, verdict: RateLimitedVerdict) -> Self {
265        self.config.rate_limited_verdict = verdict;
266        self
267    }
268
269    /// Override the time source. Primarily for tests.
270    pub fn clock(mut self, clock: Arc<dyn Clock>) -> Self {
271        self.clock = clock;
272        self
273    }
274
275    /// Finalize the builder.
276    pub fn build(self) -> AsyncGuardAdapter<E> {
277        let cache = TtlCache::with_clock(self.config.cache_capacity, Arc::clone(&self.clock));
278        let circuit =
279            CircuitBreaker::with_clock(self.config.circuit.clone(), Arc::clone(&self.clock));
280        let bucket = TokenBucket::with_clock(
281            self.config.rate_per_second,
282            self.config.rate_burst,
283            Arc::clone(&self.clock),
284        );
285        AsyncGuardAdapter {
286            inner: self.inner,
287            config: self.config,
288            cache,
289            circuit,
290            bucket,
291        }
292    }
293}
294
295/// Adapter that composes circuit breaker + token bucket + TTL cache + retry
296/// on top of an [`ExternalGuard`].
297///
298/// Flow of a single `evaluate` call:
299///
300/// 1. Check the **circuit breaker**. If open, return
301///    [`CircuitOpenVerdict`] without calling the inner guard.
302/// 2. Check the **cache**. On hit, return the cached verdict without
303///    calling the inner guard.
304/// 3. Check the **token bucket**. If empty, return [`RateLimitedVerdict`].
305///    Rate-limited calls do *not* increment the circuit breaker.
306/// 4. Call the inner guard via [`retry_with_jitter`].
307/// 5. Record success/failure on the breaker. On success, cache the verdict.
308pub struct AsyncGuardAdapter<E: ExternalGuard + ?Sized> {
309    inner: Arc<E>,
310    config: AsyncGuardAdapterConfig,
311    cache: TtlCache<String, Verdict>,
312    circuit: CircuitBreaker,
313    bucket: TokenBucket,
314}
315
316impl<E: ExternalGuard + ?Sized> AsyncGuardAdapter<E> {
317    /// Start a builder with defaults.
318    pub fn builder(inner: Arc<E>) -> AsyncGuardAdapterBuilder<E> {
319        AsyncGuardAdapterBuilder::new(inner)
320    }
321
322    /// Name of the wrapped guard.
323    pub fn name(&self) -> &str {
324        self.inner.name()
325    }
326
327    /// Effective configuration.
328    pub fn config(&self) -> &AsyncGuardAdapterConfig {
329        &self.config
330    }
331
332    /// Inspect the circuit breaker state (primarily for tests and metrics).
333    pub fn circuit_state(&self) -> CircuitState {
334        self.circuit.current_state()
335    }
336
337    /// Evaluate the request end-to-end.
338    pub async fn evaluate(&self, ctx: &GuardCallContext) -> Verdict {
339        // 1. Circuit breaker check.
340        if !self.circuit.allow_call() {
341            return self.config.circuit_open_verdict.to_verdict();
342        }
343
344        // 2. Cache check. Done before rate limiting so that cached hits
345        //    don't count against the external QPS budget.
346        let cache_key = self.inner.cache_key(ctx);
347        if let Some(key) = cache_key.as_ref() {
348            if let Some(cached) = self.cache.get(key) {
349                return cached;
350            }
351        }
352
353        // 3. Rate limit.
354        if !self.bucket.try_acquire() {
355            return self.config.rate_limited_verdict.to_verdict();
356        }
357
358        // 4. Retry loop against the inner guard. A permanent error short-
359        //    circuits by being returned as an Ok(Err(_)) so the retry
360        //    loop doesn't keep calling a known-bad request.
361        let inner = Arc::clone(&self.inner);
362        let ctx_ref = ctx;
363        let loop_outcome: Result<Result<Verdict, ExternalGuardError>, ExternalGuardError> =
364            retry_with_jitter(&self.config.retry, move |_attempt| {
365                let inner = Arc::clone(&inner);
366                async move {
367                    match inner.eval(ctx_ref).await {
368                        Ok(v) => Ok(Ok(v)),
369                        Err(err) if err.is_retryable() => Err(err),
370                        Err(err) => Ok(Err(err)),
371                    }
372                }
373            })
374            .await;
375
376        let call_result: Result<Verdict, ExternalGuardError> = match loop_outcome {
377            Ok(inner) => inner,
378            Err(err) => Err(err),
379        };
380
381        match call_result {
382            Ok(verdict) => {
383                self.circuit.record_success();
384                if let Some(key) = cache_key {
385                    self.cache.insert(key, verdict, self.config.cache_ttl);
386                }
387                verdict
388            }
389            Err(err) => {
390                self.circuit.record_failure();
391                tracing::warn!(
392                    guard = self.inner.name(),
393                    error = %err,
394                    "external guard failed"
395                );
396                Verdict::Deny
397            }
398        }
399    }
400}