chio_guards/external/mod.rs
1//! External guard adapter infrastructure.
2//!
3//! The building blocks in this module let you wrap a synchronous external
4//! API (cloud guardrails, threat intel feeds, ML classifiers) as an async
5//! Chio guard without leaking I/O concerns into the sync [`chio_kernel::Guard`]
6//! trait.
7//!
8//! The pieces are:
9//!
10//! * [`ExternalGuard`] -- the async trait a concrete external adapter
11//! implements. It describes the one operation we actually want to make
12//! resilient: `eval(ctx) -> Result<Verdict, _>`.
13//! * [`AsyncGuardAdapter`] -- composes a [`circuit_breaker::CircuitBreaker`],
14//! [`token_bucket::TokenBucket`], [`cache::TtlCache`], and
15//! [`retry_with_jitter`] around an [`ExternalGuard`].
16//! * [`CircuitOpenVerdict`] -- what the adapter returns when the breaker is
17//! open. Default is [`CircuitOpenVerdict::Deny`] (fail-closed).
18//! * [`RateLimitedVerdict`] -- what the adapter returns when the rate
19//! limiter rejects a call. Default is [`RateLimitedVerdict::Deny`]
20//! (fail-closed, per the phase-13.1 acceptance criteria).
21//!
22//! # Example
23//!
24//! ```ignore
25//! use std::sync::Arc;
26//! use std::time::Duration;
27//! use async_trait::async_trait;
28//! use chio_guards::external::{
29//! AsyncGuardAdapter, ExternalGuard, ExternalGuardError, GuardCallContext,
30//! };
31//! use chio_kernel::Verdict;
32//!
33//! struct HelloGuard;
34//!
35//! #[async_trait]
36//! impl ExternalGuard for HelloGuard {
37//! fn name(&self) -> &str { "hello" }
38//! fn cache_key(&self, ctx: &GuardCallContext) -> Option<String> {
39//! Some(ctx.tool_name.clone())
40//! }
41//! async fn eval(&self, _ctx: &GuardCallContext) -> Result<Verdict, ExternalGuardError> {
42//! Ok(Verdict::Allow)
43//! }
44//! }
45//!
46//! let adapter = AsyncGuardAdapter::builder(Arc::new(HelloGuard))
47//! .cache_ttl(Duration::from_secs(30))
48//! .build();
49//! ```
50
51pub mod cache;
52pub mod circuit_breaker;
53pub mod retry;
54pub mod token_bucket;
55
56use std::num::NonZeroUsize;
57use std::sync::Arc;
58use std::time::Duration;
59
60use async_trait::async_trait;
61use chio_kernel::Verdict;
62use thiserror::Error;
63
64pub use cache::{Clock, TokioClock, TtlCache};
65pub use circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState};
66pub use retry::{retry_with_jitter, retry_with_jitter_rng, BackoffStrategy, RetryConfig};
67pub use token_bucket::TokenBucket;
68
/// Owned, minimal description of a guard request handed to an
/// [`ExternalGuard`].
///
/// External guards usually only need a compact summary of the request to
/// hash or cache, not the full kernel `GuardContext`. A kernel-level
/// [`chio_kernel::Guard`] that wraps the adapter can synthesize a richer
/// `GuardCallContext` from the real `GuardContext` when a concrete adapter
/// needs more detail.
#[derive(Clone, Debug, Default)]
pub struct GuardCallContext {
    /// Name of the tool being invoked.
    pub tool_name: String,
    /// Identifier of the calling agent.
    pub agent_id: String,
    /// Identifier of the target server.
    pub server_id: String,
    /// JSON-serialized tool arguments. Stored as a plain `String` so a cache
    /// key can hash it cheaply without committing to a fixed schema.
    pub arguments_json: String,
}
88
89/// Errors surfaced from an [`ExternalGuard`] call.
90#[derive(Debug, Error)]
91pub enum ExternalGuardError {
92 /// The downstream service timed out.
93 #[error("external guard timeout")]
94 Timeout,
95 /// The downstream service returned a retryable failure (5xx, connection
96 /// reset, etc.). Retryable errors are counted towards the circuit
97 /// breaker and may trigger a retry.
98 #[error("transient external error: {0}")]
99 Transient(String),
100 /// A permanent error that should not be retried (e.g. malformed request,
101 /// 4xx auth failure).
102 #[error("permanent external error: {0}")]
103 Permanent(String),
104}
105
106impl ExternalGuardError {
107 /// Returns true for errors that should count as a circuit-breaker
108 /// failure and be retried.
109 pub fn is_retryable(&self) -> bool {
110 matches!(self, Self::Timeout | Self::Transient(_))
111 }
112}
113
114/// Trait implemented by guards that call external services.
115///
116/// Keep implementations free of retry / caching / rate-limiting concerns
117/// -- those are handled by [`AsyncGuardAdapter`]. The `eval` method should
118/// describe a single attempt: one HTTP call (or equivalent), one decision.
119#[async_trait]
120pub trait ExternalGuard: Send + Sync {
121 /// Human-readable guard name (e.g. `"bedrock-guardrail"`).
122 fn name(&self) -> &str;
123
124 /// Return a cache key for this request, or `None` to skip caching.
125 fn cache_key(&self, ctx: &GuardCallContext) -> Option<String>;
126
127 /// Evaluate the request against the external service.
128 async fn eval(&self, ctx: &GuardCallContext) -> Result<Verdict, ExternalGuardError>;
129}
130
/// Policy for requests that arrive while the circuit breaker is open.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CircuitOpenVerdict {
    /// Fail-closed (the default): requests are denied for as long as the
    /// breaker stays open.
    #[default]
    Deny,
    /// Fail-open: requests are allowed while the breaker is open. Intended
    /// only for advisory guards whose unavailability must not block traffic.
    Allow,
}
141
142impl CircuitOpenVerdict {
143 fn to_verdict(self) -> Verdict {
144 match self {
145 Self::Deny => Verdict::Deny,
146 Self::Allow => Verdict::Allow,
147 }
148 }
149}
150
/// Policy for requests that the token bucket refuses.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum RateLimitedVerdict {
    /// Fail-closed (the default for phase 13.1): requests beyond the
    /// guard's QPS budget are denied.
    #[default]
    Deny,
    /// Fail-open: rate-limited requests are allowed. Useful for advisory
    /// guards where overloading the external service is acceptable.
    Allow,
}
162
163impl RateLimitedVerdict {
164 fn to_verdict(self) -> Verdict {
165 match self {
166 Self::Deny => Verdict::Deny,
167 Self::Allow => Verdict::Allow,
168 }
169 }
170}
171
172/// Configuration for [`AsyncGuardAdapter`]. Built via
173/// [`AsyncGuardAdapter::builder`].
174#[derive(Debug, Clone)]
175pub struct AsyncGuardAdapterConfig {
176 /// Circuit breaker tuning.
177 pub circuit: CircuitBreakerConfig,
178 /// Retry configuration applied inside the breaker.
179 pub retry: RetryConfig,
180 /// Maximum number of cached verdicts.
181 pub cache_capacity: NonZeroUsize,
182 /// TTL applied to every cached verdict.
183 pub cache_ttl: Duration,
184 /// Rate limit (calls per second).
185 pub rate_per_second: f64,
186 /// Burst capacity for the rate limiter.
187 pub rate_burst: u32,
188 /// Verdict returned when the breaker is open.
189 pub circuit_open_verdict: CircuitOpenVerdict,
190 /// Verdict returned when the rate limiter rejects a call.
191 pub rate_limited_verdict: RateLimitedVerdict,
192}
193
194impl Default for AsyncGuardAdapterConfig {
195 fn default() -> Self {
196 Self {
197 circuit: CircuitBreakerConfig::default(),
198 retry: RetryConfig::default(),
199 cache_capacity: NonZeroUsize::new(1024).unwrap_or(NonZeroUsize::MIN),
200 cache_ttl: Duration::from_secs(60),
201 rate_per_second: 20.0,
202 rate_burst: 20,
203 circuit_open_verdict: CircuitOpenVerdict::Deny,
204 rate_limited_verdict: RateLimitedVerdict::Deny,
205 }
206 }
207}
208
209/// Fluent builder for [`AsyncGuardAdapter`].
210pub struct AsyncGuardAdapterBuilder<E: ExternalGuard + ?Sized> {
211 inner: Arc<E>,
212 config: AsyncGuardAdapterConfig,
213 clock: Arc<dyn Clock>,
214}
215
216impl<E: ExternalGuard + ?Sized> AsyncGuardAdapterBuilder<E> {
217 /// Start from the given guard and default configuration.
218 pub fn new(inner: Arc<E>) -> Self {
219 Self {
220 inner,
221 config: AsyncGuardAdapterConfig::default(),
222 clock: Arc::new(TokioClock),
223 }
224 }
225
226 /// Override the circuit breaker configuration.
227 pub fn circuit(mut self, circuit: CircuitBreakerConfig) -> Self {
228 self.config.circuit = circuit;
229 self
230 }
231
232 /// Override retry configuration.
233 pub fn retry(mut self, retry: RetryConfig) -> Self {
234 self.config.retry = retry;
235 self
236 }
237
238 /// Set the cache capacity (non-zero).
239 pub fn cache_capacity(mut self, capacity: NonZeroUsize) -> Self {
240 self.config.cache_capacity = capacity;
241 self
242 }
243
244 /// Set the cache TTL.
245 pub fn cache_ttl(mut self, ttl: Duration) -> Self {
246 self.config.cache_ttl = ttl;
247 self
248 }
249
250 /// Set the rate limiter (calls per second + burst).
251 pub fn rate_limit(mut self, rate_per_second: f64, burst: u32) -> Self {
252 self.config.rate_per_second = rate_per_second;
253 self.config.rate_burst = burst;
254 self
255 }
256
257 /// Set the verdict returned while the breaker is open.
258 pub fn circuit_open_verdict(mut self, verdict: CircuitOpenVerdict) -> Self {
259 self.config.circuit_open_verdict = verdict;
260 self
261 }
262
263 /// Set the verdict returned when the rate limiter rejects a call.
264 pub fn rate_limited_verdict(mut self, verdict: RateLimitedVerdict) -> Self {
265 self.config.rate_limited_verdict = verdict;
266 self
267 }
268
269 /// Override the time source. Primarily for tests.
270 pub fn clock(mut self, clock: Arc<dyn Clock>) -> Self {
271 self.clock = clock;
272 self
273 }
274
275 /// Finalize the builder.
276 pub fn build(self) -> AsyncGuardAdapter<E> {
277 let cache = TtlCache::with_clock(self.config.cache_capacity, Arc::clone(&self.clock));
278 let circuit =
279 CircuitBreaker::with_clock(self.config.circuit.clone(), Arc::clone(&self.clock));
280 let bucket = TokenBucket::with_clock(
281 self.config.rate_per_second,
282 self.config.rate_burst,
283 Arc::clone(&self.clock),
284 );
285 AsyncGuardAdapter {
286 inner: self.inner,
287 config: self.config,
288 cache,
289 circuit,
290 bucket,
291 }
292 }
293}
294
295/// Adapter that composes circuit breaker + token bucket + TTL cache + retry
296/// on top of an [`ExternalGuard`].
297///
298/// Flow of a single `evaluate` call:
299///
300/// 1. Check the **circuit breaker**. If open, return
301/// [`CircuitOpenVerdict`] without calling the inner guard.
302/// 2. Check the **cache**. On hit, return the cached verdict without
303/// calling the inner guard.
304/// 3. Check the **token bucket**. If empty, return [`RateLimitedVerdict`].
305/// Rate-limited calls do *not* increment the circuit breaker.
306/// 4. Call the inner guard via [`retry_with_jitter`].
307/// 5. Record success/failure on the breaker. On success, cache the verdict.
308pub struct AsyncGuardAdapter<E: ExternalGuard + ?Sized> {
309 inner: Arc<E>,
310 config: AsyncGuardAdapterConfig,
311 cache: TtlCache<String, Verdict>,
312 circuit: CircuitBreaker,
313 bucket: TokenBucket,
314}
315
316impl<E: ExternalGuard + ?Sized> AsyncGuardAdapter<E> {
317 /// Start a builder with defaults.
318 pub fn builder(inner: Arc<E>) -> AsyncGuardAdapterBuilder<E> {
319 AsyncGuardAdapterBuilder::new(inner)
320 }
321
322 /// Name of the wrapped guard.
323 pub fn name(&self) -> &str {
324 self.inner.name()
325 }
326
327 /// Effective configuration.
328 pub fn config(&self) -> &AsyncGuardAdapterConfig {
329 &self.config
330 }
331
332 /// Inspect the circuit breaker state (primarily for tests and metrics).
333 pub fn circuit_state(&self) -> CircuitState {
334 self.circuit.current_state()
335 }
336
337 /// Evaluate the request end-to-end.
338 pub async fn evaluate(&self, ctx: &GuardCallContext) -> Verdict {
339 // 1. Circuit breaker check.
340 if !self.circuit.allow_call() {
341 return self.config.circuit_open_verdict.to_verdict();
342 }
343
344 // 2. Cache check. Done before rate limiting so that cached hits
345 // don't count against the external QPS budget.
346 let cache_key = self.inner.cache_key(ctx);
347 if let Some(key) = cache_key.as_ref() {
348 if let Some(cached) = self.cache.get(key) {
349 return cached;
350 }
351 }
352
353 // 3. Rate limit.
354 if !self.bucket.try_acquire() {
355 return self.config.rate_limited_verdict.to_verdict();
356 }
357
358 // 4. Retry loop against the inner guard. A permanent error short-
359 // circuits by being returned as an Ok(Err(_)) so the retry
360 // loop doesn't keep calling a known-bad request.
361 let inner = Arc::clone(&self.inner);
362 let ctx_ref = ctx;
363 let loop_outcome: Result<Result<Verdict, ExternalGuardError>, ExternalGuardError> =
364 retry_with_jitter(&self.config.retry, move |_attempt| {
365 let inner = Arc::clone(&inner);
366 async move {
367 match inner.eval(ctx_ref).await {
368 Ok(v) => Ok(Ok(v)),
369 Err(err) if err.is_retryable() => Err(err),
370 Err(err) => Ok(Err(err)),
371 }
372 }
373 })
374 .await;
375
376 let call_result: Result<Verdict, ExternalGuardError> = match loop_outcome {
377 Ok(inner) => inner,
378 Err(err) => Err(err),
379 };
380
381 match call_result {
382 Ok(verdict) => {
383 self.circuit.record_success();
384 if let Some(key) = cache_key {
385 self.cache.insert(key, verdict, self.config.cache_ttl);
386 }
387 verdict
388 }
389 Err(err) => {
390 self.circuit.record_failure();
391 tracing::warn!(
392 guard = self.inner.name(),
393 error = %err,
394 "external guard failed"
395 );
396 Verdict::Deny
397 }
398 }
399 }
400}