Skip to main content

systemprompt_database/resilience/
config.rs

//! Runtime configuration for the resilience primitives.
//!
//! These structs are the in-memory form used by
//! [`super::guard::ResilienceGuard`]. Callers that load configuration from disk
//! (e.g. the `systemprompt-models` config structs, which express durations in
//! milliseconds) translate it into these `Duration`-typed structs at
//! construction.
7
8use std::time::Duration;
9
/// Bounded-retry policy with exponential backoff and jitter.
///
/// Implements `PartialEq`/`Eq` so two policies can be compared directly, for
/// example in tests or when checking whether a reloaded policy differs from
/// the one currently in use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RetryConfig {
    /// Maximum number of attempts, including the first. `1` disables retries.
    pub max_attempts: u32,
    /// Backoff before the first retry; doubles each subsequent attempt.
    pub base_delay: Duration,
    /// Upper bound on a single backoff delay.
    pub max_delay: Duration,
    /// Whether to apply full jitter to each backoff delay.
    pub jitter: bool,
}
22
23impl Default for RetryConfig {
24    fn default() -> Self {
25        Self {
26            max_attempts: 3,
27            base_delay: Duration::from_millis(200),
28            max_delay: Duration::from_secs(10),
29            jitter: true,
30        }
31    }
32}
33
/// Circuit-breaker policy.
///
/// Implements `PartialEq`/`Eq` so two policies can be compared directly, for
/// example in tests or when checking whether a reloaded policy differs from
/// the one currently in use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BreakerConfig {
    /// Consecutive failures that trip the breaker from `Closed` to `Open`.
    pub failure_threshold: u32,
    /// How long the breaker stays `Open` before allowing a half-open probe.
    pub open_cooldown: Duration,
    /// Concurrent probes permitted while `HalfOpen`.
    pub half_open_max_probes: u32,
}
44
45impl Default for BreakerConfig {
46    fn default() -> Self {
47        Self {
48            failure_threshold: 5,
49            open_cooldown: Duration::from_secs(30),
50            half_open_max_probes: 1,
51        }
52    }
53}
54
/// Concurrency-limit (bulkhead) policy.
///
/// Implements `PartialEq`/`Eq` so two policies can be compared directly, for
/// example in tests or when checking whether a reloaded policy differs from
/// the one currently in use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BulkheadConfig {
    /// Maximum number of in-flight calls; further calls are rejected.
    pub max_concurrent: usize,
}
61
62impl Default for BulkheadConfig {
63    fn default() -> Self {
64        Self { max_concurrent: 16 }
65    }
66}
67
68/// The full resilience policy applied to one logical dependency.
69#[derive(Debug, Clone, Copy)]
70pub struct ResilienceConfig {
71    /// Timeout applied to each individual attempt of a non-streaming call.
72    pub request_timeout: Duration,
73    /// Maximum gap between two chunks of a streaming response before it is
74    /// aborted.
75    pub stream_idle_timeout: Duration,
76    /// Retry policy.
77    pub retry: RetryConfig,
78    /// Circuit-breaker policy.
79    pub breaker: BreakerConfig,
80    /// Bulkhead policy.
81    pub bulkhead: BulkheadConfig,
82}
83
84impl Default for ResilienceConfig {
85    fn default() -> Self {
86        Self {
87            request_timeout: Duration::from_secs(60),
88            stream_idle_timeout: Duration::from_secs(60),
89            retry: RetryConfig::default(),
90            breaker: BreakerConfig::default(),
91            bulkhead: BulkheadConfig::default(),
92        }
93    }
94}
95
96impl From<&systemprompt_models::services::ResilienceSettings> for ResilienceConfig {
97    /// Translate the disk-loaded [`ResilienceSettings`] (milliseconds and raw
98    /// counts) into the runtime `Duration`-typed policy. Count fields are
99    /// clamped to a minimum of `1`, since a zero attempt/probe/permit budget
100    /// would deadlock every guarded call.
101    ///
102    /// [`ResilienceSettings`]: systemprompt_models::services::ResilienceSettings
103    fn from(settings: &systemprompt_models::services::ResilienceSettings) -> Self {
104        Self {
105            request_timeout: Duration::from_millis(settings.request_timeout_ms),
106            stream_idle_timeout: Duration::from_millis(settings.stream_idle_timeout_ms),
107            retry: RetryConfig {
108                max_attempts: settings.retry_attempts.max(1),
109                base_delay: Duration::from_millis(settings.retry_base_delay_ms),
110                max_delay: Duration::from_millis(settings.retry_max_delay_ms),
111                jitter: true,
112            },
113            breaker: BreakerConfig {
114                failure_threshold: settings.breaker_failure_threshold.max(1),
115                open_cooldown: Duration::from_millis(settings.breaker_open_cooldown_ms),
116                half_open_max_probes: settings.breaker_half_open_probes.max(1),
117            },
118            bulkhead: BulkheadConfig {
119                max_concurrent: settings.max_concurrent.max(1),
120            },
121        }
122    }
123}