Skip to main content

codex_helper_core/
config_retry.rs

1use super::*;
2
3#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
4#[serde(rename_all = "kebab-case")]
5pub enum RetryProfileName {
6    Balanced,
7    SameUpstream,
8    AggressiveFailover,
9    CostPrimary,
10}
11
12#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
13pub struct ResolvedRetryLayerConfig {
14    pub max_attempts: u32,
15    pub backoff_ms: u64,
16    pub backoff_max_ms: u64,
17    pub jitter_ms: u64,
18    pub on_status: String,
19    pub on_class: Vec<String>,
20    pub strategy: RetryStrategy,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
24pub struct ResolvedRetryConfig {
25    pub upstream: ResolvedRetryLayerConfig,
26    pub route: ResolvedRetryLayerConfig,
27    /// Guarded cross-station failover before any upstream output is committed to the client.
28    pub allow_cross_station_before_first_output: bool,
29    pub never_on_status: String,
30    pub never_on_class: Vec<String>,
31    pub cloudflare_challenge_cooldown_secs: u64,
32    pub cloudflare_timeout_cooldown_secs: u64,
33    pub transport_cooldown_secs: u64,
34    pub cooldown_backoff_factor: u64,
35    pub cooldown_backoff_max_secs: u64,
36}
37
38#[derive(Debug, Clone, Serialize, Deserialize, Default)]
39#[serde(deny_unknown_fields)]
40pub struct RetryLayerConfig {
41    #[serde(default)]
42    pub max_attempts: Option<u32>,
43    #[serde(default)]
44    pub backoff_ms: Option<u64>,
45    #[serde(default)]
46    pub backoff_max_ms: Option<u64>,
47    #[serde(default)]
48    pub jitter_ms: Option<u64>,
49    #[serde(default)]
50    pub on_status: Option<String>,
51    #[serde(default)]
52    pub on_class: Option<Vec<String>>,
53    #[serde(default)]
54    pub strategy: Option<RetryStrategy>,
55}
56
57#[derive(Debug, Clone, Serialize, Deserialize)]
58#[serde(deny_unknown_fields)]
59pub struct RetryConfig {
60    /// Curated retry policy preset. When set, codex-helper starts from the profile defaults,
61    /// then applies any explicitly configured fields below as overrides.
62    #[serde(default)]
63    pub profile: Option<RetryProfileName>,
64    #[serde(default)]
65    pub upstream: Option<RetryLayerConfig>,
66    #[serde(default)]
67    pub provider: Option<RetryLayerConfig>,
68    /// Allow automatic failover to another station, but only before any output has been
69    /// committed to the client. Session-pinned routes remain sticky regardless of this setting.
70    #[serde(default)]
71    pub allow_cross_station_before_first_output: Option<bool>,
72    #[serde(default)]
73    pub never_on_status: Option<String>,
74    #[serde(default)]
75    pub never_on_class: Option<Vec<String>>,
76    #[serde(default)]
77    pub cloudflare_challenge_cooldown_secs: Option<u64>,
78    #[serde(default)]
79    pub cloudflare_timeout_cooldown_secs: Option<u64>,
80    #[serde(default)]
81    pub transport_cooldown_secs: Option<u64>,
82    /// Optional exponential backoff for cooldown penalties.
83    /// When factor > 1, repeated penalties will increase cooldown up to max_secs.
84    #[serde(default)]
85    pub cooldown_backoff_factor: Option<u64>,
86    #[serde(default)]
87    pub cooldown_backoff_max_secs: Option<u64>,
88}
89
90#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
91#[serde(rename_all = "snake_case")]
92pub enum RetryStrategy {
93    /// Prefer switching to another upstream on retry (default).
94    #[default]
95    Failover,
96    /// Prefer retrying the same upstream (opt-in).
97    SameUpstream,
98}
99
100impl Default for RetryConfig {
101    fn default() -> Self {
102        Self {
103            profile: Some(RetryProfileName::Balanced),
104            upstream: None,
105            provider: None,
106            allow_cross_station_before_first_output: None,
107            never_on_status: None,
108            never_on_class: None,
109            cloudflare_challenge_cooldown_secs: None,
110            cloudflare_timeout_cooldown_secs: None,
111            transport_cooldown_secs: None,
112            cooldown_backoff_factor: None,
113            cooldown_backoff_max_secs: None,
114        }
115    }
116}
117
118impl RetryProfileName {
119    pub fn defaults(self) -> ResolvedRetryConfig {
120        match self {
121            RetryProfileName::Balanced => ResolvedRetryConfig {
122                upstream: ResolvedRetryLayerConfig {
123                    max_attempts: 2,
124                    backoff_ms: 200,
125                    backoff_max_ms: 2_000,
126                    jitter_ms: 100,
127                    on_status: "429,500-599,524".to_string(),
128                    on_class: vec![
129                        "upstream_transport_error".to_string(),
130                        "cloudflare_timeout".to_string(),
131                        "cloudflare_challenge".to_string(),
132                    ],
133                    strategy: RetryStrategy::SameUpstream,
134                },
135                route: ResolvedRetryLayerConfig {
136                    max_attempts: 2,
137                    backoff_ms: 0,
138                    backoff_max_ms: 0,
139                    jitter_ms: 0,
140                    on_status: "401,403,404,408,429,500-599,524".to_string(),
141                    on_class: vec![
142                        "upstream_transport_error".to_string(),
143                        "routing_mismatch_capability".to_string(),
144                    ],
145                    strategy: RetryStrategy::Failover,
146                },
147                allow_cross_station_before_first_output: false,
148                never_on_status: "413,415,422".to_string(),
149                never_on_class: vec!["client_error_non_retryable".to_string()],
150                cloudflare_challenge_cooldown_secs: 300,
151                cloudflare_timeout_cooldown_secs: 60,
152                transport_cooldown_secs: 30,
153                cooldown_backoff_factor: 1,
154                cooldown_backoff_max_secs: 600,
155            },
156            RetryProfileName::SameUpstream => ResolvedRetryConfig {
157                upstream: ResolvedRetryLayerConfig {
158                    max_attempts: 3,
159                    ..RetryProfileName::Balanced.defaults().upstream
160                },
161                route: ResolvedRetryLayerConfig {
162                    max_attempts: 1,
163                    ..RetryProfileName::Balanced.defaults().route
164                },
165                ..RetryProfileName::Balanced.defaults()
166            },
167            RetryProfileName::AggressiveFailover => ResolvedRetryConfig {
168                upstream: ResolvedRetryLayerConfig {
169                    max_attempts: 2,
170                    backoff_ms: 200,
171                    backoff_max_ms: 2_500,
172                    jitter_ms: 150,
173                    on_status: "429,500-599,524".to_string(),
174                    on_class: vec![
175                        "upstream_transport_error".to_string(),
176                        "cloudflare_timeout".to_string(),
177                        "cloudflare_challenge".to_string(),
178                    ],
179                    strategy: RetryStrategy::SameUpstream,
180                },
181                route: ResolvedRetryLayerConfig {
182                    max_attempts: 3,
183                    backoff_ms: 0,
184                    backoff_max_ms: 0,
185                    jitter_ms: 0,
186                    on_status: "401,403,404,408,429,500-599,524".to_string(),
187                    on_class: vec![
188                        "upstream_transport_error".to_string(),
189                        "routing_mismatch_capability".to_string(),
190                    ],
191                    strategy: RetryStrategy::Failover,
192                },
193                allow_cross_station_before_first_output: true,
194                ..RetryProfileName::Balanced.defaults()
195            },
196            RetryProfileName::CostPrimary => ResolvedRetryConfig {
197                route: ResolvedRetryLayerConfig {
198                    max_attempts: 2,
199                    ..RetryProfileName::Balanced.defaults().route
200                },
201                allow_cross_station_before_first_output: true,
202                transport_cooldown_secs: 30,
203                cooldown_backoff_factor: 2,
204                cooldown_backoff_max_secs: 900,
205                ..RetryProfileName::Balanced.defaults()
206            },
207        }
208    }
209}
210
211impl RetryConfig {
212    pub fn resolve(&self) -> ResolvedRetryConfig {
213        let mut out = self
214            .profile
215            .unwrap_or(RetryProfileName::Balanced)
216            .defaults();
217
218        if let Some(layer) = self.upstream.as_ref() {
219            if let Some(v) = layer.max_attempts {
220                out.upstream.max_attempts = v;
221            }
222            if let Some(v) = layer.backoff_ms {
223                out.upstream.backoff_ms = v;
224            }
225            if let Some(v) = layer.backoff_max_ms {
226                out.upstream.backoff_max_ms = v;
227            }
228            if let Some(v) = layer.jitter_ms {
229                out.upstream.jitter_ms = v;
230            }
231            if let Some(v) = layer.on_status.as_deref() {
232                out.upstream.on_status = v.to_string();
233            }
234            if let Some(v) = layer.on_class.as_ref() {
235                out.upstream.on_class = v.clone();
236            }
237            if let Some(v) = layer.strategy {
238                out.upstream.strategy = v;
239            }
240        }
241        if let Some(layer) = self.provider.as_ref() {
242            if let Some(v) = layer.max_attempts {
243                out.route.max_attempts = v;
244            }
245            if let Some(v) = layer.backoff_ms {
246                out.route.backoff_ms = v;
247            }
248            if let Some(v) = layer.backoff_max_ms {
249                out.route.backoff_max_ms = v;
250            }
251            if let Some(v) = layer.jitter_ms {
252                out.route.jitter_ms = v;
253            }
254            if let Some(v) = layer.on_status.as_deref() {
255                out.route.on_status = v.to_string();
256            }
257            if let Some(v) = layer.on_class.as_ref() {
258                out.route.on_class = v.clone();
259            }
260            if let Some(v) = layer.strategy {
261                out.route.strategy = v;
262            }
263        }
264        if let Some(v) = self.allow_cross_station_before_first_output {
265            out.allow_cross_station_before_first_output = v;
266        }
267        if let Some(v) = self.never_on_status.as_deref() {
268            out.never_on_status = v.to_string();
269        }
270        if let Some(v) = self.never_on_class.as_ref() {
271            out.never_on_class = v.clone();
272        }
273        if let Some(v) = self.cloudflare_challenge_cooldown_secs {
274            out.cloudflare_challenge_cooldown_secs = v;
275        }
276        if let Some(v) = self.cloudflare_timeout_cooldown_secs {
277            out.cloudflare_timeout_cooldown_secs = v;
278        }
279        if let Some(v) = self.transport_cooldown_secs {
280            out.transport_cooldown_secs = v;
281        }
282        if let Some(v) = self.cooldown_backoff_factor {
283            out.cooldown_backoff_factor = v;
284        }
285        if let Some(v) = self.cooldown_backoff_max_secs {
286            out.cooldown_backoff_max_secs = v;
287        }
288
289        out
290    }
291}