ralph_workflow/agents/
fallback.rs

1//! Fallback chain configuration for agent fault tolerance.
2//!
3//! This module defines the `FallbackConfig` structure that controls how Ralph
4//! handles agent failures. It supports:
5//! - Agent-level fallback (try different agents)
6//! - Provider-level fallback (try different models within same agent)
7//! - Exponential backoff with cycling
8
9use serde::Deserialize;
10use std::collections::HashMap;
11
/// The role an agent plays: developer, reviewer, or commit.
///
/// A separate chain of fallback agents can be configured for every role.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AgentRole {
    /// Implements features based on PROMPT.md.
    Developer,
    /// Reviews code and fixes issues.
    Reviewer,
    /// Generates commit messages from diffs.
    Commit,
}
24
25impl std::fmt::Display for AgentRole {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        match self {
28            Self::Developer => write!(f, "developer"),
29            Self::Reviewer => write!(f, "reviewer"),
30            Self::Commit => write!(f, "commit"),
31        }
32    }
33}
34
35/// Agent chain configuration for preferred agents and fallback switching.
36///
37/// The agent chain defines both:
38/// 1. The **preferred agent** (first in the list) for each role
39/// 2. The **fallback agents** (remaining in the list) to try if the preferred fails
40///
41/// This provides a unified way to configure which agents to use and in what order.
42/// Ralph automatically switches to the next agent in the chain when encountering
43/// errors like rate limits or auth failures.
44///
45/// ## Provider-Level Fallback
46///
47/// In addition to agent-level fallback, you can configure provider-level fallback
48/// within a single agent using the `provider_fallback` field. This is useful for
49/// agents like opencode that support multiple providers/models via the `-m` flag.
50///
51/// Example:
52/// ```toml
53/// [agent_chain]
54/// provider_fallback.opencode = ["-m opencode/glm-4.7-free", "-m opencode/claude-sonnet-4"]
55/// ```
56///
57/// ## Exponential Backoff and Cycling
58///
59/// When all fallbacks are exhausted, Ralph uses exponential backoff and cycles
60/// back to the first agent in the chain:
61/// - Base delay starts at `retry_delay_ms` (default: 1000ms)
62/// - Each cycle multiplies by `backoff_multiplier` (default: 2.0)
63/// - Capped at `max_backoff_ms` (default: 60000ms = 1 minute)
64/// - Maximum cycles controlled by `max_cycles` (default: 3)
65#[derive(Debug, Clone, Deserialize)]
66pub struct FallbackConfig {
67    /// Ordered list of agents for developer role (first = preferred, rest = fallbacks).
68    #[serde(default)]
69    pub developer: Vec<String>,
70    /// Ordered list of agents for reviewer role (first = preferred, rest = fallbacks).
71    #[serde(default)]
72    pub reviewer: Vec<String>,
73    /// Ordered list of agents for commit role (first = preferred, rest = fallbacks).
74    #[serde(default)]
75    pub commit: Vec<String>,
76    /// Provider-level fallback: maps agent name to list of model flags to try.
77    /// Example: `opencode = ["-m opencode/glm-4.7-free", "-m opencode/claude-sonnet-4"]`
78    #[serde(default)]
79    pub provider_fallback: HashMap<String, Vec<String>>,
80    /// Maximum number of retries per agent before moving to next.
81    #[serde(default = "default_max_retries")]
82    pub max_retries: u32,
83    /// Base delay between retries in milliseconds.
84    #[serde(default = "default_retry_delay_ms")]
85    pub retry_delay_ms: u64,
86    /// Multiplier for exponential backoff (default: 2.0).
87    #[serde(default = "default_backoff_multiplier")]
88    pub backoff_multiplier: f64,
89    /// Maximum backoff delay in milliseconds (default: 60000 = 1 minute).
90    #[serde(default = "default_max_backoff_ms")]
91    pub max_backoff_ms: u64,
92    /// Maximum number of cycles through all agents before giving up (default: 3).
93    #[serde(default = "default_max_cycles")]
94    pub max_cycles: u32,
95}
96
/// Serde default for `max_retries`: three retries per agent.
const fn default_max_retries() -> u32 {
    3_u32
}
100
/// Serde default for `retry_delay_ms`: a base delay of one second.
const fn default_retry_delay_ms() -> u64 {
    1_000
}
104
/// Serde default for `backoff_multiplier`: the delay doubles every cycle.
const fn default_backoff_multiplier() -> f64 {
    2.0_f64
}
108
/// Serde default for `max_backoff_ms`: backoff is capped at one minute.
const fn default_max_backoff_ms() -> u64 {
    60_000
}
112
/// Serde default for `max_cycles`: three passes through the agent chain.
const fn default_max_cycles() -> u32 {
    3_u32
}
116
// IEEE 754 double precision constants for f64_to_u64_via_bits
/// Exponent bias of an IEEE 754 double.
const IEEE_754_EXP_BIAS: i32 = 1023;
/// Mask selecting the 11 exponent bits once they are shifted down to bit 0.
const IEEE_754_EXP_MASK: u64 = 0x7FF;
/// Mask selecting the 52 explicitly stored mantissa bits.
const IEEE_754_MANTISSA_MASK: u64 = 0x000F_FFFF_FFFF_FFFF;
/// The implicit leading one of a normalized mantissa (bit 52).
const IEEE_754_IMPLICIT_ONE: u64 = 1u64 << 52;

/// Convert f64 to u64 using IEEE 754 bit manipulation to avoid cast lints.
///
/// The raw bits of the value are decoded by hand and the fractional part is
/// truncated (rounding toward zero):
/// - NaN, infinities and negative values yield `0`;
/// - values below `1.0` (including denormals and `-0.0`) yield `0`;
/// - values in `[1, 2^64)` yield their integer part;
/// - values of `2^64` and above saturate at `u64::MAX`.
fn f64_to_u64_via_bits(value: f64) -> u64 {
    // Handle special cases first
    if !value.is_finite() || value < 0.0 {
        return 0;
    }

    // IEEE 754 double precision layout:
    // - Bit 63: sign (0 here, except for -0.0 which falls into the `< bias` case)
    // - Bits 52-62: exponent (biased by 1023)
    // - Bits 0-51: mantissa (with implicit leading 1 for normalized numbers)
    let bits = value.to_bits();
    let exp_biased = (bits >> 52) & IEEE_754_EXP_MASK;
    let bias = u64::from(IEEE_754_EXP_BIAS.unsigned_abs());

    // A biased exponent below the bias means the value is < 1. This also
    // covers denormals, whose biased exponent is 0.
    if exp_biased < bias {
        return 0;
    }

    // Unbiased exponent; at most 1023 because the value is finite.
    let exp = exp_biased - bias;

    // The value equals `significand * 2^(exp - 52)`, where the significand is
    // the 53-bit integer formed by the implicit one and the stored mantissa.
    let significand = (bits & IEEE_754_MANTISSA_MASK) | IEEE_754_IMPLICIT_ONE;

    if exp >= 64 {
        // 2^64 or more cannot be represented: saturate.
        u64::MAX
    } else if exp >= 52 {
        // Already an integer; scale up. The top bit lands at position `exp`
        // (<= 63), so nothing is shifted out.
        significand << (exp - 52)
    } else {
        // Drop the `52 - exp` fractional bits.
        significand >> (52 - exp)
    }
}
178
179impl Default for FallbackConfig {
180    fn default() -> Self {
181        Self {
182            developer: Vec::new(),
183            reviewer: Vec::new(),
184            commit: Vec::new(),
185            provider_fallback: HashMap::new(),
186            max_retries: default_max_retries(),
187            retry_delay_ms: default_retry_delay_ms(),
188            backoff_multiplier: default_backoff_multiplier(),
189            max_backoff_ms: default_max_backoff_ms(),
190            max_cycles: default_max_cycles(),
191        }
192    }
193}
194
195impl FallbackConfig {
196    /// Calculate exponential backoff delay for a given cycle.
197    ///
198    /// Uses the formula: min(base * multiplier^cycle, `max_backoff`)
199    ///
200    /// Uses integer arithmetic to avoid floating-point casting issues.
201    pub fn calculate_backoff(&self, cycle: u32) -> u64 {
202        // For common multiplier values, use direct integer computation
203        // to avoid f64->u64 conversion and associated clippy lints.
204        let multiplier_hundredths = self.get_multiplier_hundredths();
205        let base_hundredths = self.retry_delay_ms.saturating_mul(100);
206
207        // Calculate: base * (multiplier^cycle) / 100^cycle
208        // Use saturating arithmetic to avoid overflow
209        let mut delay_hundredths = base_hundredths;
210        for _ in 0..cycle {
211            delay_hundredths = delay_hundredths.saturating_mul(multiplier_hundredths);
212            delay_hundredths = delay_hundredths.saturating_div(100);
213        }
214
215        // Convert back to milliseconds
216        delay_hundredths.div_euclid(100).min(self.max_backoff_ms)
217    }
218
219    /// Get the multiplier as hundredths (e.g., 2.0 -> 200, 1.5 -> 150).
220    ///
221    /// Uses a lookup table for common values to avoid f64->u64 casts.
222    /// For uncommon values, uses a safe conversion with validation.
223    fn get_multiplier_hundredths(&self) -> u64 {
224        const EPSILON: f64 = 0.0001;
225
226        // Common multiplier values - use exact integer matches
227        // This avoids the cast for the vast majority of cases
228        let m = self.backoff_multiplier;
229        if (m - 1.0).abs() < EPSILON {
230            return 100;
231        } else if (m - 1.5).abs() < EPSILON {
232            return 150;
233        } else if (m - 2.0).abs() < EPSILON {
234            return 200;
235        } else if (m - 2.5).abs() < EPSILON {
236            return 250;
237        } else if (m - 3.0).abs() < EPSILON {
238            return 300;
239        } else if (m - 4.0).abs() < EPSILON {
240            return 400;
241        } else if (m - 5.0).abs() < EPSILON {
242            return 500;
243        } else if (m - 10.0).abs() < EPSILON {
244            return 1000;
245        }
246
247        // For uncommon values, compute using the original formula
248        // The value is clamped to [0.0, 1000.0] so the result is in [0.0, 100000.0]
249        // We use to_bits() and manual decoding to avoid cast lints
250        let clamped = m.clamp(0.0, 1000.0);
251        let multiplied = clamped * 100.0;
252        let rounded = multiplied.round();
253
254        // Manual f64 to u64 conversion using IEEE 754 bit representation
255        f64_to_u64_via_bits(rounded)
256    }
257
258    /// Get fallback agents for a role.
259    pub fn get_fallbacks(&self, role: AgentRole) -> &[String] {
260        match role {
261            AgentRole::Developer => &self.developer,
262            AgentRole::Reviewer => &self.reviewer,
263            AgentRole::Commit => self.get_effective_commit_fallbacks(),
264        }
265    }
266
267    /// Get effective fallback agents for commit role.
268    ///
269    /// Falls back to reviewer chain if commit chain is empty.
270    /// This ensures commit message generation can use the same agents
271    /// configured for code review when no dedicated commit agents are specified.
272    fn get_effective_commit_fallbacks(&self) -> &[String] {
273        if self.commit.is_empty() {
274            &self.reviewer
275        } else {
276            &self.commit
277        }
278    }
279
280    /// Check if fallback is configured for a role.
281    pub fn has_fallbacks(&self, role: AgentRole) -> bool {
282        !self.get_fallbacks(role).is_empty()
283    }
284
285    /// Get provider-level fallback model flags for an agent.
286    ///
287    /// Returns the list of model flags to try for the given agent name.
288    /// Empty slice if no provider fallback is configured for this agent.
289    pub fn get_provider_fallbacks(&self, agent_name: &str) -> &[String] {
290        self.provider_fallback
291            .get(agent_name)
292            .map_or(&[], std::vec::Vec::as_slice)
293    }
294
295    /// Check if provider-level fallback is configured for an agent.
296    pub fn has_provider_fallbacks(&self, agent_name: &str) -> bool {
297        self.provider_fallback
298            .get(agent_name)
299            .is_some_and(|v| !v.is_empty())
300    }
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned agent list from string literals.
    fn agents(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    #[test]
    fn test_agent_role_display() {
        let expected = [
            (AgentRole::Developer, "developer"),
            (AgentRole::Reviewer, "reviewer"),
            (AgentRole::Commit, "commit"),
        ];

        for (role, text) in expected.iter() {
            assert_eq!(format!("{}", role), *text);
        }
    }

    #[test]
    fn test_fallback_config_defaults() {
        let defaults = FallbackConfig::default();

        // All chains start out empty.
        assert!(defaults.developer.is_empty());
        assert!(defaults.reviewer.is_empty());
        assert!(defaults.commit.is_empty());

        // Retry and backoff settings.
        assert_eq!(defaults.max_retries, 3);
        assert_eq!(defaults.retry_delay_ms, 1000);
        // Use approximate comparison for floating point
        assert!((defaults.backoff_multiplier - 2.0).abs() < f64::EPSILON);
        assert_eq!(defaults.max_backoff_ms, 60000);
        assert_eq!(defaults.max_cycles, 3);
    }

    #[test]
    fn test_fallback_config_calculate_backoff() {
        let config = FallbackConfig {
            retry_delay_ms: 1000,
            backoff_multiplier: 2.0,
            max_backoff_ms: 60000,
            ..Default::default()
        };

        // (cycle, expected delay); the last entry checks the cap.
        let cases: [(u32, u64); 5] = [(0, 1000), (1, 2000), (2, 4000), (3, 8000), (10, 60000)];
        for (cycle, expected) in cases.iter() {
            assert_eq!(config.calculate_backoff(*cycle), *expected);
        }
    }

    #[test]
    fn test_fallback_config_get_fallbacks() {
        let config = FallbackConfig {
            developer: agents(&["claude", "codex"]),
            reviewer: agents(&["codex"]),
            ..Default::default()
        };

        let developer_chain = config.get_fallbacks(AgentRole::Developer);
        assert_eq!(developer_chain, &["claude", "codex"]);

        let reviewer_chain = config.get_fallbacks(AgentRole::Reviewer);
        assert_eq!(reviewer_chain, &["codex"]);
    }

    #[test]
    fn test_fallback_config_has_fallbacks() {
        let config = FallbackConfig {
            developer: agents(&["claude"]),
            reviewer: Vec::new(),
            ..Default::default()
        };

        assert!(config.has_fallbacks(AgentRole::Developer));
        assert!(!config.has_fallbacks(AgentRole::Reviewer));
    }

    #[test]
    fn test_fallback_config_defaults_provider_fallback() {
        let defaults = FallbackConfig::default();

        assert!(defaults.get_provider_fallbacks("opencode").is_empty());
        assert!(!defaults.has_provider_fallbacks("opencode"));
    }

    #[test]
    fn test_provider_fallback_config() {
        let model_flags = agents(&["-m opencode/glm-4.7-free", "-m opencode/claude-sonnet-4"]);
        let mut provider_fallback = HashMap::new();
        provider_fallback.insert("opencode".to_string(), model_flags);

        let config = FallbackConfig {
            provider_fallback,
            ..Default::default()
        };

        let flags = config.get_provider_fallbacks("opencode");
        assert_eq!(flags.len(), 2);
        assert_eq!(flags[0], "-m opencode/glm-4.7-free");
        assert_eq!(flags[1], "-m opencode/claude-sonnet-4");

        assert!(config.has_provider_fallbacks("opencode"));
        assert!(!config.has_provider_fallbacks("claude"));
    }

    #[test]
    fn test_fallback_config_from_toml() {
        let toml_str = r#"
            developer = ["claude", "codex"]
            reviewer = ["codex", "claude"]
            max_retries = 5
            retry_delay_ms = 2000

            [provider_fallback]
            opencode = ["-m opencode/glm-4.7-free", "-m zai/glm-4.7"]
        "#;

        let parsed: FallbackConfig = toml::from_str(toml_str).unwrap();

        assert_eq!(parsed.developer, vec!["claude", "codex"]);
        assert_eq!(parsed.reviewer, vec!["codex", "claude"]);
        assert_eq!(parsed.max_retries, 5);
        assert_eq!(parsed.retry_delay_ms, 2000);
        assert_eq!(parsed.get_provider_fallbacks("opencode").len(), 2);
    }

    #[test]
    fn test_commit_uses_reviewer_chain_when_empty() {
        // With no commit chain, the commit role borrows the reviewer chain.
        let config = FallbackConfig {
            commit: Vec::new(),
            reviewer: agents(&["agent1", "agent2"]),
            ..Default::default()
        };

        let commit_chain = config.get_fallbacks(AgentRole::Commit);
        assert_eq!(commit_chain, &["agent1", "agent2"]);
        assert!(config.has_fallbacks(AgentRole::Commit));
    }

    #[test]
    fn test_commit_uses_own_chain_when_configured() {
        // A configured commit chain takes precedence over the reviewer chain.
        let config = FallbackConfig {
            commit: agents(&["commit-agent"]),
            reviewer: agents(&["reviewer-agent"]),
            ..Default::default()
        };

        let commit_chain = config.get_fallbacks(AgentRole::Commit);
        assert_eq!(commit_chain, &["commit-agent"]);
    }
}