Skip to main content

ralph_workflow/reducer/state/agent_chain/
transitions.rs

1// State transition methods for AgentChainState.
2//
3// These methods implement the fallback chain progression: advancing models,
4// switching agents, and starting retry cycles with backoff.
5
6use std::sync::Arc;
7
8use super::backoff::calculate_backoff_delay_ms;
9use super::{AgentChainState, AgentDrain, AgentRole, DrainMode, RateLimitContinuationPrompt};
10
11impl AgentChainState {
12    #[must_use]
13    pub fn advance_to_next_model(&self) -> Self {
14        let start_agent_index = self.current_agent_index;
15
16        // When models are configured, we try each model for the current agent once.
17        // If the models list is exhausted, advance to the next agent/retry cycle
18        // instead of looping models indefinitely.
19        let mut next = match self.models_per_agent.get(self.current_agent_index) {
20            Some(models) if !models.is_empty() => {
21                if self.current_model_index + 1 < models.len() {
22                    // Simple model advance - only increment model index
23                    Self {
24                        agents: Arc::clone(&self.agents),
25                        current_agent_index: self.current_agent_index,
26                        models_per_agent: Arc::clone(&self.models_per_agent),
27                        current_model_index: self.current_model_index + 1,
28                        retry_cycle: self.retry_cycle,
29                        max_cycles: self.max_cycles,
30                        retry_delay_ms: self.retry_delay_ms,
31                        backoff_multiplier: self.backoff_multiplier,
32                        max_backoff_ms: self.max_backoff_ms,
33                        backoff_pending_ms: self.backoff_pending_ms,
34                        current_role: self.current_role,
35                        current_drain: self.current_drain,
36                        current_mode: self.current_mode,
37                        rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
38                        last_session_id: self.last_session_id.clone(),
39                        last_failure_reason: self.last_failure_reason.clone(),
40                    }
41                } else {
42                    self.switch_to_next_agent()
43                }
44            }
45            _ => self.switch_to_next_agent(),
46        };
47
48        if next.current_agent_index != start_agent_index {
49            next.last_session_id = None;
50        }
51
52        next
53    }
54
55    #[must_use]
56    pub fn switch_to_next_agent(&self) -> Self {
57        if self.current_agent_index + 1 < self.agents.len() {
58            // Advance to next agent
59            Self {
60                agents: Arc::clone(&self.agents),
61                current_agent_index: self.current_agent_index + 1,
62                models_per_agent: Arc::clone(&self.models_per_agent),
63                current_model_index: 0,
64                retry_cycle: self.retry_cycle,
65                max_cycles: self.max_cycles,
66                retry_delay_ms: self.retry_delay_ms,
67                backoff_multiplier: self.backoff_multiplier,
68                max_backoff_ms: self.max_backoff_ms,
69                backoff_pending_ms: None,
70                current_role: self.current_role,
71                current_drain: self.current_drain,
72                current_mode: self.current_mode,
73                rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
74                last_session_id: self.last_session_id.clone(),
75                last_failure_reason: self.last_failure_reason.clone(),
76            }
77        } else {
78            // Wrap around to first agent and increment retry cycle
79            let new_retry_cycle = self.retry_cycle + 1;
80            let new_backoff_pending_ms = if new_retry_cycle >= self.max_cycles {
81                None
82            } else {
83                // Create temporary state to calculate backoff
84                let temp = Self {
85                    agents: Arc::clone(&self.agents),
86                    current_agent_index: 0,
87                    models_per_agent: Arc::clone(&self.models_per_agent),
88                    current_model_index: 0,
89                    retry_cycle: new_retry_cycle,
90                    max_cycles: self.max_cycles,
91                    retry_delay_ms: self.retry_delay_ms,
92                    backoff_multiplier: self.backoff_multiplier,
93                    max_backoff_ms: self.max_backoff_ms,
94                    backoff_pending_ms: None,
95                    current_role: self.current_role,
96                    current_drain: self.current_drain,
97                    current_mode: self.current_mode,
98                    rate_limit_continuation_prompt: None,
99                    last_session_id: None,
100                    last_failure_reason: None,
101                };
102                Some(temp.calculate_backoff_delay_ms_for_retry_cycle())
103            };
104
105            Self {
106                agents: Arc::clone(&self.agents),
107                current_agent_index: 0,
108                models_per_agent: Arc::clone(&self.models_per_agent),
109                current_model_index: 0,
110                retry_cycle: new_retry_cycle,
111                max_cycles: self.max_cycles,
112                retry_delay_ms: self.retry_delay_ms,
113                backoff_multiplier: self.backoff_multiplier,
114                max_backoff_ms: self.max_backoff_ms,
115                backoff_pending_ms: new_backoff_pending_ms,
116                current_role: self.current_role,
117                current_drain: self.current_drain,
118                current_mode: self.current_mode,
119                rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
120                last_session_id: self.last_session_id.clone(),
121                last_failure_reason: self.last_failure_reason.clone(),
122            }
123        }
124    }
125
126    /// Switch to a specific agent by name.
127    ///
128    /// If `to_agent` is unknown, falls back to `switch_to_next_agent()` to keep the
129    /// reducer deterministic.
130    #[must_use]
131    pub fn switch_to_agent_named(&self, to_agent: &str) -> Self {
132        let Some(target_index) = self.agents.iter().position(|a| a == to_agent) else {
133            return self.switch_to_next_agent();
134        };
135
136        if target_index == self.current_agent_index {
137            // Same agent - just reset model index
138            return Self {
139                agents: Arc::clone(&self.agents),
140                current_agent_index: self.current_agent_index,
141                models_per_agent: Arc::clone(&self.models_per_agent),
142                current_model_index: 0,
143                retry_cycle: self.retry_cycle,
144                max_cycles: self.max_cycles,
145                retry_delay_ms: self.retry_delay_ms,
146                backoff_multiplier: self.backoff_multiplier,
147                max_backoff_ms: self.max_backoff_ms,
148                backoff_pending_ms: None,
149                current_role: self.current_role,
150                current_drain: self.current_drain,
151                current_mode: self.current_mode,
152                rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
153                last_session_id: self.last_session_id.clone(),
154                last_failure_reason: self.last_failure_reason.clone(),
155            };
156        }
157
158        if target_index <= self.current_agent_index {
159            // Treat switching to an earlier agent as starting a new retry cycle.
160            let new_retry_cycle = self.retry_cycle + 1;
161            let new_backoff_pending_ms = if new_retry_cycle >= self.max_cycles && target_index == 0
162            {
163                None
164            } else {
165                // Create temporary state to calculate backoff
166                let temp = Self {
167                    agents: Arc::clone(&self.agents),
168                    current_agent_index: target_index,
169                    models_per_agent: Arc::clone(&self.models_per_agent),
170                    current_model_index: 0,
171                    retry_cycle: new_retry_cycle,
172                    max_cycles: self.max_cycles,
173                    retry_delay_ms: self.retry_delay_ms,
174                    backoff_multiplier: self.backoff_multiplier,
175                    max_backoff_ms: self.max_backoff_ms,
176                    backoff_pending_ms: None,
177                    current_role: self.current_role,
178                    current_drain: self.current_drain,
179                    current_mode: self.current_mode,
180                    rate_limit_continuation_prompt: None,
181                    last_session_id: None,
182                    last_failure_reason: None,
183                };
184                Some(temp.calculate_backoff_delay_ms_for_retry_cycle())
185            };
186
187            Self {
188                agents: Arc::clone(&self.agents),
189                current_agent_index: target_index,
190                models_per_agent: Arc::clone(&self.models_per_agent),
191                current_model_index: 0,
192                retry_cycle: new_retry_cycle,
193                max_cycles: self.max_cycles,
194                retry_delay_ms: self.retry_delay_ms,
195                backoff_multiplier: self.backoff_multiplier,
196                max_backoff_ms: self.max_backoff_ms,
197                backoff_pending_ms: new_backoff_pending_ms,
198                current_role: self.current_role,
199                current_drain: self.current_drain,
200                current_mode: self.current_mode,
201                rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
202                last_session_id: self.last_session_id.clone(),
203                last_failure_reason: self.last_failure_reason.clone(),
204            }
205        } else {
206            // Advancing to later agent
207            Self {
208                agents: Arc::clone(&self.agents),
209                current_agent_index: target_index,
210                models_per_agent: Arc::clone(&self.models_per_agent),
211                current_model_index: 0,
212                retry_cycle: self.retry_cycle,
213                max_cycles: self.max_cycles,
214                retry_delay_ms: self.retry_delay_ms,
215                backoff_multiplier: self.backoff_multiplier,
216                max_backoff_ms: self.max_backoff_ms,
217                backoff_pending_ms: None,
218                current_role: self.current_role,
219                current_drain: self.current_drain,
220                current_mode: self.current_mode,
221                rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
222                last_session_id: self.last_session_id.clone(),
223                last_failure_reason: self.last_failure_reason.clone(),
224            }
225        }
226    }
227
228    /// Switch to next agent after rate limit, preserving prompt for continuation.
229    ///
230    /// This is used when an agent hits a 429 rate limit error. Instead of
231    /// retrying with the same agent (which would likely hit rate limits again),
232    /// we switch to the next agent and preserve the prompt so the new agent
233    /// can continue the same work.
234    #[must_use]
235    pub fn switch_to_next_agent_with_prompt(&self, prompt: Option<String>) -> Self {
236        let base = self.switch_to_next_agent();
237        // Back-compat: older callers didn't track role. Preserve prompt only.
238        Self {
239            agents: base.agents,
240            current_agent_index: base.current_agent_index,
241            models_per_agent: base.models_per_agent,
242            current_model_index: base.current_model_index,
243            retry_cycle: base.retry_cycle,
244            max_cycles: base.max_cycles,
245            retry_delay_ms: base.retry_delay_ms,
246            backoff_multiplier: base.backoff_multiplier,
247            max_backoff_ms: base.max_backoff_ms,
248            backoff_pending_ms: base.backoff_pending_ms,
249            current_role: base.current_role,
250            current_drain: base.current_drain,
251            current_mode: base.current_mode,
252            rate_limit_continuation_prompt: prompt.map(|p| RateLimitContinuationPrompt {
253                drain: base.current_drain,
254                role: base.current_role,
255                prompt: p,
256            }),
257            last_session_id: base.last_session_id,
258            last_failure_reason: base.last_failure_reason.clone(),
259        }
260    }
261
262    /// Switch to next agent after rate limit, preserving prompt for continuation (role-scoped).
263    #[must_use]
264    pub fn switch_to_next_agent_with_prompt_for_role(
265        &self,
266        role: AgentRole,
267        prompt: Option<String>,
268    ) -> Self {
269        let base = self.switch_to_next_agent();
270        Self {
271            agents: base.agents,
272            current_agent_index: base.current_agent_index,
273            models_per_agent: base.models_per_agent,
274            current_model_index: base.current_model_index,
275            retry_cycle: base.retry_cycle,
276            max_cycles: base.max_cycles,
277            retry_delay_ms: base.retry_delay_ms,
278            backoff_multiplier: base.backoff_multiplier,
279            max_backoff_ms: base.max_backoff_ms,
280            backoff_pending_ms: base.backoff_pending_ms,
281            current_role: base.current_role,
282            current_drain: base.current_drain,
283            current_mode: base.current_mode,
284            rate_limit_continuation_prompt: prompt.map(|p| RateLimitContinuationPrompt {
285                drain: base.current_drain,
286                role,
287                prompt: p,
288            }),
289            last_session_id: base.last_session_id,
290            last_failure_reason: base.last_failure_reason.clone(),
291        }
292    }
293
294    /// Clear continuation prompt after successful execution.
295    ///
296    /// Called when an agent successfully completes its task, clearing any
297    /// saved prompt context from previous rate-limited agents.
298    #[must_use]
299    pub fn clear_continuation_prompt(&self) -> Self {
300        Self {
301            agents: Arc::clone(&self.agents),
302            current_agent_index: self.current_agent_index,
303            models_per_agent: Arc::clone(&self.models_per_agent),
304            current_model_index: self.current_model_index,
305            retry_cycle: self.retry_cycle,
306            max_cycles: self.max_cycles,
307            retry_delay_ms: self.retry_delay_ms,
308            backoff_multiplier: self.backoff_multiplier,
309            max_backoff_ms: self.max_backoff_ms,
310            backoff_pending_ms: self.backoff_pending_ms,
311            current_role: self.current_role,
312            current_drain: self.current_drain,
313            current_mode: self.current_mode,
314            rate_limit_continuation_prompt: None,
315            last_session_id: self.last_session_id.clone(),
316            last_failure_reason: None,
317        }
318    }
319
320    #[must_use]
321    pub fn reset_for_drain(&self, drain: AgentDrain) -> Self {
322        Self {
323            agents: Arc::clone(&self.agents),
324            current_agent_index: 0,
325            models_per_agent: Arc::clone(&self.models_per_agent),
326            current_model_index: 0,
327            retry_cycle: 0,
328            max_cycles: self.max_cycles,
329            retry_delay_ms: self.retry_delay_ms,
330            backoff_multiplier: self.backoff_multiplier,
331            max_backoff_ms: self.max_backoff_ms,
332            backoff_pending_ms: None,
333            current_role: drain.role(),
334            current_drain: drain,
335            current_mode: DrainMode::Normal,
336            rate_limit_continuation_prompt: None,
337            last_session_id: None,
338            last_failure_reason: None,
339        }
340    }
341
342    #[must_use]
343    pub fn reset_for_role(&self, role: AgentRole) -> Self {
344        self.reset_for_drain(match role {
345            AgentRole::Developer => AgentDrain::Development,
346            AgentRole::Reviewer => AgentDrain::Review,
347            AgentRole::Commit => AgentDrain::Commit,
348            AgentRole::Analysis => AgentDrain::Analysis,
349        })
350    }
351
352    #[must_use]
353    pub fn reset(&self) -> Self {
354        Self {
355            agents: Arc::clone(&self.agents),
356            current_agent_index: 0,
357            models_per_agent: Arc::clone(&self.models_per_agent),
358            current_model_index: 0,
359            retry_cycle: self.retry_cycle,
360            max_cycles: self.max_cycles,
361            retry_delay_ms: self.retry_delay_ms,
362            backoff_multiplier: self.backoff_multiplier,
363            max_backoff_ms: self.max_backoff_ms,
364            backoff_pending_ms: None,
365            current_role: self.current_role,
366            current_drain: self.current_drain,
367            current_mode: DrainMode::Normal,
368            rate_limit_continuation_prompt: None,
369            last_session_id: None,
370            last_failure_reason: None,
371        }
372    }
373
374    /// Store session ID from agent response for potential reuse.
375    #[must_use]
376    pub fn with_session_id(&self, session_id: Option<String>) -> Self {
377        Self {
378            agents: Arc::clone(&self.agents),
379            current_agent_index: self.current_agent_index,
380            models_per_agent: Arc::clone(&self.models_per_agent),
381            current_model_index: self.current_model_index,
382            retry_cycle: self.retry_cycle,
383            max_cycles: self.max_cycles,
384            retry_delay_ms: self.retry_delay_ms,
385            backoff_multiplier: self.backoff_multiplier,
386            max_backoff_ms: self.max_backoff_ms,
387            backoff_pending_ms: self.backoff_pending_ms,
388            current_role: self.current_role,
389            current_drain: self.current_drain,
390            current_mode: self.current_mode,
391            rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
392            last_session_id: session_id,
393            last_failure_reason: self.last_failure_reason.clone(),
394        }
395    }
396
397    /// Store last failure reason for CLI output context.
398    #[must_use]
399    pub fn with_failure_reason(&self, reason: Option<String>) -> Self {
400        Self {
401            agents: Arc::clone(&self.agents),
402            current_agent_index: self.current_agent_index,
403            models_per_agent: Arc::clone(&self.models_per_agent),
404            current_model_index: self.current_model_index,
405            retry_cycle: self.retry_cycle,
406            max_cycles: self.max_cycles,
407            retry_delay_ms: self.retry_delay_ms,
408            backoff_multiplier: self.backoff_multiplier,
409            max_backoff_ms: self.max_backoff_ms,
410            backoff_pending_ms: self.backoff_pending_ms,
411            current_role: self.current_role,
412            current_drain: self.current_drain,
413            current_mode: self.current_mode,
414            rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
415            last_session_id: self.last_session_id.clone(),
416            last_failure_reason: reason,
417        }
418    }
419
420    /// Clear session ID (e.g., when switching agents or starting new work).
421    #[must_use]
422    pub fn clear_session_id(&self) -> Self {
423        Self {
424            agents: Arc::clone(&self.agents),
425            current_agent_index: self.current_agent_index,
426            models_per_agent: Arc::clone(&self.models_per_agent),
427            current_model_index: self.current_model_index,
428            retry_cycle: self.retry_cycle,
429            max_cycles: self.max_cycles,
430            retry_delay_ms: self.retry_delay_ms,
431            backoff_multiplier: self.backoff_multiplier,
432            max_backoff_ms: self.max_backoff_ms,
433            backoff_pending_ms: self.backoff_pending_ms,
434            current_role: self.current_role,
435            current_drain: self.current_drain,
436            current_mode: self.current_mode,
437            rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
438            last_session_id: None,
439            last_failure_reason: self.last_failure_reason.clone(),
440        }
441    }
442
443    #[must_use]
444    pub fn start_retry_cycle(&self) -> Self {
445        let new_retry_cycle = self.retry_cycle + 1;
446        let new_backoff_pending_ms = if new_retry_cycle >= self.max_cycles {
447            None
448        } else {
449            // Create temporary state to calculate backoff
450            let temp = Self {
451                agents: Arc::clone(&self.agents),
452                current_agent_index: 0,
453                models_per_agent: Arc::clone(&self.models_per_agent),
454                current_model_index: 0,
455                retry_cycle: new_retry_cycle,
456                max_cycles: self.max_cycles,
457                retry_delay_ms: self.retry_delay_ms,
458                backoff_multiplier: self.backoff_multiplier,
459                max_backoff_ms: self.max_backoff_ms,
460                backoff_pending_ms: None,
461                current_role: self.current_role,
462                current_drain: self.current_drain,
463                current_mode: self.current_mode,
464                rate_limit_continuation_prompt: None,
465                last_session_id: None,
466                last_failure_reason: None,
467            };
468            Some(temp.calculate_backoff_delay_ms_for_retry_cycle())
469        };
470
471        Self {
472            agents: Arc::clone(&self.agents),
473            current_agent_index: 0,
474            models_per_agent: Arc::clone(&self.models_per_agent),
475            current_model_index: 0,
476            retry_cycle: new_retry_cycle,
477            max_cycles: self.max_cycles,
478            retry_delay_ms: self.retry_delay_ms,
479            backoff_multiplier: self.backoff_multiplier,
480            max_backoff_ms: self.max_backoff_ms,
481            backoff_pending_ms: new_backoff_pending_ms,
482            current_role: self.current_role,
483            current_drain: self.current_drain,
484            current_mode: self.current_mode,
485            rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
486            last_session_id: self.last_session_id.clone(),
487            last_failure_reason: self.last_failure_reason.clone(),
488        }
489    }
490
491    #[must_use]
492    pub fn clear_backoff_pending(&self) -> Self {
493        Self {
494            agents: Arc::clone(&self.agents),
495            current_agent_index: self.current_agent_index,
496            models_per_agent: Arc::clone(&self.models_per_agent),
497            current_model_index: self.current_model_index,
498            retry_cycle: self.retry_cycle,
499            max_cycles: self.max_cycles,
500            retry_delay_ms: self.retry_delay_ms,
501            backoff_multiplier: self.backoff_multiplier,
502            max_backoff_ms: self.max_backoff_ms,
503            backoff_pending_ms: None,
504            current_role: self.current_role,
505            current_drain: self.current_drain,
506            current_mode: self.current_mode,
507            rate_limit_continuation_prompt: self.rate_limit_continuation_prompt.clone(),
508            last_session_id: self.last_session_id.clone(),
509            last_failure_reason: self.last_failure_reason.clone(),
510        }
511    }
512
513    pub(super) fn calculate_backoff_delay_ms_for_retry_cycle(&self) -> u64 {
514        // The first retry cycle should use the base delay.
515        let cycle_index = self.retry_cycle.saturating_sub(1);
516        calculate_backoff_delay_ms(
517            self.retry_delay_ms,
518            self.backoff_multiplier,
519            self.max_backoff_ms,
520            cycle_index,
521        )
522    }
523}
524
#[cfg(test)]
mod backoff_semantics_tests {
    use super::*;

    /// Jumping back to a non-first agent on the cycle that reaches `max_cycles`
    /// must still leave a backoff pending: only landing on the first agent
    /// suppresses it.
    #[test]
    fn test_switch_to_agent_named_preserves_backoff_when_retry_cycle_hits_max_but_state_is_not_exhausted(
    ) {
        // Three agents without model lists, currently on the last one with a
        // single retry cycle left in the budget.
        let agent_names = vec![String::from("a"), String::from("b"), String::from("c")];
        let mut on_last_agent = AgentChainState::initial().with_agents(
            agent_names,
            vec![vec![], vec![], vec![]],
            AgentRole::Developer,
        );
        on_last_agent.max_cycles = 2;
        on_last_agent.retry_cycle = 1;
        on_last_agent.current_agent_index = 2;

        let switched_back = on_last_agent.switch_to_agent_named("b");

        assert_eq!(switched_back.current_agent_index, 1);
        assert_eq!(switched_back.retry_cycle, 2);
        assert!(
            switched_back.backoff_pending_ms.is_some(),
            "backoff should remain pending unless the state is fully exhausted"
        );
    }
}
550}