1use crate::agents::{validate_model_flag, AgentConfig, AgentRegistry, AgentRole, JsonParserType};
4use crate::common::split_command;
5use std::sync::Arc;
6
7use super::fallback::try_agent_with_retries;
8use super::fallback::TryAgentResult;
9use super::model_flag::resolve_model_with_provider;
10use super::prompt::PipelineRuntime;
11
/// Builds the ordered list of agents to attempt.
///
/// The primary agent always comes first; each fallback is appended in its
/// original order unless it is already present (which also filters out a
/// fallback that equals the primary agent).
fn build_agents_to_try<'a>(fallbacks: &'a [&'a str], primary_agent: &'a str) -> Vec<&'a str> {
    fallbacks
        .iter()
        .copied()
        .fold(vec![primary_agent], |mut ordered, candidate| {
            // `ordered` starts with the primary agent, so one membership
            // test covers both "is the primary" and "already queued".
            if !ordered.contains(&candidate) {
                ordered.push(candidate);
            }
            ordered
        })
}
22
23fn get_cli_overrides(
25 role: AgentRole,
26 runtime: &PipelineRuntime<'_>,
27) -> (Option<String>, Option<String>) {
28 match role {
29 AgentRole::Developer => (
30 runtime.config.developer_model.clone(),
31 runtime.config.developer_provider.clone(),
32 ),
33 AgentRole::Reviewer => (
34 runtime.config.reviewer_model.clone(),
35 runtime.config.reviewer_provider.clone(),
36 ),
37 AgentRole::Commit => (None, None), }
39}
40
41struct ModelFlagBuildContext<'a> {
43 agent_index: usize,
44 cli_model_override: Option<&'a String>,
45 cli_provider_override: Option<&'a String>,
46 agent_config: &'a AgentConfig,
47 agent_name: &'a str,
48 fallback_config: &'a crate::agents::fallback::FallbackConfig,
49 display_name: &'a str,
50 runtime: &'a PipelineRuntime<'a>,
51}
52
53fn build_model_flags_list(ctx: &ModelFlagBuildContext<'_>) -> Vec<Option<String>> {
55 let mut model_flags_to_try: Vec<Option<String>> = Vec::new();
56
57 if ctx.agent_index == 0
60 && (ctx.cli_model_override.is_some() || ctx.cli_provider_override.is_some())
61 {
62 let resolved = resolve_model_with_provider(
63 ctx.cli_provider_override.map(std::string::String::as_str),
64 ctx.cli_model_override.map(std::string::String::as_str),
65 ctx.agent_config.model_flag.as_deref(),
66 );
67 if resolved.is_some() {
68 model_flags_to_try.push(resolved);
69 }
70 }
71
72 if model_flags_to_try.is_empty() {
74 model_flags_to_try.push(None);
75 }
76
77 if ctx.fallback_config.has_provider_fallbacks(ctx.agent_name) {
79 let provider_fallbacks = ctx.fallback_config.get_provider_fallbacks(ctx.agent_name);
80 ctx.runtime.logger.info(&format!(
81 "Agent '{}' has {} provider fallback(s) configured",
82 ctx.display_name,
83 provider_fallbacks.len()
84 ));
85 for model in provider_fallbacks {
86 model_flags_to_try.push(Some(model.clone()));
87 }
88 }
89
90 model_flags_to_try
91}
92
93fn build_command_for_model(ctx: &TryModelContext<'_>, runtime: &PipelineRuntime<'_>) -> String {
95 let model_ref = ctx.model_flag.map(std::string::String::as_str);
96 let yolo = true;
99
100 if ctx.agent_index == 0 && ctx.cycle == 0 && ctx.model_index == 0 {
101 match ctx.role {
103 AgentRole::Developer => runtime.config.developer_cmd.clone().unwrap_or_else(|| {
104 ctx.agent_config
105 .build_cmd_with_model(true, true, true, model_ref)
106 }),
107 AgentRole::Reviewer => runtime.config.reviewer_cmd.clone().unwrap_or_else(|| {
108 ctx.agent_config
109 .build_cmd_with_model(true, true, yolo, model_ref)
110 }),
111 AgentRole::Commit => runtime.config.commit_cmd.clone().unwrap_or_else(|| {
112 ctx.agent_config
113 .build_cmd_with_model(true, true, yolo, model_ref)
114 }),
115 }
116 } else {
117 ctx.agent_config
118 .build_cmd_with_model(true, true, yolo, model_ref)
119 }
120}
121
122fn validate_glm_print_flag(
128 agent_name: &str,
129 agent_config: &AgentConfig,
130 cmd_str: &str,
131 agent_index: usize,
132 cycle: u32,
133 model_index: usize,
134 runtime: &PipelineRuntime<'_>,
135) {
136 if !crate::agents::is_glm_like_agent(agent_name)
139 || agent_index != 0
140 || cycle != 0
141 || model_index != 0
142 {
143 return;
144 }
145
146 let cmd_argv = split_command(cmd_str).ok();
147 let has_print_flag = cmd_argv
148 .as_ref()
149 .is_some_and(|argv| argv.iter().any(|arg| arg == "-p"));
150 if !has_print_flag {
151 if agent_config.print_flag.is_empty() {
152 runtime.logger.warn(&format!(
153 "GLM agent '{agent_name}' is missing '-p' flag: print_flag is empty in configuration. \
154 Add 'print_flag = \"-p\"' to [ccs] section in ~/.config/ralph-workflow.toml"
155 ));
156 } else {
157 runtime.logger.warn(&format!(
158 "GLM agent '{agent_name}' may be missing '-p' flag in command. Check configuration."
159 ));
160 }
161 }
162}
163
164fn build_execution_metadata(
166 model_flag: Option<&String>,
167 display_name: &str,
168 base_label: &str,
169 agent_name: &str,
170 logfile_prefix: &str,
171 model_index: usize,
172) -> (String, String, String) {
173 let model_suffix = model_flag.map(|m| format!(" [{m}]")).unwrap_or_default();
174 let display_name_with_suffix = format!("{display_name}{model_suffix}");
175 let label = format!("{base_label} ({display_name_with_suffix})");
176 let logfile = super::logfile::build_logfile_path(logfile_prefix, agent_name, model_index);
177 (label, logfile, display_name_with_suffix)
178}
179
/// Outcome of trying one agent (or one model of an agent).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TrySingleAgentResult {
    /// The attempt succeeded; the overall run finishes with exit code 0.
    Success,
    /// The attempt failed in a way that ends the whole run with the carried
    /// exit code.
    Unrecoverable(i32),
    /// Stop trying this agent's remaining models and move on to the next
    /// agent.
    Fallback,
    /// This model is done; continue with the agent's next model flag, or with
    /// the next agent once all model flags are used up.
    NoRetry,
}
192
193struct TryModelContext<'a> {
195 agent_config: &'a AgentConfig,
196 agent_name: &'a str,
197 display_name: &'a str,
198 agent_index: usize,
199 cycle: u32,
200 model_index: usize,
201 role: AgentRole,
202 model_flag: Option<&'a String>,
203 base_label: &'a str,
204 prompt: &'a str,
205 logfile_prefix: &'a str,
206 fallback_config: &'a crate::agents::fallback::FallbackConfig,
207 output_validator: Option<crate::pipeline::fallback::OutputValidator>,
208 retry_timer: Arc<dyn crate::agents::RetryTimerProvider>,
209 workspace: &'a dyn crate::workspace::Workspace,
210}
211
212fn try_single_model(
214 ctx: &TryModelContext<'_>,
215 runtime: &mut PipelineRuntime<'_>,
216) -> std::io::Result<TrySingleAgentResult> {
217 let mut parser_type = ctx.agent_config.json_parser;
218
219 if ctx.role == AgentRole::Reviewer {
220 if let Some(ref parser_override) = runtime.config.reviewer_json_parser {
221 parser_type = JsonParserType::parse(parser_override);
222 if ctx.agent_index == 0 && ctx.cycle == 0 && ctx.model_index == 0 {
223 runtime.logger.info(&format!(
224 "Using JSON parser override '{parser_override}' for reviewer"
225 ));
226 }
227 }
228 }
229
230 let cmd_str = build_command_for_model(ctx, runtime);
231
232 validate_glm_print_flag(
233 ctx.agent_name,
234 ctx.agent_config,
235 &cmd_str,
236 ctx.agent_index,
237 ctx.cycle,
238 ctx.model_index,
239 runtime,
240 );
241
242 let (label, logfile, display_name_with_suffix) = build_execution_metadata(
243 ctx.model_flag,
244 ctx.display_name,
245 ctx.base_label,
246 ctx.agent_name,
247 ctx.logfile_prefix,
248 ctx.model_index,
249 );
250
251 let attempt_config = crate::pipeline::fallback::AgentAttemptConfig {
252 agent_name: ctx.agent_name,
253 model_flag: ctx.model_flag.map(std::string::String::as_str),
254 label: &label,
255 display_name: &display_name_with_suffix,
256 cmd_str: &cmd_str,
257 prompt: ctx.prompt,
258 logfile: &logfile,
259 logfile_prefix: ctx.logfile_prefix,
260 parser_type,
261 env_vars: &ctx.agent_config.env_vars,
262 model_index: ctx.model_index,
263 agent_index: ctx.agent_index,
264 cycle: ctx.cycle as usize,
265 fallback_config: ctx.fallback_config,
266 output_validator: ctx.output_validator,
267 retry_timer: Arc::clone(&ctx.retry_timer),
268 workspace: ctx.workspace,
269 };
270 let result = try_agent_with_retries(&attempt_config, runtime)?;
271
272 match result {
273 TryAgentResult::Success => Ok(TrySingleAgentResult::Success),
274 TryAgentResult::Unrecoverable(exit_code) => {
275 Ok(TrySingleAgentResult::Unrecoverable(exit_code))
276 }
277 TryAgentResult::Fallback => Ok(TrySingleAgentResult::Fallback),
278 TryAgentResult::NoRetry => Ok(TrySingleAgentResult::NoRetry),
279 }
280}
281
282struct TryAgentContext<'a> {
284 agent_name: &'a str,
285 agent_index: usize,
286 cycle: u32,
287 role: AgentRole,
288 base_label: &'a str,
289 prompt: &'a str,
290 logfile_prefix: &'a str,
291 cli_model_override: Option<&'a String>,
292 cli_provider_override: Option<&'a String>,
293 output_validator: Option<crate::pipeline::fallback::OutputValidator>,
294 retry_timer: Arc<dyn crate::agents::RetryTimerProvider>,
295 workspace: &'a dyn crate::workspace::Workspace,
296}
297
298fn try_single_agent(
300 ctx: &TryAgentContext<'_>,
301 runtime: &mut PipelineRuntime<'_>,
302 registry: &AgentRegistry,
303 fallback_config: &crate::agents::fallback::FallbackConfig,
304) -> std::io::Result<TrySingleAgentResult> {
305 let Some(agent_config) = registry.resolve_config(ctx.agent_name) else {
306 runtime.logger.warn(&format!(
307 "Agent '{}' not found in registry, skipping",
308 ctx.agent_name
309 ));
310 return Ok(TrySingleAgentResult::Fallback);
311 };
312
313 let display_name = registry.display_name(ctx.agent_name);
314 let model_ctx = ModelFlagBuildContext {
315 agent_index: ctx.agent_index,
316 cli_model_override: ctx.cli_model_override,
317 cli_provider_override: ctx.cli_provider_override,
318 agent_config: &agent_config,
319 agent_name: ctx.agent_name,
320 fallback_config,
321 display_name: &display_name,
322 runtime,
323 };
324 let model_flags_to_try = build_model_flags_list(&model_ctx);
325
326 if ctx.agent_index == 0 && ctx.cycle == 0 {
327 for model_flag in model_flags_to_try.iter().flatten() {
328 for warning in validate_model_flag(model_flag) {
329 runtime.logger.warn(&warning);
330 }
331 }
332 }
333
334 for (model_index, model_flag) in model_flags_to_try.iter().enumerate() {
335 let model_ctx = TryModelContext {
336 agent_config: &agent_config,
337 agent_name: ctx.agent_name,
338 display_name: &display_name,
339 agent_index: ctx.agent_index,
340 cycle: ctx.cycle,
341 model_index,
342 role: ctx.role,
343 model_flag: model_flag.as_ref(),
344 base_label: ctx.base_label,
345 prompt: ctx.prompt,
346 logfile_prefix: ctx.logfile_prefix,
347 fallback_config,
348 output_validator: ctx.output_validator,
349 retry_timer: Arc::clone(&ctx.retry_timer),
350 workspace: ctx.workspace,
351 };
352 let result = try_single_model(&model_ctx, runtime)?;
353
354 match result {
355 TrySingleAgentResult::Success => return Ok(TrySingleAgentResult::Success),
356 TrySingleAgentResult::Unrecoverable(exit_code) => {
357 return Ok(TrySingleAgentResult::Unrecoverable(exit_code))
358 }
359 TrySingleAgentResult::Fallback => return Ok(TrySingleAgentResult::Fallback),
360 TrySingleAgentResult::NoRetry => {}
361 }
362 }
363
364 Ok(TrySingleAgentResult::NoRetry)
365}
366
367pub struct FallbackConfig<'a, 'b> {
369 pub role: AgentRole,
370 pub base_label: &'a str,
371 pub prompt: &'a str,
372 pub logfile_prefix: &'a str,
373 pub runtime: &'a mut PipelineRuntime<'b>,
374 pub registry: &'a AgentRegistry,
375 pub primary_agent: &'a str,
376 pub output_validator: Option<crate::pipeline::fallback::OutputValidator>,
377 pub workspace: &'a dyn crate::workspace::Workspace,
378}
379
380pub fn run_with_fallback_and_validator(
388 config: &mut FallbackConfig<'_, '_>,
389) -> std::io::Result<i32> {
390 run_with_fallback_internal(config)
391}
392
393fn run_with_fallback_internal(config: &mut FallbackConfig<'_, '_>) -> std::io::Result<i32> {
398 let fallback_config = config.registry.fallback_config();
399 let fallbacks = config.registry.available_fallbacks(config.role);
400 if fallback_config.has_fallbacks(config.role) {
401 config.runtime.logger.info(&format!(
402 "Agent fallback chain for {}: {}",
403 config.role,
404 fallbacks.join(", ")
405 ));
406 } else {
407 config.runtime.logger.info(&format!(
408 "No configured fallbacks for {}, using primary only",
409 config.role
410 ));
411 }
412
413 let agents_to_try = build_agents_to_try(&fallbacks, config.primary_agent);
414 let (cli_model_override, cli_provider_override) =
415 get_cli_overrides(config.role, config.runtime);
416
417 for cycle in 0..fallback_config.max_cycles {
418 if cycle > 0 {
419 let backoff_ms = fallback_config.calculate_backoff(cycle - 1);
420 config.runtime.logger.info(&format!(
421 "Cycle {}/{}: All agents exhausted, waiting {}ms before retry (exponential backoff)...",
422 cycle + 1,
423 fallback_config.max_cycles,
424 backoff_ms
425 ));
426 config
427 .registry
428 .retry_timer()
429 .sleep(std::time::Duration::from_millis(backoff_ms));
430 }
431
432 for (agent_index, agent_name) in agents_to_try.iter().enumerate() {
433 let ctx = TryAgentContext {
434 agent_name,
435 agent_index,
436 cycle,
437 role: config.role,
438 base_label: config.base_label,
439 prompt: config.prompt,
440 logfile_prefix: config.logfile_prefix,
441 cli_model_override: cli_model_override.as_ref(),
442 cli_provider_override: cli_provider_override.as_ref(),
443 output_validator: config.output_validator,
444 retry_timer: config.registry.retry_timer(),
445 workspace: config.workspace,
446 };
447 let result = try_single_agent(&ctx, config.runtime, config.registry, fallback_config)?;
448
449 match result {
450 TrySingleAgentResult::Success => return Ok(0),
451 TrySingleAgentResult::Unrecoverable(exit_code) => return Ok(exit_code),
452 TrySingleAgentResult::Fallback | TrySingleAgentResult::NoRetry => {}
453 }
454 }
455 }
456
457 config.runtime.logger.error(&format!(
458 "All agents exhausted after {} cycles with exponential backoff",
459 fallback_config.max_cycles
460 ));
461 Ok(1)
462}
463
/// Outcome of an attempt to continue an existing agent session.
#[derive(Debug)]
pub enum SessionContinuationResult {
    /// The agent command ran to completion and reported `exit_code`.
    Ran {
        /// Exit code reported by the agent command.
        exit_code: i32,
    },
    /// Continuation was not possible or did not complete; the caller should
    /// start a new session instead.
    Fallback,
}
498
499pub struct XsdRetryConfig<'a, 'b> {
501 pub role: AgentRole,
503 pub base_label: &'a str,
505 pub prompt: &'a str,
507 pub logfile_prefix: &'a str,
509 pub runtime: &'a mut PipelineRuntime<'b>,
511 pub registry: &'a AgentRegistry,
513 pub primary_agent: &'a str,
515 pub session_info: Option<&'a crate::pipeline::session::SessionInfo>,
518 pub retry_num: usize,
520 pub output_validator: Option<crate::pipeline::fallback::OutputValidator>,
523 pub workspace: &'a dyn crate::workspace::Workspace,
525}
526
527pub fn run_xsd_retry_with_session(config: &mut XsdRetryConfig<'_, '_>) -> std::io::Result<i32> {
558 if config.retry_num > 0 {
560 if let Some(session_info) = config.session_info {
561 config.runtime.logger.info(&format!(
563 " Attempting session continuation with {} (session: {}...)",
564 session_info.agent_name,
565 &session_info.session_id[..8.min(session_info.session_id.len())]
566 ));
567 match try_session_continuation(config, session_info) {
568 SessionContinuationResult::Ran { exit_code } => {
569 config
573 .runtime
574 .logger
575 .info(" Session continuation succeeded");
576 return Ok(exit_code);
577 }
578 SessionContinuationResult::Fallback => {
579 config
581 .runtime
582 .logger
583 .warn(" Session continuation failed, falling back to new session");
584 }
585 }
586 } else {
587 config
588 .runtime
589 .logger
590 .warn(" No session info available for retry, starting new session");
591 }
592 }
593
594 let mut fallback_config = FallbackConfig {
596 role: config.role,
597 base_label: config.base_label,
598 prompt: config.prompt,
599 logfile_prefix: config.logfile_prefix,
600 runtime: config.runtime,
601 registry: config.registry,
602 primary_agent: config.primary_agent,
603 output_validator: config.output_validator,
604 workspace: config.workspace,
605 };
606 run_with_fallback_and_validator(&mut fallback_config)
607}
608
609fn try_session_continuation(
619 config: &mut XsdRetryConfig<'_, '_>,
620 session_info: &crate::pipeline::session::SessionInfo,
621) -> SessionContinuationResult {
622 let registry_name = config
627 .registry
628 .resolve_from_logfile_name(&session_info.agent_name)
629 .unwrap_or_else(|| session_info.agent_name.clone());
630
631 let agent_config = match config.registry.resolve_config(®istry_name) {
633 Some(cfg) => cfg,
634 None => {
635 return SessionContinuationResult::Fallback;
637 }
638 };
639
640 if !agent_config.supports_session_continuation() {
641 return SessionContinuationResult::Fallback;
643 }
644
645 let yolo = matches!(config.role, AgentRole::Developer);
647 let cmd_str = agent_config.build_cmd_with_session(
648 true, yolo, true, None, Some(&session_info.session_id),
653 );
654
655 let sanitized_agent = super::logfile::sanitize_agent_name(&session_info.agent_name);
658 let logfile = format!(
659 "{}_{}_session_{}.log",
660 config.logfile_prefix, sanitized_agent, config.retry_num
661 );
662
663 if config.runtime.config.verbosity.is_debug() {
665 config.runtime.logger.info(&format!(
666 " Attempting session continuation with {} (session: {})",
667 session_info.agent_name, session_info.session_id
668 ));
669 }
670
671 let cmd = crate::pipeline::PromptCommand {
673 cmd_str: &cmd_str,
674 prompt: config.prompt,
675 label: &format!("{} (session)", config.base_label),
676 display_name: &session_info.agent_name,
677 logfile: &logfile,
678 parser_type: agent_config.json_parser,
679 env_vars: &agent_config.env_vars,
680 };
681
682 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
685 crate::pipeline::run_with_prompt(&cmd, config.runtime)
686 }));
687
688 match result {
689 Ok(Ok(cmd_result)) => {
690 SessionContinuationResult::Ran {
693 exit_code: cmd_result.exit_code,
694 }
695 }
696 Ok(Err(_io_error)) => {
697 SessionContinuationResult::Fallback
700 }
701 Err(_panic) => {
702 SessionContinuationResult::Fallback
705 }
706 }
707}