Skip to main content

vtcode_core/core/agent/runner/
retry.rs

1use super::AgentRunner;
2use crate::core::agent::task::{ContextItem, Task, TaskResults};
3use crate::error::{ErrorCode, Result as VtCodeResult, VtCodeError};
4use crate::retry::{RetryEvent, RetryPolicy, run_with_retry};
5use crate::utils::colors::style;
6use tracing::{info, warn};
7
8/// Per-retry-loop context for the agent runner. Held by `&mut` inside
9/// `run_with_retry` so the `on_event` and `operation` callbacks can
10/// share access to the runner's mutable state without splitting borrows.
11struct AgentRetryContext<'a> {
12    runner: &'a mut AgentRunner,
13    metrics: std::sync::Arc<crate::metrics::MetricsCollector>,
14    policy_max_attempts: u32,
15    task_id: String,
16}
17
18impl AgentRunner {
19    /// Execute a task with automatic retry on transient failures
20    ///
21    /// Wraps `execute_task` with retry logic using exponential backoff.
22    /// Retries only occur for transient errors (timeouts, network issues, 5xx errors).
23    /// Non-retryable errors (auth failures, invalid requests) fail immediately.
24    pub async fn execute_task_with_retry(
25        &mut self,
26        task: &Task,
27        contexts: &[ContextItem],
28        max_retries: u32,
29    ) -> VtCodeResult<TaskResults> {
30        use std::time::Duration;
31
32        let policy = RetryPolicy::from_retries(
33            max_retries,
34            Duration::from_secs(2),
35            Duration::from_secs(30),
36            2.0,
37        );
38        let metrics = self.tool_registry.metrics_collector();
39        let task_id = task.id.clone();
40        let mut ctx = AgentRetryContext {
41            runner: self,
42            metrics,
43            policy_max_attempts: policy.max_attempts,
44            task_id: task_id.clone(),
45        };
46
47        run_with_retry(
48            &policy,
49            &mut ctx,
50            |ctx, event| match event {
51                RetryEvent::AttemptStart {
52                    attempt,
53                    max_attempts,
54                } => {
55                    info!(
56                        attempt = attempt + 1,
57                        max_attempts,
58                        task_id = %ctx.task_id,
59                        "agent task attempt starting"
60                    );
61                }
62                RetryEvent::Success { attempt } if attempt > 0 => {
63                    ctx.metrics.record_retry_success();
64                    ctx.runner.runner_println(format_args!(
65                        "{} Task succeeded after {} attempt(s)",
66                        style("[✓]").green().bold(),
67                        attempt + 1
68                    ));
69                    info!(
70                        attempt = attempt + 1,
71                        task_id = %ctx.task_id,
72                        "agent task succeeded after retry"
73                    );
74                }
75                RetryEvent::Success { .. } => {}
76                RetryEvent::GiveUp {
77                    attempt,
78                    error,
79                    decision,
80                    category_was_retryable,
81                } => {
82                    if category_was_retryable && attempt + 1 == ctx.policy_max_attempts {
83                        ctx.metrics.record_retry_exhausted();
84                    }
85                    warn!(
86                        attempt = attempt + 1,
87                        max_attempts = ctx.policy_max_attempts,
88                        task_id = %ctx.task_id,
89                        error = %error,
90                        category = ?decision.category,
91                        "agent task attempt failed (non-retryable)"
92                    );
93                }
94                RetryEvent::Backoff {
95                    attempt,
96                    error,
97                    decision,
98                    delay,
99                    ..
100                } => {
101                    warn!(
102                        attempt = attempt + 1,
103                        max_attempts = ctx.policy_max_attempts,
104                        task_id = %ctx.task_id,
105                        error = %error,
106                        category = ?decision.category,
107                        "agent task attempt failed"
108                    );
109                    ctx.metrics.record_retry_attempt();
110                    ctx.runner.runner_println(format_args!(
111                        "{} Task failed (attempt {}/{}), retrying in {}s...",
112                        style("[Warning]").red().bold(),
113                        attempt + 1,
114                        ctx.policy_max_attempts,
115                        delay.as_secs()
116                    ));
117                    info!(
118                        delay_ms = delay.as_millis() as u64,
119                        next_attempt = attempt + 2,
120                        task_id = %ctx.task_id,
121                        category = ?decision.category,
122                        "backing off before retry"
123                    );
124                }
125                RetryEvent::Exhausted { .. } => {
126                    warn!(
127                        task_id = %ctx.task_id,
128                        attempts = ctx.policy_max_attempts,
129                        "agent task failed after all retries"
130                    );
131                    ctx.runner.runner_println(format_args!(
132                        "{} Task failed after {} attempts",
133                        style("[Error]").red().bold(),
134                        ctx.policy_max_attempts
135                    ));
136                }
137            },
138            |ctx| {
139                let task = task.clone();
140                let contexts = contexts.to_vec();
141                let runner = &mut *ctx.runner;
142                Box::pin(async move { runner.execute_task(&task, &contexts).await })
143            },
144            move |_policy| {
145                VtCodeError::execution(
146                    ErrorCode::ToolExecutionFailed,
147                    format!(
148                        "agent task '{task_id}' exhausted the retry loop without an error payload"
149                    ),
150                )
151            },
152        )
153        .await
154    }
155}