use std::fmt;
use anyhow::anyhow;
use crate::contracts::Runner;
use crate::redaction::RedactedString;
/// Coarse classification of a runner failure, as returned by `RunnerError::classify`.
///
/// Each variant wraps a finer-grained reason enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RunnerFailureClass {
/// The failure matched a transient condition: rate limiting, temporary
/// unavailability, a timeout, or a transient I/O error kind.
Retryable(RetryableReason),
/// The failure was attributed to authentication or to a missing runner binary.
RequiresUserInput(UserInputReason),
/// Every other failure; also the fallback when no textual marker matches.
NonRetryable(NonRetryableReason),
}
/// Specific reason attached to `RunnerFailureClass::Retryable`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RetryableReason {
/// The failure text matched a rate-limit marker (e.g. "429", "rate limit").
RateLimited,
/// Assigned for `RunnerError::Timeout` and when the failure text matched a
/// temporary-unavailability marker (e.g. "503", "service unavailable").
TemporaryUnavailable,
/// Assigned for `RunnerError::Io` errors whose kind is one of the connection,
/// timeout, unexpected-EOF or would-block kinds listed in `RunnerError::classify`.
TransientIo,
}
/// Specific reason attached to `RunnerFailureClass::RequiresUserInput`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum UserInputReason {
/// The failure text matched an authentication marker (e.g. "401", "unauthorized").
Auth,
/// Assigned for `RunnerError::BinaryMissing`.
MissingBinary,
}
/// Specific reason attached to `RunnerFailureClass::NonRetryable`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NonRetryableReason {
/// Assigned for `RunnerError::SpawnFailed`.
InvalidInvocation,
/// Fallback for interruptions, signal terminations, non-transient I/O errors and
/// any failure whose text matches no known marker.
FatalExit,
}
/// Errors reported for a runner invocation.
///
/// Use `RunnerError::classify` to map a value to a `RunnerFailureClass`.
#[derive(Debug, thiserror::Error)]
pub enum RunnerError {
/// The runner binary could not be found.
#[error("runner binary not found: {bin}")]
BinaryMissing {
/// Binary name or path, as shown in the error message.
bin: String,
/// Underlying I/O error, exposed through `Error::source`.
#[source]
source: std::io::Error,
},
/// The runner process could not be spawned.
#[error("runner failed to spawn: {bin}")]
SpawnFailed {
/// Binary name or path, as shown in the error message.
bin: String,
/// Underlying I/O error, exposed through `Error::source`.
#[source]
source: std::io::Error,
},
/// The runner exited with a non-zero status code.
///
/// The `Display` output includes the exit code, stdout and stderr.
#[error("runner exited non-zero (code={code})\nstdout: {stdout}\nstderr: {stderr}")]
NonZeroExit {
/// Exit code of the runner process.
code: i32,
/// Runner stdout, held as a `RedactedString`.
stdout: RedactedString,
/// Runner stderr, held as a `RedactedString`.
stderr: RedactedString,
/// NOTE(review): presumably identifies the runner session that failed; it is not
/// read by the code visible in this file — confirm with callers.
session_id: Option<String>,
},
/// The runner was terminated by a signal.
#[error("runner terminated by signal (signal={signal:?})\nstdout: {stdout}\nstderr: {stderr}")]
TerminatedBySignal {
/// Signal number, or `None` when no number is available.
signal: Option<i32>,
/// Runner stdout, held as a `RedactedString`.
stdout: RedactedString,
/// Runner stderr, held as a `RedactedString`.
stderr: RedactedString,
/// NOTE(review): presumably identifies the runner session that failed; it is not
/// read by the code visible in this file — confirm with callers.
session_id: Option<String>,
},
/// The run was interrupted.
#[error("runner interrupted")]
Interrupted,
/// The run timed out.
#[error("runner timed out")]
Timeout,
/// An I/O error; `#[from]` provides the `From<std::io::Error>` conversion.
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// Any other error, carried as an `anyhow::Error`; `#[from]` provides the
/// `From<anyhow::Error>` conversion.
#[error("other error: {0}")]
Other(#[from] anyhow::Error),
}
/// Returns the short label for `runner` that is embedded in error messages.
///
/// Built-in runners map to a fixed lowercase name; a plugin runner is rendered as
/// `plugin:<id>`.
fn runner_label(runner: &Runner) -> String {
    let builtin = match runner {
        // Plugins are the only case that needs formatting, so handle them up front.
        Runner::Plugin(id) => return format!("plugin:{}", id),
        Runner::Codex => "codex",
        Runner::Opencode => "opencode",
        Runner::Gemini => "gemini",
        Runner::Cursor => "cursor",
        Runner::Claude => "claude",
        Runner::Kimi => "kimi",
        Runner::Pi => "pi",
    };
    builtin.to_owned()
}
/// Heuristically decides whether `text` describes a rate-limit failure.
///
/// Matching is case-insensitive. The phrases "rate limit", "too many requests",
/// "quota exceeded" and "throttled" match anywhere in the text. The status code
/// `429` matches only when it is not directly adjacent to another ASCII digit, so
/// unrelated numbers such as `14290` are not mistaken for a status code.
fn looks_like_rate_limit(text: &str) -> bool {
    // True when `code` occurs in `haystack` with no ASCII digit right before or after it.
    fn has_standalone_code(haystack: &str, code: &str) -> bool {
        let bytes = haystack.as_bytes();
        haystack.match_indices(code).any(|(start, hit)| {
            let digit_before = start > 0 && bytes[start - 1].is_ascii_digit();
            let digit_after =
                matches!(bytes.get(start + hit.len()), Some(b) if b.is_ascii_digit());
            !digit_before && !digit_after
        })
    }

    const PHRASES: [&str; 4] = [
        "rate limit",
        "too many requests",
        "quota exceeded",
        "throttled",
    ];

    let lower = text.to_lowercase();
    has_standalone_code(&lower, "429") || PHRASES.iter().any(|phrase| lower.contains(*phrase))
}
/// Heuristically decides whether `text` describes a temporary service outage.
///
/// Matching is case-insensitive. The phrases "service unavailable",
/// "temporarily unavailable" and "gateway timeout" match anywhere in the text. The
/// status codes `502`, `503` and `504` match only when they are not directly adjacent
/// to another ASCII digit, so unrelated numbers such as `15030` are not mistaken for
/// a status code.
fn looks_like_temporary_unavailable(text: &str) -> bool {
    // True when `code` occurs in `haystack` with no ASCII digit right before or after it.
    fn has_standalone_code(haystack: &str, code: &str) -> bool {
        let bytes = haystack.as_bytes();
        haystack.match_indices(code).any(|(start, hit)| {
            let digit_before = start > 0 && bytes[start - 1].is_ascii_digit();
            let digit_after =
                matches!(bytes.get(start + hit.len()), Some(b) if b.is_ascii_digit());
            !digit_before && !digit_after
        })
    }

    const STATUS_CODES: [&str; 3] = ["502", "503", "504"];
    const PHRASES: [&str; 3] = [
        "service unavailable",
        "temporarily unavailable",
        "gateway timeout",
    ];

    let lower = text.to_lowercase();
    STATUS_CODES
        .iter()
        .any(|code| has_standalone_code(&lower, code))
        || PHRASES.iter().any(|phrase| lower.contains(*phrase))
}
/// Heuristically decides whether `text` describes an authentication failure.
///
/// Matching is case-insensitive. The phrases "unauthorized", "invalid api key",
/// "not logged in", "authentication failed" and "access denied" match anywhere in the
/// text. The status code `401` matches only when it is not directly adjacent to
/// another ASCII digit, so unrelated numbers such as `14010` are not mistaken for a
/// status code.
///
/// `_runner` is currently unused; the same markers apply to every runner.
fn looks_like_auth_required(_runner: &Runner, text: &str) -> bool {
    // True when `code` occurs in `haystack` with no ASCII digit right before or after it.
    fn has_standalone_code(haystack: &str, code: &str) -> bool {
        let bytes = haystack.as_bytes();
        haystack.match_indices(code).any(|(start, hit)| {
            let digit_before = start > 0 && bytes[start - 1].is_ascii_digit();
            let digit_after =
                matches!(bytes.get(start + hit.len()), Some(b) if b.is_ascii_digit());
            !digit_before && !digit_after
        })
    }

    const PHRASES: [&str; 5] = [
        "unauthorized",
        "invalid api key",
        "not logged in",
        "authentication failed",
        "access denied",
    ];

    let lower = text.to_lowercase();
    has_standalone_code(&lower, "401") || PHRASES.iter().any(|phrase| lower.contains(*phrase))
}
/// Classifies a failed run from the text it wrote to `stdout` and `stderr`.
///
/// Both streams are scanned together. Checks run in priority order: rate limit,
/// temporary unavailability, then authentication. When none matches the result is
/// `NonRetryable(FatalExit)`.
///
/// `_code` is accepted for the caller's convenience but is currently unused.
fn classify_textual_failure(
    runner: &Runner,
    _code: i32,
    stdout: &str,
    stderr: &str,
) -> RunnerFailureClass {
    // No case folding here: every `looks_like_*` helper lowercases its own input, so
    // folding the combined text first would only add an extra allocation.
    let text = format!("{} {}", stdout, stderr);

    if looks_like_rate_limit(&text) {
        RunnerFailureClass::Retryable(RetryableReason::RateLimited)
    } else if looks_like_temporary_unavailable(&text) {
        RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable)
    } else if looks_like_auth_required(runner, &text) {
        RunnerFailureClass::RequiresUserInput(UserInputReason::Auth)
    } else {
        RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit)
    }
}
impl RunnerError {
    /// Maps this error to a [`RunnerFailureClass`].
    ///
    /// * `BinaryMissing` → `RequiresUserInput(MissingBinary)`
    /// * `SpawnFailed` → `NonRetryable(InvalidInvocation)`
    /// * `Interrupted`, `TerminatedBySignal` → `NonRetryable(FatalExit)`
    /// * `Timeout` → `Retryable(TemporaryUnavailable)`
    /// * `Io` → `Retryable(TransientIo)` for the timeout / connection / unexpected-EOF /
    ///   would-block kinds listed below, otherwise `NonRetryable(FatalExit)`
    /// * `NonZeroExit` → decided from the `Display` text of stdout and stderr by
    ///   `classify_textual_failure`
    /// * `Other` → decided from the error's alternate (`{:#}`) rendering using the
    ///   same rate-limit / unavailable / auth checks, in that order
    ///
    /// `runner` is forwarded to the text-based checks.
    pub(crate) fn classify(&self, runner: &Runner) -> RunnerFailureClass {
        use std::io::ErrorKind;

        match self {
            RunnerError::BinaryMissing { .. } => {
                RunnerFailureClass::RequiresUserInput(UserInputReason::MissingBinary)
            }
            RunnerError::SpawnFailed { .. } => {
                RunnerFailureClass::NonRetryable(NonRetryableReason::InvalidInvocation)
            }
            RunnerError::Interrupted | RunnerError::TerminatedBySignal { .. } => {
                RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit)
            }
            RunnerError::Timeout => {
                RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable)
            }
            RunnerError::Io(e) => match e.kind() {
                ErrorKind::TimedOut
                | ErrorKind::ConnectionReset
                | ErrorKind::ConnectionAborted
                | ErrorKind::ConnectionRefused
                | ErrorKind::NotConnected
                | ErrorKind::UnexpectedEof
                | ErrorKind::WouldBlock => {
                    RunnerFailureClass::Retryable(RetryableReason::TransientIo)
                }
                // `ErrorKind` is non-exhaustive, so a catch-all arm is required here.
                _ => RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit),
            },
            RunnerError::NonZeroExit {
                code,
                stdout,
                stderr,
                ..
            } => classify_textual_failure(runner, *code, &stdout.to_string(), &stderr.to_string()),
            RunnerError::Other(err) => {
                // No case folding here: every `looks_like_*` helper lowercases its own
                // input, so folding `msg` first would only add an extra allocation.
                let msg = format!("{:#}", err);
                if looks_like_rate_limit(&msg) {
                    RunnerFailureClass::Retryable(RetryableReason::RateLimited)
                } else if looks_like_temporary_unavailable(&msg) {
                    RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable)
                } else if looks_like_auth_required(runner, &msg) {
                    RunnerFailureClass::RequiresUserInput(UserInputReason::Auth)
                } else {
                    RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit)
                }
            }
        }
    }
}
pub(crate) fn runner_execution_error(runner: &Runner, bin: &str, step: &str) -> RunnerError {
RunnerError::Other(anyhow!(
"Runner execution failed (runner={}, bin={}): {}.",
runner_label(runner),
bin,
step
))
}
pub(crate) fn runner_execution_error_with_source(
runner: &Runner,
bin: &str,
step: &str,
source: impl fmt::Display,
) -> RunnerError {
RunnerError::Other(anyhow!(
"Runner execution failed (runner={}, bin={}): {}: {}.",
runner_label(runner),
bin,
step,
source
))
}
// Tests are kept in the out-of-line `tests` submodule and compiled only for test builds.
#[cfg(test)]
mod tests;