use super::glm_detection::is_glm_like_agent;
/// Classification of an agent failure.
///
/// The kind drives the recovery strategy exposed by the inherent methods:
/// retry the same agent (`should_retry`), switch to another agent
/// (`should_fallback` / `should_immediate_agent_fallback`), or give up
/// (`is_unrecoverable`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AgentErrorKind {
/// API rate limit exceeded; triggers an immediate switch to the next agent.
RateLimited,
/// Token/context limit exceeded; falls back and suggests a smaller context.
TokenExhausted,
/// API service temporarily unavailable; retryable.
ApiUnavailable,
/// Network connectivity issue; retryable.
NetworkError,
/// Authentication failure; falls back to another agent.
AuthFailure,
/// Agent command not found; falls back to another agent.
CommandNotFound,
/// Disk space exhausted; unrecoverable.
DiskFull,
/// Process terminated (possibly OOM); falls back and suggests a smaller context.
ProcessKilled,
/// Invalid response from the agent; retryable.
InvalidResponse,
/// Request timed out; retryable.
Timeout,
/// Tool execution failed (e.g., file write); falls back to another agent.
ToolExecutionFailed,
/// Known agent-specific issue; falls back to another agent.
AgentSpecificQuirk,
/// Agent-specific issue that may be transient; retryable.
RetryableAgentQuirk,
/// Generic transient error; retryable.
Transient,
/// Generic permanent error; unrecoverable.
Permanent,
}
impl AgentErrorKind {
    /// Returns `true` when the same agent should simply be tried again.
    ///
    /// This covers API unavailability, network errors, timeouts, invalid
    /// responses, retryable agent quirks and generic transient errors.
    #[must_use]
    pub const fn should_retry(self) -> bool {
        matches!(
            self,
            Self::ApiUnavailable
                | Self::NetworkError
                | Self::Timeout
                | Self::InvalidResponse
                | Self::RetryableAgentQuirk
                | Self::Transient
        )
    }

    /// Returns `true` when the caller should switch to the next agent right
    /// away, without retrying. Only [`Self::RateLimited`] qualifies.
    #[must_use]
    pub const fn should_immediate_agent_fallback(self) -> bool {
        matches!(self, Self::RateLimited)
    }

    /// Returns `true` when the caller should fall back to a different agent.
    ///
    /// [`Self::RateLimited`] is deliberately not part of this set; it is
    /// reported by [`Self::should_immediate_agent_fallback`] instead.
    #[must_use]
    pub const fn should_fallback(self) -> bool {
        matches!(
            self,
            Self::TokenExhausted
                | Self::AuthFailure
                | Self::CommandNotFound
                | Self::ProcessKilled
                | Self::ToolExecutionFailed
                | Self::AgentSpecificQuirk
        )
    }

    /// Returns `true` for errors that neither a retry nor a fallback can fix
    /// ([`Self::DiskFull`] and [`Self::Permanent`]).
    #[must_use]
    pub const fn is_unrecoverable(self) -> bool {
        matches!(self, Self::DiskFull | Self::Permanent)
    }

    /// Returns `true` only for [`Self::CommandNotFound`].
    #[must_use]
    pub const fn is_command_not_found(self) -> bool {
        matches!(self, Self::CommandNotFound)
    }

    /// Returns `true` for connectivity-related errors
    /// ([`Self::NetworkError`] and [`Self::Timeout`]).
    #[must_use]
    pub const fn is_network_error(self) -> bool {
        matches!(self, Self::NetworkError | Self::Timeout)
    }

    /// Returns `true` when the error hints that a smaller context may help
    /// ([`Self::TokenExhausted`] and [`Self::ProcessKilled`]).
    #[must_use]
    pub const fn suggests_smaller_context(self) -> bool {
        matches!(self, Self::TokenExhausted | Self::ProcessKilled)
    }

    /// Suggested delay, in milliseconds, before the next attempt.
    ///
    /// Every kind for which [`Self::should_retry`] is `false` yields `0`.
    #[must_use]
    pub const fn suggested_wait_ms(self) -> u64 {
        match self {
            Self::ApiUnavailable => 3000,
            Self::NetworkError => 2000,
            Self::Timeout | Self::Transient | Self::RetryableAgentQuirk => 1000,
            Self::InvalidResponse => 500,
            Self::RateLimited
            | Self::TokenExhausted
            | Self::AuthFailure
            | Self::CommandNotFound
            | Self::DiskFull
            | Self::ProcessKilled
            | Self::ToolExecutionFailed
            | Self::AgentSpecificQuirk
            | Self::Permanent => 0,
        }
    }

    /// Short, human-readable description of the error kind.
    #[must_use]
    pub const fn description(self) -> &'static str {
        match self {
            Self::RateLimited => "API rate limit exceeded",
            Self::TokenExhausted => "Token/context limit exceeded",
            Self::ApiUnavailable => "API service temporarily unavailable",
            Self::NetworkError => "Network connectivity issue",
            Self::AuthFailure => "Authentication failure",
            Self::CommandNotFound => "Command not found",
            Self::DiskFull => "Disk space exhausted",
            Self::ProcessKilled => "Process terminated (possibly OOM)",
            Self::InvalidResponse => "Invalid response from agent",
            Self::Timeout => "Request timed out",
            Self::ToolExecutionFailed => "Tool execution failed (e.g., file write)",
            Self::AgentSpecificQuirk => "Known agent-specific issue",
            Self::RetryableAgentQuirk => "Agent-specific issue (may be transient)",
            Self::Transient => "Transient error",
            Self::Permanent => "Permanent error",
        }
    }

    /// User-facing advice describing what happens next and how to fix the
    /// underlying problem.
    #[must_use]
    pub const fn recovery_advice(self) -> &'static str {
        match self {
            Self::RateLimited => {
                "Switching to next agent immediately. Rate limit indicates provider exhaustion."
            }
            Self::TokenExhausted => {
                "Switching to alternative agent. Tip: Try RALPH_DEVELOPER_CONTEXT=0 or RALPH_REVIEWER_CONTEXT=0"
            }
            Self::ApiUnavailable => {
                "API server issue. Will retry automatically. Tip: Check status page or try different provider."
            }
            Self::NetworkError => {
                "Check your internet connection. Will retry automatically. Tip: Check firewall/VPN settings."
            }
            Self::AuthFailure => {
                "Check API key or run 'agent auth' to authenticate. Tip: Verify credentials for this provider."
            }
            Self::CommandNotFound => {
                "Agent binary not installed. See installation guidance below. Tip: Run 'ralph --list-available-agents'"
            }
            Self::DiskFull => "Free up disk space and try again. Tip: Check .agent directory size.",
            Self::ProcessKilled => {
                "Process was killed (possible OOM). Trying with smaller context. Tip: Reduce context with RALPH_*_CONTEXT=0"
            }
            Self::InvalidResponse => {
                "Received malformed response. Retrying... Tip: May indicate parser mismatch with this agent."
            }
            Self::Timeout => {
                "Request timed out. Will retry with longer timeout. Tip: Try reducing prompt size or context."
            }
            Self::ToolExecutionFailed => {
                "Tool execution failed (file write/permissions). Switching agent. Tip: Check directory write permissions."
            }
            Self::AgentSpecificQuirk => {
                "Known agent-specific issue. Switching to alternative agent. Tip: See docs/agent-compatibility.md"
            }
            Self::RetryableAgentQuirk => {
                "Agent-specific issue that may be transient. Retrying... Tip: See docs/agent-compatibility.md"
            }
            Self::Transient => "Temporary issue. Will retry automatically.",
            Self::Permanent => {
                "Unrecoverable error. Check agent logs (.agent/logs/) and see docs/agent-compatibility.md for help."
            }
        }
    }

    /// Classifies a failed agent run from its exit code and stderr output.
    ///
    /// `stderr` is lower-cased once and then matched against groups of known
    /// substrings. The first group that matches wins, in this order:
    ///
    /// 1. API errors (rate limit, authentication, token/context limits)
    /// 2. Network / API-availability / timeout errors
    /// 3. Resource errors (disk, argument size, killed process)
    /// 4. Tool failures (malformed responses, write/permission problems)
    /// 5. GLM-like agents exiting with code 1 (decided from `agent_name` or
    ///    `model_flag` via `is_glm_like_agent`)
    /// 6. Agent-specific quirks recognised from stderr alone
    /// 7. Command-not-found
    /// 8. Exit code 1 with "error" in stderr => [`Self::Transient`]
    ///
    /// Anything left over is [`Self::Permanent`].
    ///
    /// # Parameters
    /// * `exit_code` - process exit status of the agent.
    /// * `stderr` - captured standard error; matched case-insensitively.
    /// * `agent_name` / `model_flag` - optional identifiers; either one
    ///   marking the agent as GLM-like enables step 5.
    #[must_use]
    pub fn classify_with_agent(
        exit_code: i32,
        stderr: &str,
        agent_name: Option<&str>,
        model_flag: Option<&str>,
    ) -> Self {
        // Substrings that turn a GLM-like agent's exit-code-1 failure into a
        // known (non-retryable) quirk rather than a retryable one.
        const KNOWN_PROBLEMATIC_PATTERNS: &[&str] = &[
            "permission",
            "denied",
            "unauthorized",
            "auth",
            "token",
            "limit",
            "quota",
            "disk",
            "space",
        ];

        let stderr_lower = stderr.to_lowercase();

        // Agent-independent checks come first; their relative order is significant.
        let generic_match = Self::check_api_errors(&stderr_lower)
            .or_else(|| Self::check_network_errors(&stderr_lower))
            .or_else(|| Self::check_resource_errors(exit_code, &stderr_lower))
            .or_else(|| Self::check_tool_failures(&stderr_lower));
        if let Some(kind) = generic_match {
            return kind;
        }

        let is_problematic_agent =
            agent_name.is_some_and(is_glm_like_agent) || model_flag.is_some_and(is_glm_like_agent);
        if is_problematic_agent && exit_code == 1 {
            let names_failed_agent = stderr_lower.contains("failed")
                && (stderr_lower.contains("glm") || stderr_lower.contains("ccs"));
            let has_known_problematic_pattern =
                Self::contains_any(&stderr_lower, KNOWN_PROBLEMATIC_PATTERNS) || names_failed_agent;
            return if has_known_problematic_pattern {
                Self::AgentSpecificQuirk
            } else {
                // Unexplained exit code 1 from a GLM-like agent: worth another attempt.
                Self::RetryableAgentQuirk
            };
        }

        let late_match = Self::check_agent_specific_quirks(&stderr_lower, exit_code)
            .or_else(|| Self::check_command_not_found(exit_code, &stderr_lower));
        if let Some(kind) = late_match {
            return kind;
        }

        if exit_code == 1 && stderr_lower.contains("error") {
            Self::Transient
        } else {
            Self::Permanent
        }
    }

    /// Returns `true` if `haystack` contains at least one of `patterns`.
    fn contains_any(haystack: &str, patterns: &[&str]) -> bool {
        patterns.iter().any(|&pattern| haystack.contains(pattern))
    }

    /// Returns `true` if `needle` occurs in `haystack` at a position where
    /// neither the byte directly before nor the byte directly after it
    /// satisfies `is_joining`.
    ///
    /// Used to match short tokens (status codes, "oom") without also
    /// matching them inside longer numbers or words.
    fn contains_standalone(haystack: &str, needle: &str, is_joining: fn(u8) -> bool) -> bool {
        haystack.match_indices(needle).any(|(start, matched)| {
            let end = start + matched.len();
            let joined_before = haystack[..start].bytes().next_back().is_some_and(is_joining);
            let joined_after = haystack[end..].bytes().next().is_some_and(is_joining);
            !(joined_before || joined_after)
        })
    }

    /// Returns `true` if any of `codes` appears in `haystack` as a number of
    /// its own, i.e. not embedded in a longer run of digits.
    ///
    /// A plain substring test would treat "15000" as containing status 500.
    fn contains_any_status_code(haystack: &str, codes: &[&str]) -> bool {
        codes
            .iter()
            .any(|&code| Self::contains_standalone(haystack, code, |b: u8| b.is_ascii_digit()))
    }

    /// Detects rate-limit, authentication and token/context-limit errors.
    ///
    /// `stderr_lower` must already be lower-cased.
    fn check_api_errors(stderr_lower: &str) -> Option<Self> {
        const RATE_LIMIT_PATTERNS: &[&str] = &["rate limit", "too many requests", "quota exceeded"];
        const AUTH_PATTERNS: &[&str] = &[
            "unauthorized",
            "authentication",
            "api key",
            "invalid token",
            "forbidden",
            "access denied",
            "credential",
        ];
        const TOKEN_LIMIT_PATTERNS: &[&str] = &[
            "context length",
            "maximum context",
            "max context",
            "context window",
            "maximum tokens",
            "max tokens",
            "too many tokens",
            "token limit",
            "context_length_exceeded",
            "input too large",
            "prompt is too long",
        ];

        if Self::contains_any(stderr_lower, RATE_LIMIT_PATTERNS)
            || Self::contains_any_status_code(stderr_lower, &["429"])
        {
            return Some(Self::RateLimited);
        }

        if Self::contains_any(stderr_lower, AUTH_PATTERNS)
            || Self::contains_any_status_code(stderr_lower, &["401", "403"])
        {
            return Some(Self::AuthFailure);
        }

        // A bare "too long" counts as a token problem unless it is the OS-level
        // "argument list too long", which check_resource_errors handles.
        let generic_too_long = stderr_lower.contains("too long")
            && !stderr_lower.contains("argument list too long");
        if Self::contains_any(stderr_lower, TOKEN_LIMIT_PATTERNS) || generic_too_long {
            return Some(Self::TokenExhausted);
        }

        None
    }

    /// Detects connectivity failures, server-side unavailability and timeouts.
    ///
    /// `stderr_lower` must already be lower-cased.
    fn check_network_errors(stderr_lower: &str) -> Option<Self> {
        const NETWORK_PATTERNS: &[&str] = &[
            "connection refused",
            "network unreachable",
            "dns resolution",
            "name resolution",
            "no route to host",
            "network is down",
            "host unreachable",
            "connection reset",
            "broken pipe",
            "econnrefused",
            "enetunreach",
        ];
        const UNAVAILABLE_PATTERNS: &[&str] = &[
            "service unavailable",
            "internal server error",
            "bad gateway",
            "gateway timeout",
            "overloaded",
            "maintenance",
        ];
        const UNAVAILABLE_STATUS_CODES: &[&str] = &["503", "502", "504", "500"];
        const TIMEOUT_PATTERNS: &[&str] =
            &["timeout", "timed out", "request timeout", "deadline exceeded"];

        if Self::contains_any(stderr_lower, NETWORK_PATTERNS) {
            return Some(Self::NetworkError);
        }

        // Checked before the timeout group so that "gateway timeout" is
        // reported as an unavailable API rather than a plain timeout.
        if Self::contains_any(stderr_lower, UNAVAILABLE_PATTERNS)
            || Self::contains_any_status_code(stderr_lower, UNAVAILABLE_STATUS_CODES)
        {
            return Some(Self::ApiUnavailable);
        }

        if Self::contains_any(stderr_lower, TIMEOUT_PATTERNS) {
            return Some(Self::Timeout);
        }

        None
    }

    /// Detects local resource problems: full disk, oversized argument lists
    /// and processes that were killed or crashed.
    ///
    /// `stderr_lower` must already be lower-cased.
    fn check_resource_errors(exit_code: i32, stderr_lower: &str) -> Option<Self> {
        const DISK_FULL_PATTERNS: &[&str] = &[
            "no space left",
            "disk full",
            "enospc",
            "out of disk",
            "insufficient storage",
        ];
        const ARG_TOO_LONG_PATTERNS: &[&str] = &["argument list too long", "e2big"];
        const KILLED_PATTERNS: &[&str] = &[
            "killed",
            "out of memory",
            "memory exhausted",
            "cannot allocate",
            "segmentation fault",
            "sigsegv",
            "sigkill",
        ];

        if Self::contains_any(stderr_lower, DISK_FULL_PATTERNS) {
            return Some(Self::DiskFull);
        }

        // NOTE(review): exit code 7 is grouped with E2BIG here; presumably it is
        // the errno value surfacing as the exit status - confirm with the spawner.
        if exit_code == 7 || Self::contains_any(stderr_lower, ARG_TOO_LONG_PATTERNS) {
            return Some(Self::ToolExecutionFailed);
        }

        // 137 and 139 are the conventional 128+SIGKILL / 128+SIGSEGV statuses.
        // "oom" must stand alone so that words such as "room" or "zoom" do not match.
        if matches!(exit_code, 137 | 139 | -9)
            || Self::contains_any(stderr_lower, KILLED_PATTERNS)
            || Self::contains_standalone(stderr_lower, "oom", |b: u8| b.is_ascii_alphabetic())
        {
            return Some(Self::ProcessKilled);
        }

        None
    }

    /// Detects malformed agent responses and failed tool executions
    /// (write errors and permission problems).
    ///
    /// `stderr_lower` must already be lower-cased.
    fn check_tool_failures(stderr_lower: &str) -> Option<Self> {
        const INVALID_RESPONSE_PATTERNS: &[&str] = &[
            "invalid json",
            "json parse",
            "unexpected token",
            "malformed",
            "truncated response",
            "incomplete response",
        ];
        const WRITE_FAILURE_PATTERNS: &[&str] = &[
            "write error",
            "cannot write",
            "failed to write",
            "unable to create file",
            "file creation failed",
            "i/o error",
            "io error",
            "tool failed",
            "tool execution failed",
            "tool call failed",
        ];
        const PERMISSION_PATTERNS: &[&str] = &[
            "permission denied",
            "operation not permitted",
            "insufficient permissions",
            "eacces",
            "eperm",
        ];

        if Self::contains_any(stderr_lower, INVALID_RESPONSE_PATTERNS) {
            return Some(Self::InvalidResponse);
        }

        if Self::contains_any(stderr_lower, WRITE_FAILURE_PATTERNS)
            || Self::contains_any(stderr_lower, PERMISSION_PATTERNS)
        {
            return Some(Self::ToolExecutionFailed);
        }

        None
    }

    /// Detects known CCS/GLM quirks from stderr alone, for the case where the
    /// agent was not identified by name.
    ///
    /// Returns [`Self::AgentSpecificQuirk`] when stderr mentions "ccs" or
    /// "glm" and either the exit code is 1, "ccs" appears together with
    /// "failed", or "glm" appears together with a permission-style keyword.
    fn check_agent_specific_quirks(stderr_lower: &str, exit_code: i32) -> Option<Self> {
        let mentions_ccs = stderr_lower.contains("ccs");
        let mentions_glm = stderr_lower.contains("glm");
        if !(mentions_ccs || mentions_glm) {
            return None;
        }

        let ccs_failed = mentions_ccs && stderr_lower.contains("failed");
        let glm_denied = mentions_glm
            && Self::contains_any(stderr_lower, &["permission", "denied", "unauthorized"]);

        (exit_code == 1 || ccs_failed || glm_denied).then_some(Self::AgentSpecificQuirk)
    }

    /// Detects a missing or non-executable agent binary, either from exit
    /// codes 127/126 (the conventional shell statuses for "command not found"
    /// and "not executable") or from stderr text.
    fn check_command_not_found(exit_code: i32, stderr_lower: &str) -> Option<Self> {
        const NOT_FOUND_PATTERNS: &[&str] = &["command not found", "not found", "no such file"];

        (matches!(exit_code, 127 | 126) || Self::contains_any(stderr_lower, NOT_FOUND_PATTERNS))
            .then_some(Self::CommandNotFound)
    }
}