Skip to main content

harn_vm/value/
error.rs

1use super::VmValue;
2
/// Error type raised by the VM.
///
/// The user-facing text for each variant comes from the `Display` impl in
/// this file; `error_to_category` maps a value of this type onto an
/// `ErrorCategory`.
#[derive(Debug, Clone)]
pub enum VmError {
    /// Rendered as "Stack underflow".
    StackUnderflow,
    /// Rendered as "Stack overflow: too many nested calls".
    StackOverflow,
    /// Rendered as "Undefined variable: <payload>" (the payload is
    /// presumably the variable name).
    UndefinedVariable(String),
    /// Rendered as "Undefined builtin: <payload>" (the payload is
    /// presumably the builtin name).
    UndefinedBuiltin(String),
    /// Rendered as "Cannot assign to immutable binding: <payload>".
    ImmutableAssignment(String),
    /// Type error carrying a free-form message.
    TypeError(String),
    /// Runtime error carrying a free-form message. `error_to_category`
    /// classifies this variant by inspecting the message text.
    Runtime(String),
    /// Rendered as "Division by zero".
    DivisionByZero,
    /// A thrown VM value. `error_to_category` reads the "category" entry of
    /// a thrown dict and classifies a thrown string by its text.
    Thrown(VmValue),
    /// Thrown with error category for structured error handling.
    CategorizedError {
        /// Human-readable description of the failure.
        message: String,
        /// Classification returned as-is by `error_to_category`.
        category: ErrorCategory,
    },
    /// A daemon reached its `event_queue_capacity`.
    DaemonQueueFull {
        /// Identifier of the daemon, quoted in the rendered message.
        daemon_id: String,
        /// The capacity value reported in the rendered message.
        capacity: usize,
    },
    /// Carries a value and is rendered as "Return from function".
    /// NOTE(review): presumably a control-flow signal rather than a real
    /// failure — confirm against the interpreter loop.
    Return(VmValue),
    /// An instruction byte, rendered as two-digit hex ("0x..").
    InvalidInstruction(u8),
}
26
/// Coarse classification of errors, used for structured error handling in
/// agent orchestration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorCategory {
    /// A network or connection timeout.
    Timeout,
    /// Authentication or authorization failed.
    Auth,
    /// A rate limit or quota was exceeded (HTTP 429).
    RateLimit,
    /// The upstream provider is overloaded (HTTP 503 / 529).
    ///
    /// Unlike [`ErrorCategory::RateLimit`], the client has not exceeded a
    /// quota: the provider is shedding load and will recover on its own.
    Overloaded,
    /// A provider-side 5xx error (500, 502) that is not specifically overload.
    ServerError,
    /// A transient network-level failure (connection reset, DNS hiccup,
    /// partial stream): retryable, but not tied to a provider status code.
    TransientNetwork,
    /// LLM output failed schema validation. Retryable via `schema_retries`.
    SchemaValidation,
    /// A tool failed while executing.
    ToolError,
    /// The host rejected the tool (not permitted / not in the allowlist).
    ToolRejected,
    /// The operation was cancelled.
    Cancelled,
    /// The requested resource was not found.
    NotFound,
    /// A circuit breaker is open.
    CircuitOpen,
    /// Generic / unclassified error.
    Generic,
}

impl ErrorCategory {
    /// Returns the stable snake_case identifier of this category.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Timeout => "timeout",
            Self::Auth => "auth",
            Self::RateLimit => "rate_limit",
            Self::Overloaded => "overloaded",
            Self::ServerError => "server_error",
            Self::TransientNetwork => "transient_network",
            Self::SchemaValidation => "schema_validation",
            Self::ToolError => "tool_error",
            Self::ToolRejected => "tool_rejected",
            Self::Cancelled => "cancelled",
            Self::NotFound => "not_found",
            Self::CircuitOpen => "circuit_open",
            Self::Generic => "generic",
        }
    }

    /// Converts an identifier produced by [`ErrorCategory::as_str`] back into
    /// a category.
    ///
    /// Matching is exact and case-sensitive. Any unrecognized input yields
    /// [`ErrorCategory::Generic`].
    pub fn parse(s: &str) -> Self {
        // `Generic` is intentionally absent: it is the fallback for every
        // string that names no other category, "generic" included.
        const NAMED: [ErrorCategory; 12] = [
            ErrorCategory::Timeout,
            ErrorCategory::Auth,
            ErrorCategory::RateLimit,
            ErrorCategory::Overloaded,
            ErrorCategory::ServerError,
            ErrorCategory::TransientNetwork,
            ErrorCategory::SchemaValidation,
            ErrorCategory::ToolError,
            ErrorCategory::ToolRejected,
            ErrorCategory::Cancelled,
            ErrorCategory::NotFound,
            ErrorCategory::CircuitOpen,
        ];
        NAMED
            .iter()
            .find(|candidate| candidate.as_str() == s)
            .cloned()
            .unwrap_or(ErrorCategory::Generic)
    }

    /// Reports whether this category denotes a transient, provider-side
    /// condition that is worth retrying.
    ///
    /// Agent loops consult this to choose between backing off and retrying
    /// or surfacing the error to the user.
    pub fn is_transient(&self) -> bool {
        match self {
            Self::Timeout
            | Self::RateLimit
            | Self::Overloaded
            | Self::ServerError
            | Self::TransientNetwork => true,
            Self::Auth
            | Self::SchemaValidation
            | Self::ToolError
            | Self::ToolRejected
            | Self::Cancelled
            | Self::NotFound
            | Self::CircuitOpen
            | Self::Generic => false,
        }
    }
}
112
113/// Create a categorized error conveniently.
114pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
115    VmError::CategorizedError {
116        message: message.into(),
117        category,
118    }
119}
120
121/// Extract error category from a VmError.
122///
123/// Classification priority:
124/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
125/// 2. Thrown dict with a "category" field (user-created structured errors)
126/// 3. HTTP status code extraction (standard, unambiguous)
127/// 4. Deadline exceeded (VM-internal)
128/// 5. Fallback to Generic
129pub fn error_to_category(err: &VmError) -> ErrorCategory {
130    match err {
131        VmError::CategorizedError { category, .. } => category.clone(),
132        VmError::Thrown(VmValue::Dict(d)) => d
133            .get("category")
134            .map(|v| ErrorCategory::parse(&v.display()))
135            .unwrap_or(ErrorCategory::Generic),
136        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
137        VmError::Runtime(msg) => classify_error_message(msg),
138        _ => ErrorCategory::Generic,
139    }
140}
141
142/// Classify an error message using HTTP status codes and well-known patterns.
143/// Prefers unambiguous signals (status codes) over substring heuristics.
144pub fn classify_error_message(msg: &str) -> ErrorCategory {
145    // 1. HTTP status codes — most reliable signal
146    if let Some(cat) = classify_by_http_status(msg) {
147        return cat;
148    }
149    // 2. Well-known error identifiers from major APIs
150    //    (Anthropic, OpenAI, and standard HTTP patterns)
151    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
152        return ErrorCategory::Timeout;
153    }
154    if msg.contains("overloaded_error") {
155        // Anthropic overloaded_error surfaces as HTTP 529.
156        return ErrorCategory::Overloaded;
157    }
158    if msg.contains("api_error") {
159        // Anthropic catch-all server-side error.
160        return ErrorCategory::ServerError;
161    }
162    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
163        // OpenAI-specific quota error types.
164        return ErrorCategory::RateLimit;
165    }
166    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
167        return ErrorCategory::Auth;
168    }
169    if msg.contains("not_found_error") || msg.contains("model_not_found") {
170        return ErrorCategory::NotFound;
171    }
172    if msg.contains("circuit_open") {
173        return ErrorCategory::CircuitOpen;
174    }
175    // Network-level transient patterns (pre-HTTP-status, pre-provider-framing).
176    let lower = msg.to_lowercase();
177    if lower.contains("connection reset")
178        || lower.contains("connection refused")
179        || lower.contains("connection closed")
180        || lower.contains("broken pipe")
181        || lower.contains("dns error")
182        || lower.contains("stream error")
183        || lower.contains("unexpected eof")
184    {
185        return ErrorCategory::TransientNetwork;
186    }
187    ErrorCategory::Generic
188}
189
190/// Classify errors by HTTP status code if one appears in the message.
191/// This is the most reliable classification method since status codes
192/// are standardized (RFC 9110) and unambiguous.
193fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
194    // Extract 3-digit HTTP status codes from common patterns:
195    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
196    for code in extract_http_status_codes(msg) {
197        return Some(match code {
198            401 | 403 => ErrorCategory::Auth,
199            404 | 410 => ErrorCategory::NotFound,
200            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
201            429 => ErrorCategory::RateLimit,
202            503 | 529 => ErrorCategory::Overloaded,
203            500 | 502 => ErrorCategory::ServerError,
204            _ => continue,
205        });
206    }
207    None
208}
209
/// Collects plausible HTTP error status codes from an error message.
///
/// A candidate is a run of exactly three ASCII digits that is not part of a
/// longer number. Only values in `400..=599` are kept, in order of
/// appearance; duplicates are preserved.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    // Splitting on every non-digit character leaves the maximal digit runs,
    // so a run of length three has no digit immediately before or after it.
    msg.split(|ch: char| !ch.is_ascii_digit())
        .filter(|run| run.len() == 3)
        .filter_map(|run| run.parse::<u16>().ok())
        .filter(|status| (400..=599).contains(status))
        .collect()
}
234
235impl std::fmt::Display for VmError {
236    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
237        match self {
238            VmError::StackUnderflow => write!(f, "Stack underflow"),
239            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
240            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
241            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
242            VmError::ImmutableAssignment(n) => {
243                write!(f, "Cannot assign to immutable binding: {n}")
244            }
245            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
246            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
247            VmError::DivisionByZero => write!(f, "Division by zero"),
248            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
249            VmError::CategorizedError { message, category } => {
250                write!(f, "Error [{}]: {}", category.as_str(), message)
251            }
252            VmError::DaemonQueueFull {
253                daemon_id,
254                capacity,
255            } => write!(
256                f,
257                "Daemon queue full: daemon '{daemon_id}' reached its event_queue_capacity of {capacity}"
258            ),
259            VmError::Return(_) => write!(f, "Return from function"),
260            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
261        }
262    }
263}
264
// `VmError` has the `Debug` (derived) and `Display` (hand-written) impls that
// `std::error::Error` requires; the trait's default methods are used as-is,
// so no `source()` is reported.
impl std::error::Error for VmError {}