Skip to main content

harn_vm/value/
error.rs

1use super::VmValue;
2
3#[derive(Debug, Clone)]
4pub enum VmError {
5    StackUnderflow,
6    StackOverflow,
7    UndefinedVariable(String),
8    UndefinedBuiltin(String),
9    ImmutableAssignment(String),
10    TypeError(String),
11    Runtime(String),
12    DivisionByZero,
13    Thrown(VmValue),
14    /// Thrown with error category for structured error handling.
15    CategorizedError {
16        message: String,
17        category: ErrorCategory,
18    },
19    DaemonQueueFull {
20        daemon_id: String,
21        capacity: usize,
22    },
23    Return(VmValue),
24    InvalidInstruction(u8),
25}
26
/// Error categories for structured error handling in agent orchestration.
///
/// Derives `Hash` (in addition to equality) so a category can be used
/// directly as a `HashMap`/`HashSet` key, e.g. for per-category counters.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ErrorCategory {
    /// Network/connection timeout
    Timeout,
    /// Authentication/authorization failure
    Auth,
    /// Rate limit exceeded (HTTP 429 / quota)
    RateLimit,
    /// Upstream provider is overloaded (HTTP 503 / 529).
    /// Distinct from RateLimit: the client hasn't exceeded a quota — the
    /// provider is shedding load and will recover on its own.
    Overloaded,
    /// Provider-side 5xx error (500, 502) that isn't specifically overload.
    ServerError,
    /// Network-level transient failure (connection reset, DNS hiccup,
    /// partial stream) — retryable but not provider-status-coded.
    TransientNetwork,
    /// LLM output failed schema validation. Retryable via `schema_retries`.
    SchemaValidation,
    /// Tool execution failure
    ToolError,
    /// Tool was rejected by the host (not permitted / not in allowlist)
    ToolRejected,
    /// Outbound network egress was blocked by policy.
    EgressBlocked,
    /// Operation was cancelled
    Cancelled,
    /// Resource not found
    NotFound,
    /// Circuit breaker is open
    CircuitOpen,
    /// Generic/unclassified error
    Generic,
}

impl ErrorCategory {
    /// Every category, used by [`ErrorCategory::parse`] so the textual names
    /// are defined exactly once (in [`ErrorCategory::as_str`]).
    ///
    /// A newly added variant must be listed here, otherwise `parse` maps its
    /// name to `Generic`.
    const ALL: [ErrorCategory; 14] = [
        ErrorCategory::Timeout,
        ErrorCategory::Auth,
        ErrorCategory::RateLimit,
        ErrorCategory::Overloaded,
        ErrorCategory::ServerError,
        ErrorCategory::TransientNetwork,
        ErrorCategory::SchemaValidation,
        ErrorCategory::ToolError,
        ErrorCategory::ToolRejected,
        ErrorCategory::EgressBlocked,
        ErrorCategory::Cancelled,
        ErrorCategory::NotFound,
        ErrorCategory::CircuitOpen,
        ErrorCategory::Generic,
    ];

    /// Returns the snake_case identifier for this category.
    pub fn as_str(&self) -> &'static str {
        match self {
            ErrorCategory::Timeout => "timeout",
            ErrorCategory::Auth => "auth",
            ErrorCategory::RateLimit => "rate_limit",
            ErrorCategory::Overloaded => "overloaded",
            ErrorCategory::ServerError => "server_error",
            ErrorCategory::TransientNetwork => "transient_network",
            ErrorCategory::SchemaValidation => "schema_validation",
            ErrorCategory::ToolError => "tool_error",
            ErrorCategory::ToolRejected => "tool_rejected",
            ErrorCategory::EgressBlocked => "egress_blocked",
            ErrorCategory::Cancelled => "cancelled",
            ErrorCategory::NotFound => "not_found",
            ErrorCategory::CircuitOpen => "circuit_open",
            ErrorCategory::Generic => "generic",
        }
    }

    /// Inverse of [`ErrorCategory::as_str`].
    ///
    /// Matching is exact (case-sensitive, no trimming). Any string that is
    /// not one of the identifiers produced by `as_str` yields
    /// [`ErrorCategory::Generic`]; this function never fails.
    pub fn parse(s: &str) -> Self {
        Self::ALL
            .iter()
            .find(|known| known.as_str() == s)
            .cloned()
            .unwrap_or(ErrorCategory::Generic)
    }

    /// Whether an error of this category is worth retrying for a transient
    /// provider-side reason. Agent loops consult this to decide whether to
    /// back off and retry vs surface the error to the user.
    pub fn is_transient(&self) -> bool {
        matches!(
            self,
            ErrorCategory::Timeout
                | ErrorCategory::RateLimit
                | ErrorCategory::Overloaded
                | ErrorCategory::ServerError
                | ErrorCategory::TransientNetwork
        )
    }
}
116
117/// Create a categorized error conveniently.
118pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
119    VmError::CategorizedError {
120        message: message.into(),
121        category,
122    }
123}
124
125/// Extract error category from a VmError.
126///
127/// Classification priority:
128/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
129/// 2. Thrown dict with a "category" field (user-created structured errors)
130/// 3. HTTP status code extraction (standard, unambiguous)
131/// 4. Deadline exceeded (VM-internal)
132/// 5. Fallback to Generic
133pub fn error_to_category(err: &VmError) -> ErrorCategory {
134    match err {
135        VmError::CategorizedError { category, .. } => category.clone(),
136        VmError::Thrown(VmValue::Dict(d)) => d
137            .get("category")
138            .map(|v| ErrorCategory::parse(&v.display()))
139            .unwrap_or(ErrorCategory::Generic),
140        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
141        VmError::Runtime(msg) => classify_error_message(msg),
142        _ => ErrorCategory::Generic,
143    }
144}
145
146/// Classify an error message using HTTP status codes and well-known patterns.
147/// Prefers unambiguous signals (status codes) over substring heuristics.
148pub fn classify_error_message(msg: &str) -> ErrorCategory {
149    // 1. HTTP status codes — most reliable signal
150    if let Some(cat) = classify_by_http_status(msg) {
151        return cat;
152    }
153    // 2. Well-known error identifiers from major APIs
154    //    (Anthropic, OpenAI, and standard HTTP patterns)
155    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
156        return ErrorCategory::Timeout;
157    }
158    if msg.contains("overloaded_error") {
159        // Anthropic overloaded_error surfaces as HTTP 529.
160        return ErrorCategory::Overloaded;
161    }
162    if msg.contains("api_error") {
163        // Anthropic catch-all server-side error.
164        return ErrorCategory::ServerError;
165    }
166    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
167        // OpenAI-specific quota error types.
168        return ErrorCategory::RateLimit;
169    }
170    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
171        return ErrorCategory::Auth;
172    }
173    if msg.contains("not_found_error") || msg.contains("model_not_found") {
174        return ErrorCategory::NotFound;
175    }
176    if msg.contains("circuit_open") {
177        return ErrorCategory::CircuitOpen;
178    }
179    // Network-level transient patterns (pre-HTTP-status, pre-provider-framing).
180    let lower = msg.to_lowercase();
181    if lower.contains("connection reset")
182        || lower.contains("connection refused")
183        || lower.contains("connection closed")
184        || lower.contains("broken pipe")
185        || lower.contains("dns error")
186        || lower.contains("stream error")
187        || lower.contains("unexpected eof")
188    {
189        return ErrorCategory::TransientNetwork;
190    }
191    ErrorCategory::Generic
192}
193
194/// Classify errors by HTTP status code if one appears in the message.
195/// This is the most reliable classification method since status codes
196/// are standardized (RFC 9110) and unambiguous.
197fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
198    // Extract 3-digit HTTP status codes from common patterns:
199    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
200    for code in extract_http_status_codes(msg) {
201        return Some(match code {
202            401 | 403 => ErrorCategory::Auth,
203            404 | 410 => ErrorCategory::NotFound,
204            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
205            429 => ErrorCategory::RateLimit,
206            503 | 529 => ErrorCategory::Overloaded,
207            500 | 502 => ErrorCategory::ServerError,
208            _ => continue,
209        });
210    }
211    None
212}
213
/// Collect plausible HTTP error status codes from an error message.
///
/// A candidate is a run of exactly three ASCII digits — not preceded or
/// followed by another ASCII digit — whose value lies in `400..=599`.
/// Candidates are returned in order of appearance.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    let raw = msg.as_bytes();
    // True when `idx` is in bounds and holds an ASCII digit.
    let digit_at = |idx: usize| matches!(raw.get(idx), Some(b) if b.is_ascii_digit());

    raw.windows(3)
        .enumerate()
        .filter(|(_, triple)| triple.iter().all(u8::is_ascii_digit))
        // Reject triples that are a slice of a longer number.
        .filter(|&(start, _)| {
            let digit_before = start > 0 && digit_at(start - 1);
            !digit_before && !digit_at(start + 3)
        })
        // Three ASCII digits always fit in a u16 (max 999).
        .map(|(_, triple)| {
            triple
                .iter()
                .fold(0u16, |acc, digit| acc * 10 + u16::from(digit - b'0'))
        })
        .filter(|code| (400..=599).contains(code))
        .collect()
}
238
239impl std::fmt::Display for VmError {
240    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241        match self {
242            VmError::StackUnderflow => write!(f, "Stack underflow"),
243            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
244            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
245            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
246            VmError::ImmutableAssignment(n) => {
247                write!(f, "Cannot assign to immutable binding: {n}")
248            }
249            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
250            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
251            VmError::DivisionByZero => write!(f, "Division by zero"),
252            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
253            VmError::CategorizedError { message, category } => {
254                write!(f, "Error [{}]: {}", category.as_str(), message)
255            }
256            VmError::DaemonQueueFull {
257                daemon_id,
258                capacity,
259            } => write!(
260                f,
261                "Daemon queue full: daemon '{daemon_id}' reached its event_queue_capacity of {capacity}"
262            ),
263            VmError::Return(_) => write!(f, "Return from function"),
264            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
265        }
266    }
267}
268
269impl std::error::Error for VmError {}