Skip to main content

harn_vm/value/
error.rs

1use super::VmValue;
2
3#[derive(Debug, Clone)]
4pub enum VmError {
5    StackUnderflow,
6    StackOverflow,
7    UndefinedVariable(String),
8    UndefinedBuiltin(String),
9    ImmutableAssignment(String),
10    TypeError(String),
11    Runtime(String),
12    DivisionByZero,
13    Thrown(VmValue),
14    /// Thrown with error category for structured error handling.
15    CategorizedError {
16        message: String,
17        category: ErrorCategory,
18    },
19    DaemonQueueFull {
20        daemon_id: String,
21        capacity: usize,
22    },
23    Return(VmValue),
24    InvalidInstruction(u8),
25}
26
/// Error categories for structured error handling in agent orchestration.
///
/// Derives `Hash` in addition to `Eq` so a category can be used directly as a
/// `HashMap`/`HashSet` key (e.g. per-category counters).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ErrorCategory {
    /// Network/connection timeout
    Timeout,
    /// Authentication/authorization failure
    Auth,
    /// Rate limit exceeded (HTTP 429 / quota)
    RateLimit,
    /// Upstream provider is overloaded (HTTP 503 / 529).
    /// Distinct from RateLimit: the client hasn't exceeded a quota — the
    /// provider is shedding load and will recover on its own.
    Overloaded,
    /// Provider-side 5xx error (500, 502) that isn't specifically overload.
    ServerError,
    /// Network-level transient failure (connection reset, DNS hiccup,
    /// partial stream) — retryable but not provider-status-coded.
    TransientNetwork,
    /// LLM output failed schema validation. Retryable via `schema_retries`.
    SchemaValidation,
    /// Tool execution failure
    ToolError,
    /// Tool was rejected by the host (not permitted / not in allowlist)
    ToolRejected,
    /// Outbound network egress was blocked by policy.
    EgressBlocked,
    /// Operation was cancelled
    Cancelled,
    /// Resource not found
    NotFound,
    /// Circuit breaker is open
    CircuitOpen,
    /// LLM cost or token budget would be exceeded
    BudgetExceeded,
    /// Generic/unclassified error
    Generic,
}
64
65impl ErrorCategory {
66    pub fn as_str(&self) -> &'static str {
67        match self {
68            ErrorCategory::Timeout => "timeout",
69            ErrorCategory::Auth => "auth",
70            ErrorCategory::RateLimit => "rate_limit",
71            ErrorCategory::Overloaded => "overloaded",
72            ErrorCategory::ServerError => "server_error",
73            ErrorCategory::TransientNetwork => "transient_network",
74            ErrorCategory::SchemaValidation => "schema_validation",
75            ErrorCategory::ToolError => "tool_error",
76            ErrorCategory::ToolRejected => "tool_rejected",
77            ErrorCategory::EgressBlocked => "egress_blocked",
78            ErrorCategory::Cancelled => "cancelled",
79            ErrorCategory::NotFound => "not_found",
80            ErrorCategory::CircuitOpen => "circuit_open",
81            ErrorCategory::BudgetExceeded => "budget_exceeded",
82            ErrorCategory::Generic => "generic",
83        }
84    }
85
86    pub fn parse(s: &str) -> Self {
87        match s {
88            "timeout" => ErrorCategory::Timeout,
89            "auth" => ErrorCategory::Auth,
90            "rate_limit" => ErrorCategory::RateLimit,
91            "overloaded" => ErrorCategory::Overloaded,
92            "server_error" => ErrorCategory::ServerError,
93            "transient_network" => ErrorCategory::TransientNetwork,
94            "schema_validation" => ErrorCategory::SchemaValidation,
95            "tool_error" => ErrorCategory::ToolError,
96            "tool_rejected" => ErrorCategory::ToolRejected,
97            "egress_blocked" => ErrorCategory::EgressBlocked,
98            "cancelled" => ErrorCategory::Cancelled,
99            "not_found" => ErrorCategory::NotFound,
100            "circuit_open" => ErrorCategory::CircuitOpen,
101            "budget_exceeded" => ErrorCategory::BudgetExceeded,
102            _ => ErrorCategory::Generic,
103        }
104    }
105
106    /// Whether an error of this category is worth retrying for a transient
107    /// provider-side reason. Agent loops consult this to decide whether to
108    /// back off and retry vs surface the error to the user.
109    pub fn is_transient(&self) -> bool {
110        matches!(
111            self,
112            ErrorCategory::Timeout
113                | ErrorCategory::RateLimit
114                | ErrorCategory::Overloaded
115                | ErrorCategory::ServerError
116                | ErrorCategory::TransientNetwork
117        )
118    }
119}
120
121/// Create a categorized error conveniently.
122pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
123    VmError::CategorizedError {
124        message: message.into(),
125        category,
126    }
127}
128
129/// Extract error category from a VmError.
130///
131/// Classification priority:
132/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
133/// 2. Thrown dict with a "category" field (user-created structured errors)
134/// 3. HTTP status code extraction (standard, unambiguous)
135/// 4. Deadline exceeded (VM-internal)
136/// 5. Fallback to Generic
137pub fn error_to_category(err: &VmError) -> ErrorCategory {
138    match err {
139        VmError::CategorizedError { category, .. } => category.clone(),
140        VmError::Thrown(VmValue::Dict(d)) => d
141            .get("category")
142            .map(|v| ErrorCategory::parse(&v.display()))
143            .unwrap_or(ErrorCategory::Generic),
144        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
145        VmError::Runtime(msg) => classify_error_message(msg),
146        _ => ErrorCategory::Generic,
147    }
148}
149
150/// Classify an error message using HTTP status codes and well-known patterns.
151/// Prefers unambiguous signals (status codes) over substring heuristics.
152pub fn classify_error_message(msg: &str) -> ErrorCategory {
153    // 1. HTTP status codes — most reliable signal
154    if let Some(cat) = classify_by_http_status(msg) {
155        return cat;
156    }
157    // 2. Well-known error identifiers from major APIs
158    //    (Anthropic, OpenAI, and standard HTTP patterns)
159    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
160        return ErrorCategory::Timeout;
161    }
162    if msg.contains("overloaded_error") {
163        // Anthropic overloaded_error surfaces as HTTP 529.
164        return ErrorCategory::Overloaded;
165    }
166    if msg.contains("api_error") {
167        // Anthropic catch-all server-side error.
168        return ErrorCategory::ServerError;
169    }
170    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
171        // OpenAI-specific quota error types.
172        return ErrorCategory::RateLimit;
173    }
174    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
175        return ErrorCategory::Auth;
176    }
177    if msg.contains("not_found_error") || msg.contains("model_not_found") {
178        return ErrorCategory::NotFound;
179    }
180    if msg.contains("circuit_open") {
181        return ErrorCategory::CircuitOpen;
182    }
183    // Network-level transient patterns (pre-HTTP-status, pre-provider-framing).
184    let lower = msg.to_lowercase();
185    if lower.contains("connection reset")
186        || lower.contains("connection refused")
187        || lower.contains("connection closed")
188        || lower.contains("broken pipe")
189        || lower.contains("dns error")
190        || lower.contains("stream error")
191        || lower.contains("unexpected eof")
192    {
193        return ErrorCategory::TransientNetwork;
194    }
195    ErrorCategory::Generic
196}
197
198/// Classify errors by HTTP status code if one appears in the message.
199/// This is the most reliable classification method since status codes
200/// are standardized (RFC 9110) and unambiguous.
201fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
202    // Extract 3-digit HTTP status codes from common patterns:
203    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
204    for code in extract_http_status_codes(msg) {
205        return Some(match code {
206            401 | 403 => ErrorCategory::Auth,
207            404 | 410 => ErrorCategory::NotFound,
208            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
209            429 => ErrorCategory::RateLimit,
210            503 | 529 => ErrorCategory::Overloaded,
211            500 | 502 => ErrorCategory::ServerError,
212            _ => continue,
213        });
214    }
215    None
216}
217
/// Extract plausible HTTP status codes from an error message.
///
/// Returns every standalone 3-digit number in the 400..=599 range, in order
/// of appearance. "Standalone" means the bytes immediately before and after
/// the three digits are not ASCII letters or digits, so `"HTTP 429"`,
/// `"status=503"` and `"error: 401"` match while longer numbers (`"5000"`),
/// durations (`"500ms"`) and identifiers (`"req_ab404cd"`) do not.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    let bytes = msg.as_bytes();
    // A missing neighbour (start/end of input) counts as a boundary.
    let is_boundary = |neighbour: Option<u8>| {
        !matches!(neighbour, Some(byte) if byte.is_ascii_alphanumeric())
    };

    let mut codes = Vec::new();
    for (start, window) in bytes.windows(3).enumerate() {
        if !window.iter().all(u8::is_ascii_digit) {
            continue;
        }
        let before = start.checked_sub(1).map(|index| bytes[index]);
        let after = bytes.get(start + 3).copied();
        if !(is_boundary(before) && is_boundary(after)) {
            continue;
        }
        // Three ASCII digits: at most 999, always fits in a u16.
        let code = window
            .iter()
            .fold(0u16, |acc, digit| acc * 10 + u16::from(digit - b'0'));
        // Only client/server error classes are relevant for classification.
        if (400..=599).contains(&code) {
            codes.push(code);
        }
    }
    codes
}
242
243impl std::fmt::Display for VmError {
244    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
245        match self {
246            VmError::StackUnderflow => write!(f, "Stack underflow"),
247            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
248            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
249            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
250            VmError::ImmutableAssignment(n) => {
251                write!(f, "Cannot assign to immutable binding: {n}")
252            }
253            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
254            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
255            VmError::DivisionByZero => write!(f, "Division by zero"),
256            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
257            VmError::CategorizedError { message, category } => {
258                write!(f, "Error [{}]: {}", category.as_str(), message)
259            }
260            VmError::DaemonQueueFull {
261                daemon_id,
262                capacity,
263            } => write!(
264                f,
265                "Daemon queue full: daemon '{daemon_id}' reached its event_queue_capacity of {capacity}"
266            ),
267            VmError::Return(_) => write!(f, "Return from function"),
268            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
269        }
270    }
271}
272
273impl std::error::Error for VmError {}