Skip to main content

everruns_core/
error.rs

1// Error types for the agent loop
2//
3// StoreResultExt: extension trait to replace repeated .map_err(|e| AgentLoopError::store(...))? patterns
4// json_val / from_json: helpers to replace repeated serde_json::to_value/from_value boilerplate
5
6use crate::typed_id::{AgentId, HarnessId, SessionId};
7use crate::user_facing_error::{
8    UserFacingError, UserFacingErrorContext, classify_runtime_error_message,
9    codes as user_facing_error_codes,
10};
11use serde::{Serialize, de::DeserializeOwned};
12use thiserror::Error;
13
14/// Result type alias for agent loop operations
15pub type Result<T> = std::result::Result<T, AgentLoopError>;
16
17/// Errors that can occur during agent loop execution
18#[derive(Debug, Error)]
19pub enum AgentLoopError {
20    /// LLM provider error
21    #[error("LLM error: {0}")]
22    Llm(String),
23
24    /// Request too large error (context length exceeded, token limits, etc.)
25    /// Contains the original error message for logging
26    #[error("Request too large: {0}")]
27    RequestTooLarge(String),
28
29    /// Model not available (404, model not found, access denied for model)
30    /// Contains the model_id string that was requested
31    #[error("Model not available: {0}")]
32    ModelNotAvailable(String),
33
34    /// Tool execution error
35    #[error("Tool execution error: {0}")]
36    ToolExecution(String),
37
38    /// Message store error
39    #[error("Message store error: {0}")]
40    MessageStore(String),
41
42    /// Event emission error
43    #[error("Event emission error: {0}")]
44    EventEmission(String),
45
46    /// Configuration error
47    #[error("Configuration error: {0}")]
48    Configuration(String),
49
50    /// Loop terminated due to max iterations
51    #[error("Max iterations ({0}) reached")]
52    MaxIterationsReached(usize),
53
54    /// Loop was cancelled
55    #[error("Loop cancelled")]
56    Cancelled,
57
58    /// No messages to process
59    #[error("No messages to process")]
60    NoMessages,
61
62    /// Agent not found
63    #[error("Agent not found: {0}")]
64    AgentNotFound(AgentId),
65
66    /// Harness not found
67    #[error("Harness not found: {0}")]
68    HarnessNotFound(HarnessId),
69
70    /// Session not found
71    #[error("Session not found: {0}")]
72    SessionNotFound(SessionId),
73
74    /// Internal error
75    #[error("Internal error: {0}")]
76    Internal(#[from] anyhow::Error),
77
78    /// Driver not registered for provider type
79    #[error(
80        "No driver registered for provider type '{0}'. Make sure the driver is registered at startup."
81    )]
82    DriverNotRegistered(String),
83}
84
85impl AgentLoopError {
86    /// Create an LLM error
87    pub fn llm(msg: impl Into<String>) -> Self {
88        AgentLoopError::Llm(msg.into())
89    }
90
91    /// Create a tool execution error
92    pub fn tool(msg: impl Into<String>) -> Self {
93        AgentLoopError::ToolExecution(msg.into())
94    }
95
96    /// Create a message store error
97    pub fn store(msg: impl Into<String>) -> Self {
98        AgentLoopError::MessageStore(msg.into())
99    }
100
101    /// Create an event emission error
102    pub fn event(msg: impl Into<String>) -> Self {
103        AgentLoopError::EventEmission(msg.into())
104    }
105
106    /// Create a configuration error
107    pub fn config(msg: impl Into<String>) -> Self {
108        AgentLoopError::Configuration(msg.into())
109    }
110
111    /// Create an agent not found error
112    pub fn agent_not_found(agent_id: AgentId) -> Self {
113        AgentLoopError::AgentNotFound(agent_id)
114    }
115
116    /// Create a harness not found error
117    pub fn harness_not_found(harness_id: HarnessId) -> Self {
118        AgentLoopError::HarnessNotFound(harness_id)
119    }
120
121    /// Create a session not found error
122    pub fn session_not_found(session_id: SessionId) -> Self {
123        AgentLoopError::SessionNotFound(session_id)
124    }
125
126    /// Create a driver not registered error
127    pub fn driver_not_registered(provider_type: impl Into<String>) -> Self {
128        AgentLoopError::DriverNotRegistered(provider_type.into())
129    }
130
131    /// Create a request too large error
132    pub fn request_too_large(msg: impl Into<String>) -> Self {
133        AgentLoopError::RequestTooLarge(msg.into())
134    }
135
136    /// Create a model not available error
137    pub fn model_not_available(model_id: impl Into<String>) -> Self {
138        AgentLoopError::ModelNotAvailable(model_id.into())
139    }
140
141    /// Check if this is a request-too-large error
142    pub fn is_request_too_large(&self) -> bool {
143        matches!(self, AgentLoopError::RequestTooLarge(_))
144    }
145
146    /// Check if this is a model-not-available error
147    pub fn is_model_not_available(&self) -> bool {
148        matches!(self, AgentLoopError::ModelNotAvailable(_))
149    }
150
151    /// Get the model ID if this is a model-not-available error
152    pub fn model_not_available_id(&self) -> Option<&str> {
153        match self {
154            AgentLoopError::ModelNotAvailable(id) => Some(id),
155            _ => None,
156        }
157    }
158
159    /// Check if this is a rate-limit error (HTTP 429 or rate-limit keywords)
160    pub fn is_rate_limited(&self) -> bool {
161        match self {
162            AgentLoopError::Llm(msg) => {
163                let msg_lower = msg.to_ascii_lowercase();
164                msg_lower.contains("(429)")
165                    || msg_lower.contains("rate limit")
166                    || msg_lower.contains("too many requests")
167            }
168            _ => false,
169        }
170    }
171
172    /// Check if this is an authentication/authorization error (HTTP 401/403)
173    pub fn is_auth_error(&self) -> bool {
174        match self {
175            AgentLoopError::Llm(msg) => msg.contains("(401)") || msg.contains("(403)"),
176            _ => false,
177        }
178    }
179
180    /// Check if this is a server error (HTTP 5xx or transient provider issue)
181    pub fn is_server_error(&self) -> bool {
182        match self {
183            AgentLoopError::Llm(msg) => {
184                msg.contains("(500)")
185                    || msg.contains("(502)")
186                    || msg.contains("(503)")
187                    || msg.contains("(504)")
188                    || msg.contains("(529)")
189            }
190            _ => false,
191        }
192    }
193
194    /// Check if this error is deterministic and should never be retried.
195    ///
196    /// Non-retryable errors reference data that is permanently gone (e.g. a
197    /// deleted message, a missing agent). Retrying will never succeed and only
198    /// burns attempts while keeping the workflow stuck.
199    ///
200    /// Note: the durable worker currently uses string-matching via
201    /// `is_non_retryable_task_error` because task errors arrive as strings.
202    /// This method provides the typed equivalent for callers that have access
203    /// to a structured `AgentLoopError`.
204    pub fn is_non_retryable(&self) -> bool {
205        match self {
206            // Missing data is permanent — the entity was deleted.
207            AgentLoopError::AgentNotFound(_)
208            | AgentLoopError::HarnessNotFound(_)
209            | AgentLoopError::SessionNotFound(_)
210            | AgentLoopError::NoMessages => true,
211
212            // Config/driver errors won't self-heal within retries.
213            AgentLoopError::Configuration(_) | AgentLoopError::DriverNotRegistered(_) => true,
214
215            // MessageStore "not found" errors (deleted messages).
216            AgentLoopError::MessageStore(msg) => msg.to_ascii_lowercase().contains("not found"),
217
218            // Everything else is potentially transient.
219            _ => false,
220        }
221    }
222
223    /// Get user-facing error message based on error classification
224    pub fn user_facing_message(&self) -> String {
225        self.user_facing_error(UserFacingErrorContext::default())
226            .fallback_message()
227    }
228
229    /// Get structured user-facing error metadata based on error classification.
230    pub fn user_facing_error(&self, context: UserFacingErrorContext) -> UserFacingError {
231        match self {
232            AgentLoopError::ModelNotAvailable(model_id) => {
233                UserFacingError::new(user_facing_error_codes::MODEL_UNAVAILABLE)
234                    .with_field("model_id", model_id)
235                    .with_optional_field("provider", context.provider)
236            }
237            AgentLoopError::RequestTooLarge(_) => {
238                UserFacingError::new(user_facing_error_codes::REQUEST_TOO_LARGE)
239                    .with_optional_field("provider", context.provider)
240                    .with_optional_field("model_id", context.model_id)
241            }
242            AgentLoopError::MaxIterationsReached(max_iterations) => {
243                UserFacingError::new(user_facing_error_codes::MAX_ITERATIONS)
244                    .with_field("max_iterations", max_iterations)
245            }
246            AgentLoopError::Llm(message) => classify_runtime_error_message(message, &context),
247            _ => UserFacingError::new(user_facing_error_codes::PROCESSING_ERROR)
248                .with_optional_field("provider", context.provider)
249                .with_optional_field("model_id", context.model_id),
250        }
251    }
252}
253
254// ============================================================================
255// Store Result Extension Trait
256// ============================================================================
257
258/// Extension trait that converts any `Result<T, E: Display>` into `Result<T, AgentLoopError>`
259/// via `AgentLoopError::store(e.to_string())`.
260///
261/// Replaces the boilerplate pattern:
262/// ```ignore
263/// .map_err(|e| AgentLoopError::store(e.to_string()))?
264/// ```
265/// with:
266/// ```ignore
267/// .store_err()?
268/// ```
269pub trait StoreResultExt<T> {
270    fn store_err(self) -> Result<T>;
271}
272
273impl<T, E: std::fmt::Display> StoreResultExt<T> for std::result::Result<T, E> {
274    fn store_err(self) -> Result<T> {
275        self.map_err(|e| AgentLoopError::store(e.to_string()))
276    }
277}
278
279// ============================================================================
280// JSON Helpers
281// ============================================================================
282
283/// Convert a serializable value to `serde_json::Value`, falling back to `Value::Null` on error.
284///
285/// Replaces the boilerplate pattern:
286/// ```ignore
287/// serde_json::to_value(&x).unwrap_or_default()
288/// ```
289pub fn json_val<T: Serialize>(value: &T) -> serde_json::Value {
290    serde_json::to_value(value).unwrap_or_default()
291}
292
293/// Deserialize a `serde_json::Value` into `T`, falling back to `T::default()` on error.
294///
295/// Replaces the boilerplate pattern:
296/// ```ignore
297/// serde_json::from_value(v).unwrap_or_default()
298/// ```
299pub fn from_json<T: DeserializeOwned + Default>(value: serde_json::Value) -> T {
300    serde_json::from_value(value).unwrap_or_default()
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    // Provider-style messages shared by several test cases.
    const ANTHROPIC_429: &str = "Anthropic API error (429): rate limit exceeded";
    const ANTHROPIC_401: &str = "Anthropic API error (401): invalid api key";
    const ANTHROPIC_500: &str = "Anthropic API error (500): internal server error";

    // ---- RequestTooLarge ---------------------------------------------------

    #[test]
    fn test_is_request_too_large_returns_true_for_typed_error() {
        let error = AgentLoopError::request_too_large("context length exceeded");
        assert!(error.is_request_too_large());
    }

    #[test]
    fn test_is_request_too_large_returns_false_for_llm_error() {
        let error = AgentLoopError::llm("OpenAI API error (500): Internal server error");
        assert!(!error.is_request_too_large());
    }

    #[test]
    fn test_is_request_too_large_returns_false_for_other_errors() {
        let unrelated = [
            AgentLoopError::tool("some error"),
            AgentLoopError::Cancelled,
        ];
        for error in &unrelated {
            assert!(!error.is_request_too_large());
        }
    }

    #[test]
    fn test_request_too_large_error_preserves_message() {
        let original_msg = "OpenAI API error (429): Request too large for gpt-4";
        let rendered = AgentLoopError::request_too_large(original_msg).to_string();
        assert_eq!(rendered, format!("Request too large: {original_msg}"));
    }

    // ---- ModelNotAvailable -------------------------------------------------

    #[test]
    fn test_is_model_not_available_returns_true_for_typed_error() {
        let model = "claude-sonnet-4-6-20260217";
        let error = AgentLoopError::model_not_available(model);
        assert!(error.is_model_not_available());
        assert_eq!(error.model_not_available_id(), Some(model));
    }

    #[test]
    fn test_is_model_not_available_returns_false_for_llm_error() {
        let error = AgentLoopError::llm("some error");
        assert!(!error.is_model_not_available());
        assert_eq!(error.model_not_available_id(), None);
    }

    #[test]
    fn test_model_not_available_error_display() {
        let rendered = AgentLoopError::model_not_available("gpt-99").to_string();
        assert_eq!(rendered, "Model not available: gpt-99");
    }

    // ---- Message-based classification --------------------------------------

    #[test]
    fn test_is_rate_limited_detects_429() {
        assert!(AgentLoopError::llm(ANTHROPIC_429).is_rate_limited());
    }

    #[test]
    fn test_is_rate_limited_detects_rate_limit_keyword() {
        let error =
            AgentLoopError::llm("Rate limit exceeded (after 2 retries, last error: too many)");
        assert!(error.is_rate_limited());
    }

    #[test]
    fn test_is_rate_limited_false_for_server_error() {
        assert!(!AgentLoopError::llm(ANTHROPIC_500).is_rate_limited());
    }

    #[test]
    fn test_is_auth_error_detects_401() {
        assert!(AgentLoopError::llm(ANTHROPIC_401).is_auth_error());
    }

    #[test]
    fn test_is_auth_error_detects_403() {
        let error = AgentLoopError::llm("OpenAI API error (403): forbidden");
        assert!(error.is_auth_error());
    }

    #[test]
    fn test_is_server_error_detects_500() {
        assert!(AgentLoopError::llm(ANTHROPIC_500).is_server_error());
    }

    #[test]
    fn test_is_server_error_detects_503() {
        let error = AgentLoopError::llm("OpenAI API error (503): service unavailable");
        assert!(error.is_server_error());
    }

    // ---- User-facing messages ----------------------------------------------

    #[test]
    fn test_user_facing_message_rate_limited() {
        let message = AgentLoopError::llm(ANTHROPIC_429).user_facing_message();
        assert_eq!(
            message,
            "Rate limited by the AI provider. Please wait a moment."
        );
    }

    #[test]
    fn test_user_facing_message_auth_error() {
        let message = AgentLoopError::llm(ANTHROPIC_401).user_facing_message();
        assert_eq!(
            message,
            "There is a misconfiguration with the AI provider. Please contact support."
        );
    }

    #[test]
    fn test_user_facing_message_server_error() {
        let message = AgentLoopError::llm(ANTHROPIC_500).user_facing_message();
        assert_eq!(
            message,
            "The AI provider is experiencing issues. Please try again shortly."
        );
    }

    #[test]
    fn test_user_facing_message_generic_fallback() {
        let message =
            AgentLoopError::llm("Failed to send request: connection refused").user_facing_message();
        assert_eq!(
            message,
            "I encountered an error while processing your request. Please try again later."
        );
    }

    #[test]
    fn test_user_facing_message_model_not_available() {
        let error = AgentLoopError::model_not_available("gpt-99");
        for expected_fragment in ["gpt-99", "not available"] {
            assert!(error.user_facing_message().contains(expected_fragment));
        }
    }

    #[test]
    fn test_user_facing_message_request_too_large() {
        let message =
            AgentLoopError::request_too_large("context length exceeded").user_facing_message();
        assert!(message.contains("too long"));
    }

    // ---- Structured user-facing errors -------------------------------------

    #[test]
    fn test_user_facing_error_model_not_available_includes_model_id() {
        let user_error = AgentLoopError::model_not_available("gpt-99")
            .user_facing_error(UserFacingErrorContext::default());

        assert_eq!(user_error.code, user_facing_error_codes::MODEL_UNAVAILABLE);
        assert_eq!(
            user_error.fields.get("model_id"),
            Some(&serde_json::json!("gpt-99"))
        );
    }

    #[test]
    fn test_user_facing_error_rate_limited_includes_provider_context() {
        let context = UserFacingErrorContext::default()
            .with_provider("anthropic")
            .with_model_id("claude-sonnet-4-5")
            .with_retry_after(12);
        let user_error = AgentLoopError::llm(ANTHROPIC_429).user_facing_error(context);

        assert_eq!(
            user_error.code,
            user_facing_error_codes::PROVIDER_RATE_LIMITED
        );
        assert_eq!(
            user_error.fields.get("provider"),
            Some(&serde_json::json!("anthropic"))
        );
        assert_eq!(
            user_error.fields.get("model_id"),
            Some(&serde_json::json!("claude-sonnet-4-5"))
        );
        assert_eq!(
            user_error.fields.get("retry_after"),
            Some(&serde_json::json!(12))
        );
    }

    // ---- StoreResultExt ----------------------------------------------------

    #[test]
    fn test_store_result_ext_ok() {
        let outcome: std::result::Result<i32, String> = Ok(42);
        assert_eq!(outcome.store_err().unwrap(), 42);
    }

    #[test]
    fn test_store_result_ext_err() {
        let outcome: std::result::Result<i32, String> = Err("db error".to_string());
        let converted = outcome.store_err().unwrap_err();
        assert!(matches!(converted, AgentLoopError::MessageStore(_)));
        assert!(converted.to_string().contains("db error"));
    }

    // ---- JSON helpers ------------------------------------------------------

    #[test]
    fn test_json_val() {
        let numbers = vec![1, 2, 3];
        assert_eq!(json_val(&numbers), serde_json::json!([1, 2, 3]));
    }

    #[test]
    fn test_from_json() {
        let decoded: Vec<String> = from_json(serde_json::json!(["a", "b"]));
        assert_eq!(decoded, vec!["a", "b"]);
    }

    #[test]
    fn test_from_json_default_on_mismatch() {
        let decoded: i32 = from_json(serde_json::json!("not a number"));
        assert_eq!(decoded, 0);
    }
}