// meerkat_core/error.rs

1//! Core error types for Meerkat
2
3use crate::hooks::{HookPoint, HookReasonCode};
4use crate::types::SessionId;
5
6#[derive(Debug, Clone, PartialEq)]
7#[non_exhaustive]
8pub enum LlmFailureReason {
9    RateLimited {
10        retry_after: Option<std::time::Duration>,
11    },
12    ContextExceeded {
13        max: u32,
14        requested: u32,
15    },
16    AuthError,
17    InvalidModel(String),
18    ProviderError(serde_json::Value),
19    /// Provider/client-native network timeout (owned by client layer)
20    NetworkTimeout {
21        duration_ms: u64,
22    },
23    /// Agent-loop hard call timeout (owned by agent loop policy)
24    CallTimeout {
25        duration_ms: u64,
26    },
27}
28
29/// Errors that can occur during tool validation
30#[derive(Debug, Clone, thiserror::Error, PartialEq)]
31pub enum ToolValidationError {
32    /// The requested tool was not found
33    #[error("Tool not found: {name}")]
34    NotFound { name: String },
35    /// The tool arguments failed validation
36    #[error("Invalid arguments for tool '{name}': {reason}")]
37    InvalidArguments { name: String, reason: String },
38}
39
40impl ToolValidationError {
41    pub fn not_found(name: impl Into<String>) -> Self {
42        Self::NotFound { name: name.into() }
43    }
44    pub fn invalid_arguments(name: impl Into<String>, reason: impl Into<String>) -> Self {
45        Self::InvalidArguments {
46            name: name.into(),
47            reason: reason.into(),
48        }
49    }
50}
51
52/// Error returned by tool dispatch operations.
53#[derive(Debug, Clone, thiserror::Error)]
54pub enum ToolError {
55    /// The requested tool was not found
56    #[error("Tool not found: {name}")]
57    NotFound { name: String },
58
59    /// The tool exists but is currently unavailable
60    #[error("Tool '{name}' is currently unavailable: {reason}")]
61    Unavailable { name: String, reason: String },
62
63    /// The tool arguments failed validation
64    #[error("Invalid arguments for tool '{name}': {reason}")]
65    InvalidArguments { name: String, reason: String },
66
67    /// The tool execution failed
68    #[error("Tool execution failed: {message}")]
69    ExecutionFailed { message: String },
70
71    /// The tool execution timed out
72    #[error("Tool '{name}' timed out after {timeout_ms}ms")]
73    Timeout { name: String, timeout_ms: u64 },
74
75    /// Tool access was denied by policy
76    #[error("Tool '{name}' is not allowed by policy")]
77    AccessDenied { name: String },
78
79    /// A generic tool error with a message
80    #[error("{0}")]
81    Other(String),
82
83    /// Tool call must be routed externally (callback pending)
84    ///
85    /// This variant signals that a tool call cannot be handled internally
86    /// and must be routed to an external handler. The payload contains
87    /// serialized information about the pending tool call.
88    #[error("Callback pending for tool '{tool_name}'")]
89    CallbackPending {
90        tool_name: String,
91        args: serde_json::Value,
92    },
93}
94
95impl ToolError {
96    pub fn error_code(&self) -> &'static str {
97        match self {
98            Self::NotFound { .. } => "tool_not_found",
99            Self::Unavailable { .. } => "tool_unavailable",
100            Self::InvalidArguments { .. } => "invalid_arguments",
101            Self::ExecutionFailed { .. } => "execution_failed",
102            Self::Timeout { .. } => "timeout",
103            Self::AccessDenied { .. } => "access_denied",
104            Self::Other(_) => "tool_error",
105            Self::CallbackPending { .. } => "callback_pending",
106        }
107    }
108
109    pub fn to_error_payload(&self) -> serde_json::Value {
110        serde_json::json!({
111            "error": self.error_code(),
112            "message": self.to_string(),
113        })
114    }
115
116    pub fn not_found(name: impl Into<String>) -> Self {
117        Self::NotFound { name: name.into() }
118    }
119    pub fn unavailable(name: impl Into<String>, reason: impl Into<String>) -> Self {
120        Self::Unavailable {
121            name: name.into(),
122            reason: reason.into(),
123        }
124    }
125    pub fn invalid_arguments(name: impl Into<String>, reason: impl Into<String>) -> Self {
126        Self::InvalidArguments {
127            name: name.into(),
128            reason: reason.into(),
129        }
130    }
131    pub fn execution_failed(message: impl Into<String>) -> Self {
132        Self::ExecutionFailed {
133            message: message.into(),
134        }
135    }
136    pub fn timeout(name: impl Into<String>, timeout_ms: u64) -> Self {
137        Self::Timeout {
138            name: name.into(),
139            timeout_ms,
140        }
141    }
142    pub fn access_denied(name: impl Into<String>) -> Self {
143        Self::AccessDenied { name: name.into() }
144    }
145    pub fn other(message: impl Into<String>) -> Self {
146        Self::Other(message.into())
147    }
148
149    /// Create a callback pending error for external tool routing
150    pub fn callback_pending(tool_name: impl Into<String>, args: serde_json::Value) -> Self {
151        Self::CallbackPending {
152            tool_name: tool_name.into(),
153            args,
154        }
155    }
156
157    /// Check if this is a callback pending error
158    pub fn is_callback_pending(&self) -> bool {
159        matches!(self, Self::CallbackPending { .. })
160    }
161
162    /// Extract callback pending info if this is a CallbackPending error
163    pub fn as_callback_pending(&self) -> Option<(&str, &serde_json::Value)> {
164        match self {
165            Self::CallbackPending { tool_name, args } => Some((tool_name, args)),
166            _ => None,
167        }
168    }
169}
170
171impl From<String> for ToolError {
172    fn from(s: String) -> Self {
173        Self::Other(s)
174    }
175}
176impl From<&str> for ToolError {
177    fn from(s: &str) -> Self {
178        Self::Other(s.to_string())
179    }
180}
181
182/// Errors that can occur during agent execution
183#[derive(Debug, thiserror::Error)]
184#[non_exhaustive]
185pub enum AgentError {
186    #[error("LLM error ({provider}): {message}")]
187    Llm {
188        provider: &'static str,
189        reason: LlmFailureReason,
190        message: String,
191    },
192    #[error("Storage error: {0}")]
193    StoreError(String),
194    #[error("Tool error: {0}")]
195    ToolError(String),
196    #[error("MCP error: {0}")]
197    McpError(String),
198    #[error("Session not found: {0}")]
199    SessionNotFound(SessionId),
200    #[error("Token budget exceeded: used {used}, limit {limit}")]
201    TokenBudgetExceeded { used: u64, limit: u64 },
202    #[error("Time budget exceeded: {elapsed_secs}s > {limit_secs}s")]
203    TimeBudgetExceeded { elapsed_secs: u64, limit_secs: u64 },
204    #[error("Tool call budget exceeded: {count} calls > {limit} limit")]
205    ToolCallBudgetExceeded { count: usize, limit: usize },
206    #[error("Max tokens reached on turn {turn}, partial output: {partial}")]
207    MaxTokensReached { turn: u32, partial: String },
208    #[error("Content filtered on turn {turn}")]
209    ContentFiltered { turn: u32 },
210    #[error("Max turns reached: {turns}")]
211    MaxTurnsReached { turns: u32 },
212    #[error("Run was cancelled")]
213    Cancelled,
214    #[error("Invalid state transition: {from} -> {to}")]
215    InvalidStateTransition { from: String, to: String },
216    #[error("Operation not found: {0}")]
217    OperationNotFound(String),
218    #[error("Depth limit exceeded: {depth} > {max}")]
219    DepthLimitExceeded { depth: u32, max: u32 },
220    #[error("Concurrency limit exceeded")]
221    ConcurrencyLimitExceeded,
222    #[error("Configuration error: {0}")]
223    ConfigError(String),
224    #[error("Invalid tool in access policy: {tool}")]
225    InvalidToolAccess { tool: String },
226    #[error("Internal error: {0}")]
227    InternalError(String),
228
229    /// Agent construction failed (e.g. missing API key, unknown provider).
230    #[error("Build error: {0}")]
231    BuildError(String),
232
233    /// A tool call must be routed externally (callback pending)
234    #[error("Callback pending for tool '{tool_name}'")]
235    CallbackPending {
236        tool_name: String,
237        args: serde_json::Value,
238    },
239
240    /// Structured output validation failed after retries
241    #[error("Structured output validation failed after {attempts} attempts: {reason}")]
242    StructuredOutputValidationFailed {
243        attempts: u32,
244        reason: String,
245        last_output: String,
246    },
247
248    /// Invalid output schema provided
249    #[error("Invalid output schema: {0}")]
250    InvalidOutputSchema(String),
251
252    #[error("Hook denied at {point:?}: {reason_code:?} - {message}")]
253    HookDenied {
254        point: HookPoint,
255        reason_code: HookReasonCode,
256        message: String,
257        payload: Option<serde_json::Value>,
258    },
259
260    #[error("Hook '{hook_id}' timed out after {timeout_ms}ms")]
261    HookTimeout { hook_id: String, timeout_ms: u64 },
262
263    #[error("Hook execution failed for '{hook_id}': {reason}")]
264    HookExecutionFailed { hook_id: String, reason: String },
265
266    #[error("Hook configuration invalid: {reason}")]
267    HookConfigInvalid { reason: String },
268
269    /// Turn execution reached a terminal outcome classified as HardFailure.
270    #[error("Terminal failure: {outcome:?}")]
271    TerminalFailure {
272        outcome: crate::turn_execution_authority::TurnTerminalOutcome,
273    },
274
275    /// The session has no pending user/tool-results boundary for `run_pending`.
276    ///
277    /// Returned when `RuntimeExecutionKind::ResumePending` is requested but the
278    /// session's last message is not `User` or `ToolResults`. The caller should
279    /// treat this as a successful no-op (no turn ran, no output produced).
280    #[error("no pending boundary for resume")]
281    NoPendingBoundary,
282}
283
284impl AgentError {
285    pub fn llm(
286        provider: &'static str,
287        reason: LlmFailureReason,
288        message: impl Into<String>,
289    ) -> Self {
290        Self::Llm {
291            provider,
292            reason,
293            message: message.into(),
294        }
295    }
296    pub fn is_graceful(&self) -> bool {
297        matches!(
298            self,
299            Self::TokenBudgetExceeded { .. }
300                | Self::TimeBudgetExceeded { .. }
301                | Self::ToolCallBudgetExceeded { .. }
302                | Self::MaxTurnsReached { .. }
303        )
304    }
305    pub fn is_rate_limited(&self) -> bool {
306        matches!(
307            self,
308            Self::Llm {
309                reason: LlmFailureReason::RateLimited { .. },
310                ..
311            }
312        )
313    }
314
315    pub fn retry_after_hint(&self) -> Option<std::time::Duration> {
316        match self {
317            Self::Llm {
318                reason: LlmFailureReason::RateLimited { retry_after },
319                ..
320            } => *retry_after,
321            _ => None,
322        }
323    }
324
325    pub fn is_recoverable(&self) -> bool {
326        match self {
327            Self::Llm { reason, .. } => match reason {
328                LlmFailureReason::RateLimited { .. } => true,
329                LlmFailureReason::NetworkTimeout { .. } => true,
330                LlmFailureReason::CallTimeout { .. } => true,
331                LlmFailureReason::ProviderError(value) => {
332                    value.get("retryable").and_then(serde_json::Value::as_bool) == Some(true)
333                }
334                _ => false,
335            },
336            _ => false,
337        }
338    }
339}
340
341pub fn store_error(err: impl std::fmt::Display) -> AgentError {
342    AgentError::StoreError(store_error_message(err))
343}
344pub fn invalid_session_id(err: impl std::fmt::Display) -> AgentError {
345    AgentError::StoreError(invalid_session_id_message(err))
346}
/// Renders a store error as its plain `Display` text, with nothing added.
pub fn store_error_message(err: impl std::fmt::Display) -> String {
    ToString::to_string(&err)
}
/// Renders an invalid-session-id failure as `"Invalid session ID: "`
/// followed by the `Display` text of `err`.
pub fn invalid_session_id_message(err: impl std::fmt::Display) -> String {
    let mut message = String::from("Invalid session ID: ");
    message.push_str(&err.to_string());
    message
}
353
#[cfg(test)]
// Tests report failed expectations by panicking, so the panic-style lints are relaxed here.
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    // -- Timeout classification --

    #[test]
    fn test_network_timeout_is_recoverable() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 30000 },
            "network timeout after 30s",
        );
        assert!(err.is_recoverable());
    }

    #[test]
    fn test_call_timeout_is_recoverable() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::CallTimeout { duration_ms: 45000 },
            "call timeout after 45s",
        );
        assert!(err.is_recoverable());
    }

    #[test]
    fn test_network_timeout_typed_mapping() {
        let reason = LlmFailureReason::NetworkTimeout { duration_ms: 5000 };
        match reason {
            LlmFailureReason::NetworkTimeout { duration_ms } => {
                assert_eq!(duration_ms, 5000);
            }
            _ => panic!("expected NetworkTimeout"),
        }
    }

    #[test]
    fn test_call_timeout_typed_mapping() {
        let reason = LlmFailureReason::CallTimeout { duration_ms: 60000 };
        match reason {
            LlmFailureReason::CallTimeout { duration_ms } => {
                assert_eq!(duration_ms, 60000);
            }
            _ => panic!("expected CallTimeout"),
        }
    }

    #[test]
    fn test_timeout_variants_are_distinct() {
        // Same payload, different variant: the two timeouts must never compare equal.
        let net = LlmFailureReason::NetworkTimeout { duration_ms: 1000 };
        let call = LlmFailureReason::CallTimeout { duration_ms: 1000 };
        assert_ne!(net, call);
    }

    #[test]
    fn test_auth_error_not_recoverable() {
        let err = AgentError::llm("anthropic", LlmFailureReason::AuthError, "bad key");
        assert!(!err.is_recoverable());
    }

    #[test]
    fn test_provider_error_recoverable_only_when_flagged_retryable() {
        let flagged = AgentError::llm(
            "anthropic",
            LlmFailureReason::ProviderError(serde_json::json!({ "retryable": true })),
            "overloaded",
        );
        assert!(flagged.is_recoverable());

        // An explicit false, a non-boolean value, and a missing key are all non-recoverable.
        for payload in [
            serde_json::json!({ "retryable": false }),
            serde_json::json!({ "retryable": "true" }),
            serde_json::json!({}),
        ] {
            let err = AgentError::llm(
                "anthropic",
                LlmFailureReason::ProviderError(payload),
                "provider error",
            );
            assert!(!err.is_recoverable());
        }
    }

    // -- Rate-limit helper tests (PR #156 port) --

    #[test]
    fn test_is_rate_limited_true_for_rate_limit_error() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::RateLimited {
                retry_after: Some(std::time::Duration::from_secs(30)),
            },
            "rate limited",
        );
        assert!(err.is_rate_limited());
    }

    #[test]
    fn test_is_rate_limited_false_for_other_errors() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 5000 },
            "timeout",
        );
        assert!(!err.is_rate_limited());

        let err = AgentError::llm("anthropic", LlmFailureReason::AuthError, "bad key");
        assert!(!err.is_rate_limited());
    }

    #[test]
    fn test_retry_after_hint_returns_duration_for_rate_limit() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::RateLimited {
                retry_after: Some(std::time::Duration::from_secs(60)),
            },
            "rate limited",
        );
        assert_eq!(
            err.retry_after_hint(),
            Some(std::time::Duration::from_secs(60))
        );
    }

    #[test]
    fn test_retry_after_hint_returns_none_for_non_rate_limit() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 5000 },
            "timeout",
        );
        assert_eq!(err.retry_after_hint(), None);
    }

    #[test]
    fn test_timeout_variants_not_graceful() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 1000 },
            "timeout",
        );
        assert!(!err.is_graceful());

        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::CallTimeout { duration_ms: 1000 },
            "timeout",
        );
        assert!(!err.is_graceful());
    }

    // -- P2-6: Typed BuildError variant --

    #[test]
    fn test_build_error_variant_exists_and_carries_message() {
        let err = AgentError::BuildError("Missing API key for provider 'anthropic'".to_string());
        match &err {
            AgentError::BuildError(msg) => {
                assert!(
                    msg.contains("API key"),
                    "message should contain source text"
                );
            }
            other => panic!("expected BuildError, got: {other}"),
        }
    }

    #[test]
    fn test_build_error_is_not_recoverable() {
        let err = AgentError::BuildError("Unknown provider for model 'llama-3'".to_string());
        assert!(!err.is_recoverable(), "build errors are not recoverable");
    }

    #[test]
    fn test_build_error_is_not_graceful() {
        let err = AgentError::BuildError("Missing API key".to_string());
        assert!(!err.is_graceful(), "build errors are not graceful");
    }

    #[test]
    fn test_build_error_display() {
        let err = AgentError::BuildError("Missing API key for provider 'anthropic'".to_string());
        // Pin the exact rendering: a loose "either prefix or message" check would
        // still pass if one of the two went missing from the output.
        assert_eq!(
            err.to_string(),
            "Build error: Missing API key for provider 'anthropic'"
        );
    }

    // -- P2-7: Typed TerminalFailure outcome --

    #[test]
    fn test_terminal_failure_carries_typed_outcome() {
        use crate::turn_execution_authority::TurnTerminalOutcome;

        // TerminalFailure must carry the typed enum, not a Debug-formatted string.
        let err = AgentError::TerminalFailure {
            outcome: TurnTerminalOutcome::Failed,
        };
        match &err {
            AgentError::TerminalFailure { outcome } => {
                // If this compiles, outcome is TurnTerminalOutcome, not String.
                assert_eq!(*outcome, TurnTerminalOutcome::Failed);
            }
            other => panic!("expected TerminalFailure, got: {other}"),
        }
    }

    #[test]
    fn test_terminal_failure_display_includes_outcome() {
        use crate::turn_execution_authority::TurnTerminalOutcome;

        let err = AgentError::TerminalFailure {
            outcome: TurnTerminalOutcome::TimeBudgetExceeded,
        };
        let display = err.to_string();
        assert!(
            display.contains("TimeBudgetExceeded"),
            "display should include the outcome variant name: {display}"
        );
    }

    #[test]
    fn test_terminal_failure_all_hard_failure_outcomes() {
        use crate::turn_execution_authority::TurnTerminalOutcome;

        // Both hard-failure outcomes should be representable.
        for outcome in [
            TurnTerminalOutcome::Failed,
            TurnTerminalOutcome::TimeBudgetExceeded,
        ] {
            let err = AgentError::TerminalFailure { outcome };
            assert!(
                !err.is_graceful(),
                "TerminalFailure({outcome:?}) should not be graceful"
            );
        }
    }
}