1use crate::hooks::{HookPoint, HookReasonCode};
4use crate::types::SessionId;
5
6#[derive(Debug, Clone, PartialEq)]
7#[non_exhaustive]
8pub enum LlmFailureReason {
9 RateLimited {
10 retry_after: Option<std::time::Duration>,
11 },
12 ContextExceeded {
13 max: u32,
14 requested: u32,
15 },
16 AuthError,
17 InvalidModel(String),
18 ProviderError(serde_json::Value),
19 NetworkTimeout {
21 duration_ms: u64,
22 },
23 CallTimeout {
25 duration_ms: u64,
26 },
27}
28
29#[derive(Debug, Clone, thiserror::Error, PartialEq)]
31pub enum ToolValidationError {
32 #[error("Tool not found: {name}")]
34 NotFound { name: String },
35 #[error("Invalid arguments for tool '{name}': {reason}")]
37 InvalidArguments { name: String, reason: String },
38}
39
40impl ToolValidationError {
41 pub fn not_found(name: impl Into<String>) -> Self {
42 Self::NotFound { name: name.into() }
43 }
44 pub fn invalid_arguments(name: impl Into<String>, reason: impl Into<String>) -> Self {
45 Self::InvalidArguments {
46 name: name.into(),
47 reason: reason.into(),
48 }
49 }
50}
51
52#[derive(Debug, Clone, thiserror::Error)]
54pub enum ToolError {
55 #[error("Tool not found: {name}")]
57 NotFound { name: String },
58
59 #[error("Tool '{name}' is currently unavailable: {reason}")]
61 Unavailable { name: String, reason: String },
62
63 #[error("Invalid arguments for tool '{name}': {reason}")]
65 InvalidArguments { name: String, reason: String },
66
67 #[error("Tool execution failed: {message}")]
69 ExecutionFailed { message: String },
70
71 #[error("Tool '{name}' timed out after {timeout_ms}ms")]
73 Timeout { name: String, timeout_ms: u64 },
74
75 #[error("Tool '{name}' is not allowed by policy")]
77 AccessDenied { name: String },
78
79 #[error("{0}")]
81 Other(String),
82
83 #[error("Callback pending for tool '{tool_name}'")]
89 CallbackPending {
90 tool_name: String,
91 args: serde_json::Value,
92 },
93}
94
95impl ToolError {
96 pub fn error_code(&self) -> &'static str {
97 match self {
98 Self::NotFound { .. } => "tool_not_found",
99 Self::Unavailable { .. } => "tool_unavailable",
100 Self::InvalidArguments { .. } => "invalid_arguments",
101 Self::ExecutionFailed { .. } => "execution_failed",
102 Self::Timeout { .. } => "timeout",
103 Self::AccessDenied { .. } => "access_denied",
104 Self::Other(_) => "tool_error",
105 Self::CallbackPending { .. } => "callback_pending",
106 }
107 }
108
109 pub fn to_error_payload(&self) -> serde_json::Value {
110 serde_json::json!({
111 "error": self.error_code(),
112 "message": self.to_string(),
113 })
114 }
115
116 pub fn not_found(name: impl Into<String>) -> Self {
117 Self::NotFound { name: name.into() }
118 }
119 pub fn unavailable(name: impl Into<String>, reason: impl Into<String>) -> Self {
120 Self::Unavailable {
121 name: name.into(),
122 reason: reason.into(),
123 }
124 }
125 pub fn invalid_arguments(name: impl Into<String>, reason: impl Into<String>) -> Self {
126 Self::InvalidArguments {
127 name: name.into(),
128 reason: reason.into(),
129 }
130 }
131 pub fn execution_failed(message: impl Into<String>) -> Self {
132 Self::ExecutionFailed {
133 message: message.into(),
134 }
135 }
136 pub fn timeout(name: impl Into<String>, timeout_ms: u64) -> Self {
137 Self::Timeout {
138 name: name.into(),
139 timeout_ms,
140 }
141 }
142 pub fn access_denied(name: impl Into<String>) -> Self {
143 Self::AccessDenied { name: name.into() }
144 }
145 pub fn other(message: impl Into<String>) -> Self {
146 Self::Other(message.into())
147 }
148
149 pub fn callback_pending(tool_name: impl Into<String>, args: serde_json::Value) -> Self {
151 Self::CallbackPending {
152 tool_name: tool_name.into(),
153 args,
154 }
155 }
156
157 pub fn is_callback_pending(&self) -> bool {
159 matches!(self, Self::CallbackPending { .. })
160 }
161
162 pub fn as_callback_pending(&self) -> Option<(&str, &serde_json::Value)> {
164 match self {
165 Self::CallbackPending { tool_name, args } => Some((tool_name, args)),
166 _ => None,
167 }
168 }
169}
170
171impl From<String> for ToolError {
172 fn from(s: String) -> Self {
173 Self::Other(s)
174 }
175}
176impl From<&str> for ToolError {
177 fn from(s: &str) -> Self {
178 Self::Other(s.to_string())
179 }
180}
181
182#[derive(Debug, thiserror::Error)]
184#[non_exhaustive]
185pub enum AgentError {
186 #[error("LLM error ({provider}): {message}")]
187 Llm {
188 provider: &'static str,
189 reason: LlmFailureReason,
190 message: String,
191 },
192 #[error("Storage error: {0}")]
193 StoreError(String),
194 #[error("Tool error: {0}")]
195 ToolError(String),
196 #[error("MCP error: {0}")]
197 McpError(String),
198 #[error("Session not found: {0}")]
199 SessionNotFound(SessionId),
200 #[error("Token budget exceeded: used {used}, limit {limit}")]
201 TokenBudgetExceeded { used: u64, limit: u64 },
202 #[error("Time budget exceeded: {elapsed_secs}s > {limit_secs}s")]
203 TimeBudgetExceeded { elapsed_secs: u64, limit_secs: u64 },
204 #[error("Tool call budget exceeded: {count} calls > {limit} limit")]
205 ToolCallBudgetExceeded { count: usize, limit: usize },
206 #[error("Max tokens reached on turn {turn}, partial output: {partial}")]
207 MaxTokensReached { turn: u32, partial: String },
208 #[error("Content filtered on turn {turn}")]
209 ContentFiltered { turn: u32 },
210 #[error("Max turns reached: {turns}")]
211 MaxTurnsReached { turns: u32 },
212 #[error("Run was cancelled")]
213 Cancelled,
214 #[error("Invalid state transition: {from} -> {to}")]
215 InvalidStateTransition { from: String, to: String },
216 #[error("Operation not found: {0}")]
217 OperationNotFound(String),
218 #[error("Depth limit exceeded: {depth} > {max}")]
219 DepthLimitExceeded { depth: u32, max: u32 },
220 #[error("Concurrency limit exceeded")]
221 ConcurrencyLimitExceeded,
222 #[error("Configuration error: {0}")]
223 ConfigError(String),
224 #[error("Invalid tool in access policy: {tool}")]
225 InvalidToolAccess { tool: String },
226 #[error("Internal error: {0}")]
227 InternalError(String),
228
229 #[error("Build error: {0}")]
231 BuildError(String),
232
233 #[error("Callback pending for tool '{tool_name}'")]
235 CallbackPending {
236 tool_name: String,
237 args: serde_json::Value,
238 },
239
240 #[error("Structured output validation failed after {attempts} attempts: {reason}")]
242 StructuredOutputValidationFailed {
243 attempts: u32,
244 reason: String,
245 last_output: String,
246 },
247
248 #[error("Invalid output schema: {0}")]
250 InvalidOutputSchema(String),
251
252 #[error("Hook denied at {point:?}: {reason_code:?} - {message}")]
253 HookDenied {
254 point: HookPoint,
255 reason_code: HookReasonCode,
256 message: String,
257 payload: Option<serde_json::Value>,
258 },
259
260 #[error("Hook '{hook_id}' timed out after {timeout_ms}ms")]
261 HookTimeout { hook_id: String, timeout_ms: u64 },
262
263 #[error("Hook execution failed for '{hook_id}': {reason}")]
264 HookExecutionFailed { hook_id: String, reason: String },
265
266 #[error("Hook configuration invalid: {reason}")]
267 HookConfigInvalid { reason: String },
268
269 #[error("Terminal failure: {outcome:?}")]
271 TerminalFailure {
272 outcome: crate::turn_execution_authority::TurnTerminalOutcome,
273 },
274
275 #[error("no pending boundary for resume")]
281 NoPendingBoundary,
282}
283
284impl AgentError {
285 pub fn llm(
286 provider: &'static str,
287 reason: LlmFailureReason,
288 message: impl Into<String>,
289 ) -> Self {
290 Self::Llm {
291 provider,
292 reason,
293 message: message.into(),
294 }
295 }
296 pub fn is_graceful(&self) -> bool {
297 matches!(
298 self,
299 Self::TokenBudgetExceeded { .. }
300 | Self::TimeBudgetExceeded { .. }
301 | Self::ToolCallBudgetExceeded { .. }
302 | Self::MaxTurnsReached { .. }
303 )
304 }
305 pub fn is_rate_limited(&self) -> bool {
306 matches!(
307 self,
308 Self::Llm {
309 reason: LlmFailureReason::RateLimited { .. },
310 ..
311 }
312 )
313 }
314
315 pub fn retry_after_hint(&self) -> Option<std::time::Duration> {
316 match self {
317 Self::Llm {
318 reason: LlmFailureReason::RateLimited { retry_after },
319 ..
320 } => *retry_after,
321 _ => None,
322 }
323 }
324
325 pub fn is_recoverable(&self) -> bool {
326 match self {
327 Self::Llm { reason, .. } => match reason {
328 LlmFailureReason::RateLimited { .. } => true,
329 LlmFailureReason::NetworkTimeout { .. } => true,
330 LlmFailureReason::CallTimeout { .. } => true,
331 LlmFailureReason::ProviderError(value) => {
332 value.get("retryable").and_then(serde_json::Value::as_bool) == Some(true)
333 }
334 _ => false,
335 },
336 _ => false,
337 }
338 }
339}
340
341pub fn store_error(err: impl std::fmt::Display) -> AgentError {
342 AgentError::StoreError(store_error_message(err))
343}
344pub fn invalid_session_id(err: impl std::fmt::Display) -> AgentError {
345 AgentError::StoreError(invalid_session_id_message(err))
346}
/// Returns the display text of `err` unchanged, as an owned `String`.
pub fn store_error_message(err: impl std::fmt::Display) -> String {
    ToString::to_string(&err)
}
/// Formats `err` as `"Invalid session ID: <err>"`.
pub fn invalid_session_id_message(err: impl std::fmt::Display) -> String {
    let detail = err;
    format!("Invalid session ID: {detail}")
}
353
#[cfg(test)]
// Tests panic on purpose to report an unexpected variant.
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    // --- LLM failure classification -------------------------------------

    #[test]
    fn test_network_timeout_is_recoverable() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 30000 },
            "network timeout after 30s",
        );
        assert!(err.is_recoverable());
    }

    #[test]
    fn test_call_timeout_is_recoverable() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::CallTimeout { duration_ms: 45000 },
            "call timeout after 45s",
        );
        assert!(err.is_recoverable());
    }

    /// A rate limit is recoverable even when the provider gave no retry hint.
    #[test]
    fn test_rate_limited_is_recoverable() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::RateLimited { retry_after: None },
            "rate limited",
        );
        assert!(err.is_recoverable());
        assert!(err.is_rate_limited());
        assert_eq!(err.retry_after_hint(), None);
    }

    /// Only a boolean `"retryable": true` in the payload makes a provider
    /// error recoverable; a missing key or a non-boolean value does not.
    #[test]
    fn test_provider_error_recoverable_only_when_flagged_retryable() {
        let cases = [
            (serde_json::json!({ "retryable": true }), true),
            (serde_json::json!({ "retryable": false }), false),
            (serde_json::json!({ "retryable": "true" }), false),
            (serde_json::json!({}), false),
        ];
        for (payload, expected) in cases {
            let err = AgentError::llm(
                "anthropic",
                LlmFailureReason::ProviderError(payload.clone()),
                "provider error",
            );
            assert_eq!(err.is_recoverable(), expected, "payload: {payload}");
        }
    }

    #[test]
    fn test_network_timeout_typed_mapping() {
        let reason = LlmFailureReason::NetworkTimeout { duration_ms: 5000 };
        match reason {
            LlmFailureReason::NetworkTimeout { duration_ms } => {
                assert_eq!(duration_ms, 5000);
            }
            _ => panic!("expected NetworkTimeout"),
        }
    }

    #[test]
    fn test_call_timeout_typed_mapping() {
        let reason = LlmFailureReason::CallTimeout { duration_ms: 60000 };
        match reason {
            LlmFailureReason::CallTimeout { duration_ms } => {
                assert_eq!(duration_ms, 60000);
            }
            _ => panic!("expected CallTimeout"),
        }
    }

    #[test]
    fn test_timeout_variants_are_distinct() {
        // Same duration, different variant: the two must not compare equal.
        let net = LlmFailureReason::NetworkTimeout { duration_ms: 1000 };
        let call = LlmFailureReason::CallTimeout { duration_ms: 1000 };
        assert_ne!(net, call);
    }

    #[test]
    fn test_auth_error_not_recoverable() {
        let err = AgentError::llm("anthropic", LlmFailureReason::AuthError, "bad key");
        assert!(!err.is_recoverable());
    }

    // --- Rate-limit helpers ---------------------------------------------

    #[test]
    fn test_is_rate_limited_true_for_rate_limit_error() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::RateLimited {
                retry_after: Some(std::time::Duration::from_secs(30)),
            },
            "rate limited",
        );
        assert!(err.is_rate_limited());
    }

    #[test]
    fn test_is_rate_limited_false_for_other_errors() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 5000 },
            "timeout",
        );
        assert!(!err.is_rate_limited());

        let err = AgentError::llm("anthropic", LlmFailureReason::AuthError, "bad key");
        assert!(!err.is_rate_limited());
    }

    #[test]
    fn test_retry_after_hint_returns_duration_for_rate_limit() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::RateLimited {
                retry_after: Some(std::time::Duration::from_secs(60)),
            },
            "rate limited",
        );
        assert_eq!(
            err.retry_after_hint(),
            Some(std::time::Duration::from_secs(60))
        );
    }

    #[test]
    fn test_retry_after_hint_returns_none_for_non_rate_limit() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 5000 },
            "timeout",
        );
        assert_eq!(err.retry_after_hint(), None);
    }

    #[test]
    fn test_timeout_variants_not_graceful() {
        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::NetworkTimeout { duration_ms: 1000 },
            "timeout",
        );
        assert!(!err.is_graceful());

        let err = AgentError::llm(
            "anthropic",
            LlmFailureReason::CallTimeout { duration_ms: 1000 },
            "timeout",
        );
        assert!(!err.is_graceful());
    }

    // --- BuildError -------------------------------------------------------

    #[test]
    fn test_build_error_variant_exists_and_carries_message() {
        let err = AgentError::BuildError("Missing API key for provider 'anthropic'".to_string());
        match &err {
            AgentError::BuildError(msg) => {
                assert!(
                    msg.contains("API key"),
                    "message should contain source text"
                );
            }
            other => panic!("expected BuildError, got: {other}"),
        }
    }

    #[test]
    fn test_build_error_is_not_recoverable() {
        let err = AgentError::BuildError("Unknown provider for model 'llama-3'".to_string());
        assert!(!err.is_recoverable(), "build errors are not recoverable");
    }

    #[test]
    fn test_build_error_is_not_graceful() {
        let err = AgentError::BuildError("Missing API key".to_string());
        assert!(!err.is_graceful(), "build errors are not graceful");
    }

    #[test]
    fn test_build_error_display() {
        let err = AgentError::BuildError("Missing API key for provider 'anthropic'".to_string());
        // Pin the whole rendering: an any-of substring check would still pass
        // if the "Build error: " prefix or the message were dropped.
        assert_eq!(
            err.to_string(),
            "Build error: Missing API key for provider 'anthropic'"
        );
    }

    // --- TerminalFailure --------------------------------------------------

    #[test]
    fn test_terminal_failure_carries_typed_outcome() {
        use crate::turn_execution_authority::TurnTerminalOutcome;

        let err = AgentError::TerminalFailure {
            outcome: TurnTerminalOutcome::Failed,
        };
        match &err {
            AgentError::TerminalFailure { outcome } => {
                assert_eq!(*outcome, TurnTerminalOutcome::Failed);
            }
            other => panic!("expected TerminalFailure, got: {other}"),
        }
    }

    #[test]
    fn test_terminal_failure_display_includes_outcome() {
        use crate::turn_execution_authority::TurnTerminalOutcome;

        let err = AgentError::TerminalFailure {
            outcome: TurnTerminalOutcome::TimeBudgetExceeded,
        };
        let display = err.to_string();
        assert!(
            display.contains("TimeBudgetExceeded"),
            "display should include the outcome variant name: {display}"
        );
    }

    #[test]
    fn test_terminal_failure_all_hard_failure_outcomes() {
        use crate::turn_execution_authority::TurnTerminalOutcome;

        for outcome in [
            TurnTerminalOutcome::Failed,
            TurnTerminalOutcome::TimeBudgetExceeded,
        ] {
            let err = AgentError::TerminalFailure { outcome };
            assert!(
                !err.is_graceful(),
                "TerminalFailure({outcome:?}) should not be graceful"
            );
        }
    }
}