1use std::borrow::Cow;
22use std::fmt;
23use std::time::Duration;
24
/// Canonical category assigned to an error.
///
/// Each category carries a retry policy ([`ErrorCategory::retryability`]),
/// recovery hints ([`ErrorCategory::recovery_suggestions`]) and a short
/// user-facing label ([`ErrorCategory::user_label`], also used by `Display`).
///
/// The first five variants are the ones [`ErrorCategory::is_retryable`]
/// reports as retryable; none of the later variants are.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum ErrorCategory {
    /// Connectivity-level failure (connection reset/refused, DNS, broken pipe, ...).
    Network,
    /// The operation timed out or exceeded its deadline.
    Timeout,
    /// The provider throttled the request (rate limit / HTTP 429).
    RateLimit,
    /// The remote service is temporarily unavailable or returned an unusable response.
    ServiceUnavailable,
    /// The tool is temporarily disabled by a circuit breaker after repeated failures.
    CircuitOpen,

    /// Credentials or API key were rejected.
    Authentication,
    /// Arguments did not match what the tool expects; the only category
    /// [`ErrorCategory::is_llm_mistake`] reports.
    InvalidParameters,
    /// The requested tool is unknown or unsupported.
    ToolNotFound,
    /// A file, directory or other resource does not exist.
    ResourceNotFound,
    /// Access was denied; [`ErrorCategory::retryability`] reports this as
    /// requiring intervention.
    PermissionDenied,
    /// The operation was blocked by policy.
    PolicyViolation,
    /// The operation is not allowed in plan/read-only mode.
    PlanModeViolation,
    /// The sandbox denied the operation.
    SandboxFailure,
    /// A usage, quota, billing or system resource limit was reached.
    ResourceExhausted,
    /// The operation was cancelled.
    Cancelled,
    /// Fallback for errors that match no other category.
    ExecutionError,
}
65
/// Retry policy for an [`ErrorCategory`], as returned by
/// [`ErrorCategory::retryability`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Retryability {
    /// The operation may be retried.
    Retryable {
        /// Attempt budget for the retry.
        // NOTE(review): SOURCE does not show whether this counts the initial
        // attempt or only the retries — confirm against the retry driver.
        max_attempts: u32,
        /// Delay policy to use between attempts.
        backoff: BackoffStrategy,
    },
    /// The operation should not be retried.
    NonRetryable,
    /// The operation needs outside intervention before it can succeed
    /// (currently returned only for `ErrorCategory::PermissionDenied`).
    RequiresIntervention,
}
81
/// Delay policy applied between retry attempts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BackoffStrategy {
    /// Exponential backoff described by a `base` delay and a `max` delay.
    // NOTE(review): presumably the delay starts at `base`, grows per attempt
    // and is capped at `max`; the growth formula lives in the consumer, which
    // is not visible here — confirm.
    Exponential { base: Duration, max: Duration },
    /// A fixed delay.
    Fixed(Duration),
}
90
91impl ErrorCategory {
92 #[inline]
94 pub const fn is_retryable(&self) -> bool {
95 matches!(
96 self,
97 ErrorCategory::Network
98 | ErrorCategory::Timeout
99 | ErrorCategory::RateLimit
100 | ErrorCategory::ServiceUnavailable
101 | ErrorCategory::CircuitOpen
102 )
103 }
104
105 #[inline]
108 pub const fn is_llm_mistake(&self) -> bool {
109 matches!(self, ErrorCategory::InvalidParameters)
110 }
111
112 #[inline]
114 pub const fn is_permanent(&self) -> bool {
115 matches!(
116 self,
117 ErrorCategory::Authentication
118 | ErrorCategory::PolicyViolation
119 | ErrorCategory::PlanModeViolation
120 | ErrorCategory::ResourceExhausted
121 )
122 }
123
124 pub fn retryability(&self) -> Retryability {
126 match self {
127 ErrorCategory::Network | ErrorCategory::ServiceUnavailable => Retryability::Retryable {
128 max_attempts: 3,
129 backoff: BackoffStrategy::Exponential {
130 base: Duration::from_millis(500),
131 max: Duration::from_secs(10),
132 },
133 },
134 ErrorCategory::Timeout => Retryability::Retryable {
135 max_attempts: 2,
136 backoff: BackoffStrategy::Exponential {
137 base: Duration::from_millis(1000),
138 max: Duration::from_secs(15),
139 },
140 },
141 ErrorCategory::RateLimit => Retryability::Retryable {
142 max_attempts: 3,
143 backoff: BackoffStrategy::Exponential {
144 base: Duration::from_secs(1),
145 max: Duration::from_secs(30),
146 },
147 },
148 ErrorCategory::CircuitOpen => Retryability::Retryable {
149 max_attempts: 1,
150 backoff: BackoffStrategy::Fixed(Duration::from_secs(10)),
151 },
152 ErrorCategory::PermissionDenied => Retryability::RequiresIntervention,
153 _ => Retryability::NonRetryable,
154 }
155 }
156
157 pub fn recovery_suggestions(&self) -> Vec<Cow<'static, str>> {
160 match self {
161 ErrorCategory::Network => vec![
162 Cow::Borrowed("Check network connectivity"),
163 Cow::Borrowed("Retry the operation after a brief delay"),
164 Cow::Borrowed("Verify external service availability"),
165 ],
166 ErrorCategory::Timeout => vec![
167 Cow::Borrowed("Increase timeout values if appropriate"),
168 Cow::Borrowed("Break large operations into smaller chunks"),
169 Cow::Borrowed("Check system resources and performance"),
170 ],
171 ErrorCategory::RateLimit => vec![
172 Cow::Borrowed("Wait before retrying the request"),
173 Cow::Borrowed("Reduce request frequency"),
174 Cow::Borrowed("Check provider rate limit documentation"),
175 ],
176 ErrorCategory::ServiceUnavailable => vec![
177 Cow::Borrowed("The service is temporarily unavailable"),
178 Cow::Borrowed("Retry after a brief delay"),
179 Cow::Borrowed("Check service status page if available"),
180 ],
181 ErrorCategory::CircuitOpen => vec![
182 Cow::Borrowed("This tool has been temporarily disabled due to repeated failures"),
183 Cow::Borrowed("Wait for the circuit breaker cooldown period"),
184 Cow::Borrowed("Try an alternative approach"),
185 ],
186 ErrorCategory::Authentication => vec![
187 Cow::Borrowed("Verify your API key or credentials"),
188 Cow::Borrowed("Check that your account is active and has sufficient permissions"),
189 Cow::Borrowed("Ensure environment variables for API keys are set correctly"),
190 ],
191 ErrorCategory::InvalidParameters => vec![
192 Cow::Borrowed("Check parameter names and types against the tool schema"),
193 Cow::Borrowed("Ensure required parameters are provided"),
194 Cow::Borrowed("Verify parameter values are within acceptable ranges"),
195 ],
196 ErrorCategory::ToolNotFound => vec![
197 Cow::Borrowed("Verify the tool name is spelled correctly"),
198 Cow::Borrowed("Check if the tool is available in the current context"),
199 ],
200 ErrorCategory::ResourceNotFound => vec![
201 Cow::Borrowed("Verify file paths and resource locations"),
202 Cow::Borrowed("Check if files exist and are accessible"),
203 Cow::Borrowed("Use list_dir to explore available resources"),
204 ],
205 ErrorCategory::PermissionDenied => vec![
206 Cow::Borrowed("Check file permissions and access rights"),
207 Cow::Borrowed("Ensure workspace boundaries are respected"),
208 ],
209 ErrorCategory::PolicyViolation => vec![
210 Cow::Borrowed("Review workspace policies and restrictions"),
211 Cow::Borrowed("Use alternative tools that comply with policies"),
212 ],
213 ErrorCategory::PlanModeViolation => vec![
214 Cow::Borrowed("This operation is not allowed in plan/read-only mode"),
215 Cow::Borrowed("Exit plan mode to perform mutating operations"),
216 ],
217 ErrorCategory::SandboxFailure => vec![
218 Cow::Borrowed("The sandbox denied this operation"),
219 Cow::Borrowed("Check sandbox configuration and permissions"),
220 ],
221 ErrorCategory::ResourceExhausted => vec![
222 Cow::Borrowed("Check your account usage limits and billing status"),
223 Cow::Borrowed("Review resource consumption and optimize if possible"),
224 ],
225 ErrorCategory::Cancelled => vec![Cow::Borrowed("The operation was cancelled")],
226 ErrorCategory::ExecutionError => vec![
227 Cow::Borrowed("Review error details for specific issues"),
228 Cow::Borrowed("Check tool documentation for known limitations"),
229 ],
230 }
231 }
232
233 pub const fn user_label(&self) -> &'static str {
235 match self {
236 ErrorCategory::Network => "Network error",
237 ErrorCategory::Timeout => "Request timed out",
238 ErrorCategory::RateLimit => "Rate limit exceeded",
239 ErrorCategory::ServiceUnavailable => "Service temporarily unavailable",
240 ErrorCategory::CircuitOpen => "Tool temporarily disabled",
241 ErrorCategory::Authentication => "Authentication failed",
242 ErrorCategory::InvalidParameters => "Invalid parameters",
243 ErrorCategory::ToolNotFound => "Tool not found",
244 ErrorCategory::ResourceNotFound => "Resource not found",
245 ErrorCategory::PermissionDenied => "Permission denied",
246 ErrorCategory::PolicyViolation => "Blocked by policy",
247 ErrorCategory::PlanModeViolation => "Not allowed in plan mode",
248 ErrorCategory::SandboxFailure => "Sandbox denied",
249 ErrorCategory::ResourceExhausted => "Resource limit reached",
250 ErrorCategory::Cancelled => "Operation cancelled",
251 ErrorCategory::ExecutionError => "Execution failed",
252 }
253 }
254}
255
256impl fmt::Display for ErrorCategory {
257 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
258 f.write_str(self.user_label())
259 }
260}
261
262pub fn classify_anyhow_error(err: &anyhow::Error) -> ErrorCategory {
272 let msg = err.to_string().to_ascii_lowercase();
273 classify_error_message(&msg)
274}
275
276pub fn classify_error_message(msg: &str) -> ErrorCategory {
281 let msg = if msg.as_bytes().iter().any(|b| b.is_ascii_uppercase()) {
282 Cow::Owned(msg.to_ascii_lowercase())
283 } else {
284 Cow::Borrowed(msg)
285 };
286
287 if contains_any(
289 &msg,
290 &[
291 "policy violation",
292 "denied by policy",
293 "tool permission denied",
294 "safety validation failed",
295 "not allowed in plan mode",
296 "only available when plan mode is active",
297 "workspace boundary",
298 "blocked by policy",
299 ],
300 ) {
301 return ErrorCategory::PolicyViolation;
302 }
303
304 if contains_any(
306 &msg,
307 &["plan mode", "read-only mode", "plan_mode_violation"],
308 ) {
309 return ErrorCategory::PlanModeViolation;
310 }
311
312 if contains_any(
314 &msg,
315 &[
316 "invalid api key",
317 "authentication failed",
318 "unauthorized",
319 "401",
320 "invalid credentials",
321 ],
322 ) {
323 return ErrorCategory::Authentication;
324 }
325
326 if contains_any(
328 &msg,
329 &[
330 "weekly usage limit",
331 "daily usage limit",
332 "monthly spending limit",
333 "insufficient credits",
334 "quota exceeded",
335 "billing",
336 "payment required",
337 ],
338 ) {
339 return ErrorCategory::ResourceExhausted;
340 }
341
342 if contains_any(
344 &msg,
345 &[
346 "invalid argument",
347 "invalid parameters",
348 "malformed",
349 "missing required",
350 "schema validation",
351 "argument validation failed",
352 "unknown field",
353 "type mismatch",
354 ],
355 ) {
356 return ErrorCategory::InvalidParameters;
357 }
358
359 if contains_any(
361 &msg,
362 &[
363 "tool not found",
364 "unknown tool",
365 "unsupported tool",
366 "no such tool",
367 ],
368 ) {
369 return ErrorCategory::ToolNotFound;
370 }
371
372 if contains_any(
374 &msg,
375 &[
376 "no such file",
377 "no such directory",
378 "file not found",
379 "directory not found",
380 "resource not found",
381 "path not found",
382 "enoent",
383 ],
384 ) {
385 return ErrorCategory::ResourceNotFound;
386 }
387
388 if contains_any(
390 &msg,
391 &[
392 "permission denied",
393 "access denied",
394 "operation not permitted",
395 "eacces",
396 "eperm",
397 "forbidden",
398 "403",
399 ],
400 ) {
401 return ErrorCategory::PermissionDenied;
402 }
403
404 if contains_any(&msg, &["cancelled", "interrupted", "canceled"]) {
406 return ErrorCategory::Cancelled;
407 }
408
409 if contains_any(&msg, &["circuit breaker", "circuit open"]) {
411 return ErrorCategory::CircuitOpen;
412 }
413
414 if contains_any(&msg, &["sandbox denied", "sandbox failure"]) {
416 return ErrorCategory::SandboxFailure;
417 }
418
419 if contains_any(&msg, &["rate limit", "too many requests", "429", "throttl"]) {
421 return ErrorCategory::RateLimit;
422 }
423
424 if contains_any(&msg, &["timeout", "timed out", "deadline exceeded"]) {
426 return ErrorCategory::Timeout;
427 }
428
429 if contains_any(
431 &msg,
432 &[
433 "invalid response format: missing choices",
434 "invalid response format: missing message",
435 "missing choices in response",
436 "missing message in choice",
437 "no choices in response",
438 "invalid response from ",
439 "empty response body",
440 "response did not contain",
441 "unexpected response format",
442 "failed to parse response",
443 ],
444 ) {
445 return ErrorCategory::ServiceUnavailable;
446 }
447
448 if contains_any(
450 &msg,
451 &[
452 "network",
453 "connection reset",
454 "connection refused",
455 "broken pipe",
456 "dns",
457 "name resolution",
458 "service unavailable",
459 "temporarily unavailable",
460 "internal server error",
461 "bad gateway",
462 "gateway timeout",
463 "overloaded",
464 "try again",
465 "retry later",
466 "500",
467 "502",
468 "503",
469 "504",
470 "upstream connect error",
471 "tls handshake",
472 "socket hang up",
473 "econnreset",
474 "etimedout",
475 ],
476 ) {
477 return ErrorCategory::Network;
478 }
479
480 if contains_any(&msg, &["out of memory", "disk full", "no space left"]) {
482 return ErrorCategory::ResourceExhausted;
483 }
484
485 ErrorCategory::ExecutionError
487}
488
489pub fn is_retryable_llm_error_message(msg: &str) -> bool {
494 let category = classify_error_message(msg);
495 category.is_retryable()
496}
497
/// Returns `true` if `message` contains at least one of `markers` as a
/// substring. The comparison is case-sensitive; an empty `markers` slice
/// yields `false`.
#[inline]
fn contains_any(message: &str, markers: &[&str]) -> bool {
    for needle in markers {
        if message.contains(*needle) {
            return true;
        }
    }
    false
}
502
503impl From<&crate::llm::LLMError> for ErrorCategory {
508 fn from(err: &crate::llm::LLMError) -> Self {
509 match err {
510 crate::llm::LLMError::Authentication { .. } => ErrorCategory::Authentication,
511 crate::llm::LLMError::RateLimit { .. } => ErrorCategory::RateLimit,
512 crate::llm::LLMError::InvalidRequest { .. } => ErrorCategory::InvalidParameters,
513 crate::llm::LLMError::Network { .. } => ErrorCategory::Network,
514 crate::llm::LLMError::Provider { message, metadata } => {
515 if let Some(meta) = metadata
517 && let Some(status) = meta.status
518 {
519 return match status {
520 401 => ErrorCategory::Authentication,
521 403 => ErrorCategory::PermissionDenied,
522 404 => ErrorCategory::ResourceNotFound,
523 429 => ErrorCategory::RateLimit,
524 400 => ErrorCategory::InvalidParameters,
525 500 | 502 | 503 | 504 => ErrorCategory::ServiceUnavailable,
526 408 => ErrorCategory::Timeout,
527 _ => classify_error_message(message),
528 };
529 }
530 classify_error_message(message)
532 }
533 }
534 }
535}
536
#[cfg(test)]
mod tests {
    use super::*;

    /// Every [`ErrorCategory`] variant, for invariants that must hold across
    /// the whole enum. Extend this list when a variant is added.
    const ALL_CATEGORIES: [ErrorCategory; 16] = [
        ErrorCategory::Network,
        ErrorCategory::Timeout,
        ErrorCategory::RateLimit,
        ErrorCategory::ServiceUnavailable,
        ErrorCategory::CircuitOpen,
        ErrorCategory::Authentication,
        ErrorCategory::InvalidParameters,
        ErrorCategory::ToolNotFound,
        ErrorCategory::ResourceNotFound,
        ErrorCategory::PermissionDenied,
        ErrorCategory::PolicyViolation,
        ErrorCategory::PlanModeViolation,
        ErrorCategory::SandboxFailure,
        ErrorCategory::ResourceExhausted,
        ErrorCategory::Cancelled,
        ErrorCategory::ExecutionError,
    ];

    /// Asserts that `message` classifies as `expected`, naming the message on failure.
    #[track_caller]
    fn assert_category(message: &str, expected: ErrorCategory) {
        assert_eq!(
            classify_error_message(message),
            expected,
            "unexpected category for message {message:?}"
        );
    }

    // --- message classification ---

    #[test]
    fn policy_violation_takes_priority_over_permission() {
        assert_category(
            "tool permission denied by policy",
            ErrorCategory::PolicyViolation,
        );
    }

    #[test]
    fn rate_limit_classified_correctly() {
        assert_category(
            "provider returned 429 Too Many Requests",
            ErrorCategory::RateLimit,
        );
        assert_category("rate limit exceeded", ErrorCategory::RateLimit);
    }

    #[test]
    fn service_unavailable_is_network() {
        // The text classifier reports 5xx wording as `Network`; only the typed
        // `LLMError::Provider` status mapping yields `ServiceUnavailable`.
        assert_category("503 service unavailable", ErrorCategory::Network);
    }

    #[test]
    fn authentication_errors() {
        assert_category("invalid api key provided", ErrorCategory::Authentication);
        assert_category("401 unauthorized", ErrorCategory::Authentication);
    }

    #[test]
    fn billing_errors_are_resource_exhausted() {
        assert_category(
            "you have reached your weekly usage limit",
            ErrorCategory::ResourceExhausted,
        );
        assert_category(
            "quota exceeded for this model",
            ErrorCategory::ResourceExhausted,
        );
    }

    #[test]
    fn timeout_errors() {
        assert_category("connection timeout", ErrorCategory::Timeout);
        assert_category("request timed out after 30s", ErrorCategory::Timeout);
    }

    #[test]
    fn network_errors() {
        assert_category("connection reset by peer", ErrorCategory::Network);
        assert_category("dns name resolution failed", ErrorCategory::Network);
    }

    #[test]
    fn tool_not_found() {
        assert_category("unknown tool: ask_questions", ErrorCategory::ToolNotFound);
    }

    #[test]
    fn resource_not_found() {
        assert_category(
            "no such file or directory: /tmp/missing",
            ErrorCategory::ResourceNotFound,
        );
    }

    #[test]
    fn permission_denied() {
        assert_category(
            "permission denied: /etc/shadow",
            ErrorCategory::PermissionDenied,
        );
    }

    #[test]
    fn cancelled_operations() {
        assert_category("operation cancelled by user", ErrorCategory::Cancelled);
    }

    #[test]
    fn plan_mode_violation() {
        // An explicit "not allowed in plan mode" refusal is matched by the
        // policy markers, which are checked before the plan-mode markers.
        assert_category("not allowed in plan mode", ErrorCategory::PolicyViolation);
    }

    #[test]
    fn sandbox_failure() {
        assert_category(
            "sandbox denied this operation",
            ErrorCategory::SandboxFailure,
        );
    }

    #[test]
    fn unknown_error_is_execution_error() {
        assert_category("something went wrong", ErrorCategory::ExecutionError);
    }

    #[test]
    fn invalid_parameters() {
        assert_category(
            "invalid argument: missing path field",
            ErrorCategory::InvalidParameters,
        );
    }

    #[test]
    fn minimax_invalid_response_is_service_unavailable() {
        assert_category(
            "Invalid response from MiniMax: missing choices",
            ErrorCategory::ServiceUnavailable,
        );
        assert_category(
            "Invalid response format: missing message",
            ErrorCategory::ServiceUnavailable,
        );
    }

    // --- category predicates ---

    #[test]
    fn retryable_categories() {
        assert!(ErrorCategory::Network.is_retryable());
        assert!(ErrorCategory::Timeout.is_retryable());
        assert!(ErrorCategory::RateLimit.is_retryable());
        assert!(ErrorCategory::ServiceUnavailable.is_retryable());
        assert!(ErrorCategory::CircuitOpen.is_retryable());
    }

    #[test]
    fn non_retryable_categories() {
        assert!(!ErrorCategory::Authentication.is_retryable());
        assert!(!ErrorCategory::InvalidParameters.is_retryable());
        assert!(!ErrorCategory::PolicyViolation.is_retryable());
        assert!(!ErrorCategory::ResourceExhausted.is_retryable());
        assert!(!ErrorCategory::Cancelled.is_retryable());
    }

    #[test]
    fn permanent_error_detection() {
        assert!(ErrorCategory::Authentication.is_permanent());
        assert!(ErrorCategory::PolicyViolation.is_permanent());
        assert!(!ErrorCategory::Network.is_permanent());
        assert!(!ErrorCategory::Timeout.is_permanent());
    }

    #[test]
    fn llm_mistake_detection() {
        assert!(ErrorCategory::InvalidParameters.is_llm_mistake());
        assert!(!ErrorCategory::Network.is_llm_mistake());
        assert!(!ErrorCategory::Timeout.is_llm_mistake());
    }

    #[test]
    fn retryability_agrees_with_is_retryable() {
        // Guards against the two views of the retry policy drifting apart.
        for cat in ALL_CATEGORIES {
            let has_retry_policy = matches!(cat.retryability(), Retryability::Retryable { .. });
            assert_eq!(
                has_retry_policy,
                cat.is_retryable(),
                "retryability() and is_retryable() disagree for {cat:?}"
            );
        }
    }

    #[test]
    fn retryable_and_permanent_are_disjoint() {
        for cat in ALL_CATEGORIES {
            assert!(
                !(cat.is_retryable() && cat.is_permanent()),
                "{cat:?} is reported as both retryable and permanent"
            );
        }
    }

    // --- LLMError conversion ---

    #[test]
    fn llm_error_authentication_converts() {
        let err = crate::llm::LLMError::Authentication {
            message: "bad key".to_string(),
            metadata: None,
        };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::Authentication);
    }

    #[test]
    fn llm_error_rate_limit_converts() {
        let err = crate::llm::LLMError::RateLimit { metadata: None };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::RateLimit);
    }

    #[test]
    fn llm_error_network_converts() {
        let err = crate::llm::LLMError::Network {
            message: "connection refused".to_string(),
            metadata: None,
        };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::Network);
    }

    #[test]
    fn llm_error_provider_with_status_code() {
        use crate::llm::LLMErrorMetadata;
        let err = crate::llm::LLMError::Provider {
            message: "error".to_string(),
            metadata: Some(LLMErrorMetadata::new(
                "openai",
                Some(503),
                None,
                None,
                None,
                None,
                None,
            )),
        };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::ServiceUnavailable);
    }

    // --- retry helper ---

    #[test]
    fn retryable_llm_messages() {
        assert!(is_retryable_llm_error_message("429 too many requests"));
        assert!(is_retryable_llm_error_message("500 internal server error"));
        assert!(is_retryable_llm_error_message("connection timeout"));
        assert!(is_retryable_llm_error_message("network error"));
    }

    #[test]
    fn non_retryable_llm_messages() {
        assert!(!is_retryable_llm_error_message("invalid api key"));
        assert!(!is_retryable_llm_error_message(
            "weekly usage limit reached"
        ));
        assert!(!is_retryable_llm_error_message("permission denied"));
    }

    // --- presentation ---

    #[test]
    fn recovery_suggestions_non_empty() {
        for cat in ALL_CATEGORIES {
            assert!(
                !cat.recovery_suggestions().is_empty(),
                "Missing recovery suggestions for {:?}",
                cat
            );
        }
    }

    #[test]
    fn user_labels_are_non_empty() {
        for cat in ALL_CATEGORIES {
            assert!(
                !cat.user_label().is_empty(),
                "Missing user label for {:?}",
                cat
            );
        }
    }

    #[test]
    fn display_matches_user_label() {
        for cat in ALL_CATEGORIES {
            assert_eq!(format!("{}", cat), cat.user_label());
        }
    }
}