1use std::borrow::Cow;
22use std::fmt;
23use std::time::Duration;
24
/// Coarse classification of an error.
///
/// The category drives the retry decision (`is_retryable`, `retryability`), the
/// user-facing label (`user_label`, also used by `Display`) and the recovery hints
/// (`recovery_suggestions`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum ErrorCategory {
    // --- Categories that `is_retryable` reports as retryable ---
    /// Network-level failure (label: "Network error").
    Network,
    /// The request timed out.
    Timeout,
    /// A rate limit was exceeded.
    RateLimit,
    /// The service is temporarily unavailable.
    ServiceUnavailable,
    /// The tool is temporarily disabled because of repeated failures (circuit breaker).
    CircuitOpen,

    // --- Categories that `is_retryable` reports as not retryable ---
    /// Authentication failed (API key / credentials).
    Authentication,
    /// Invalid parameters; the only category flagged by `is_llm_mistake`.
    InvalidParameters,
    /// The requested tool does not exist.
    ToolNotFound,
    /// A file or other resource was not found.
    ResourceNotFound,
    /// Permission denied; the only category whose `retryability` is `RequiresIntervention`.
    PermissionDenied,
    /// The operation was blocked by policy.
    PolicyViolation,
    /// The operation is not allowed in plan / read-only mode.
    PlanModeViolation,
    /// The sandbox denied the operation.
    SandboxFailure,
    /// A resource limit was reached (usage, quota, billing, memory or disk).
    ResourceExhausted,
    /// The operation was cancelled.
    Cancelled,
    /// Execution failed; also the fallback when `classify_error_message` matches nothing.
    ExecutionError,
}
65
/// Retry guidance for an error category, as produced by `ErrorCategory::retryability`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Retryability {
    /// The operation may be retried.
    Retryable {
        /// Attempt cap for this category. Whether the initial call counts towards it is
        /// not defined in this file — confirm against the retry loop that consumes it.
        max_attempts: u32,
        /// Delay policy to apply between attempts.
        backoff: BackoffStrategy,
    },
    /// Retrying is not advised.
    NonRetryable,
    /// Not retried automatically; returned for `ErrorCategory::PermissionDenied`.
    /// Presumably means a user/operator has to act first — confirm with the consumer.
    RequiresIntervention,
}
81
/// Delay policy between retry attempts.
///
/// This type only carries the parameters; the delay computation lives with whoever
/// consumes it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BackoffStrategy {
    /// Exponential backoff. Presumably the delay starts at `base` and is capped at
    /// `max` — confirm against the consumer.
    Exponential { base: Duration, max: Duration },
    /// The same fixed delay between attempts.
    Fixed(Duration),
}
90
91impl ErrorCategory {
92 #[inline]
94 pub const fn is_retryable(&self) -> bool {
95 matches!(
96 self,
97 ErrorCategory::Network
98 | ErrorCategory::Timeout
99 | ErrorCategory::RateLimit
100 | ErrorCategory::ServiceUnavailable
101 | ErrorCategory::CircuitOpen
102 )
103 }
104
105 #[inline]
107 pub const fn should_trip_circuit_breaker(&self) -> bool {
108 matches!(
109 self,
110 ErrorCategory::Network
111 | ErrorCategory::Timeout
112 | ErrorCategory::RateLimit
113 | ErrorCategory::ServiceUnavailable
114 | ErrorCategory::ExecutionError
115 )
116 }
117
118 #[inline]
121 pub const fn is_llm_mistake(&self) -> bool {
122 matches!(self, ErrorCategory::InvalidParameters)
123 }
124
125 #[inline]
127 pub const fn is_permanent(&self) -> bool {
128 matches!(
129 self,
130 ErrorCategory::Authentication
131 | ErrorCategory::PolicyViolation
132 | ErrorCategory::PlanModeViolation
133 | ErrorCategory::ResourceExhausted
134 )
135 }
136
137 pub fn retryability(&self) -> Retryability {
139 match self {
140 ErrorCategory::Network | ErrorCategory::ServiceUnavailable => Retryability::Retryable {
141 max_attempts: 3,
142 backoff: BackoffStrategy::Exponential {
143 base: Duration::from_millis(500),
144 max: Duration::from_secs(10),
145 },
146 },
147 ErrorCategory::Timeout => Retryability::Retryable {
148 max_attempts: 2,
149 backoff: BackoffStrategy::Exponential {
150 base: Duration::from_millis(1000),
151 max: Duration::from_secs(15),
152 },
153 },
154 ErrorCategory::RateLimit => Retryability::Retryable {
155 max_attempts: 3,
156 backoff: BackoffStrategy::Exponential {
157 base: Duration::from_secs(1),
158 max: Duration::from_secs(30),
159 },
160 },
161 ErrorCategory::CircuitOpen => Retryability::Retryable {
162 max_attempts: 1,
163 backoff: BackoffStrategy::Fixed(Duration::from_secs(10)),
164 },
165 ErrorCategory::PermissionDenied => Retryability::RequiresIntervention,
166 _ => Retryability::NonRetryable,
167 }
168 }
169
170 pub fn recovery_suggestions(&self) -> Vec<Cow<'static, str>> {
173 match self {
174 ErrorCategory::Network => vec![
175 Cow::Borrowed("Check network connectivity"),
176 Cow::Borrowed("Retry the operation after a brief delay"),
177 Cow::Borrowed("Verify external service availability"),
178 ],
179 ErrorCategory::Timeout => vec![
180 Cow::Borrowed("Increase timeout values if appropriate"),
181 Cow::Borrowed("Break large operations into smaller chunks"),
182 Cow::Borrowed("Check system resources and performance"),
183 ],
184 ErrorCategory::RateLimit => vec![
185 Cow::Borrowed("Wait before retrying the request"),
186 Cow::Borrowed("Reduce request frequency"),
187 Cow::Borrowed("Check provider rate limit documentation"),
188 ],
189 ErrorCategory::ServiceUnavailable => vec![
190 Cow::Borrowed("The service is temporarily unavailable"),
191 Cow::Borrowed("Retry after a brief delay"),
192 Cow::Borrowed("Check service status page if available"),
193 ],
194 ErrorCategory::CircuitOpen => vec![
195 Cow::Borrowed("This tool has been temporarily disabled due to repeated failures"),
196 Cow::Borrowed("Wait for the circuit breaker cooldown period"),
197 Cow::Borrowed("Try an alternative approach"),
198 ],
199 ErrorCategory::Authentication => vec![
200 Cow::Borrowed("Verify your API key or credentials"),
201 Cow::Borrowed("Check that your account is active and has sufficient permissions"),
202 Cow::Borrowed("Ensure environment variables for API keys are set correctly"),
203 ],
204 ErrorCategory::InvalidParameters => vec![
205 Cow::Borrowed("Check parameter names and types against the tool schema"),
206 Cow::Borrowed("Ensure required parameters are provided"),
207 Cow::Borrowed("Verify parameter values are within acceptable ranges"),
208 ],
209 ErrorCategory::ToolNotFound => vec![
210 Cow::Borrowed("Verify the tool name is spelled correctly"),
211 Cow::Borrowed("Check if the tool is available in the current context"),
212 ],
213 ErrorCategory::ResourceNotFound => vec![
214 Cow::Borrowed("Verify file paths and resource locations"),
215 Cow::Borrowed("Check if files exist and are accessible"),
216 Cow::Borrowed("Use list_dir to explore available resources"),
217 ],
218 ErrorCategory::PermissionDenied => vec![
219 Cow::Borrowed("Check file permissions and access rights"),
220 Cow::Borrowed("Ensure workspace boundaries are respected"),
221 ],
222 ErrorCategory::PolicyViolation => vec![
223 Cow::Borrowed("Review workspace policies and restrictions"),
224 Cow::Borrowed("Use alternative tools that comply with policies"),
225 ],
226 ErrorCategory::PlanModeViolation => vec![
227 Cow::Borrowed("This operation is not allowed in plan/read-only mode"),
228 Cow::Borrowed("Exit plan mode to perform mutating operations"),
229 ],
230 ErrorCategory::SandboxFailure => vec![
231 Cow::Borrowed("The sandbox denied this operation"),
232 Cow::Borrowed("Check sandbox configuration and permissions"),
233 ],
234 ErrorCategory::ResourceExhausted => vec![
235 Cow::Borrowed("Check your account usage limits and billing status"),
236 Cow::Borrowed("Review resource consumption and optimize if possible"),
237 ],
238 ErrorCategory::Cancelled => vec![Cow::Borrowed("The operation was cancelled")],
239 ErrorCategory::ExecutionError => vec![
240 Cow::Borrowed("Review error details for specific issues"),
241 Cow::Borrowed("Check tool documentation for known limitations"),
242 ],
243 }
244 }
245
246 pub const fn user_label(&self) -> &'static str {
248 match self {
249 ErrorCategory::Network => "Network error",
250 ErrorCategory::Timeout => "Request timed out",
251 ErrorCategory::RateLimit => "Rate limit exceeded",
252 ErrorCategory::ServiceUnavailable => "Service temporarily unavailable",
253 ErrorCategory::CircuitOpen => "Tool temporarily disabled",
254 ErrorCategory::Authentication => "Authentication failed",
255 ErrorCategory::InvalidParameters => "Invalid parameters",
256 ErrorCategory::ToolNotFound => "Tool not found",
257 ErrorCategory::ResourceNotFound => "Resource not found",
258 ErrorCategory::PermissionDenied => "Permission denied",
259 ErrorCategory::PolicyViolation => "Blocked by policy",
260 ErrorCategory::PlanModeViolation => "Not allowed in plan mode",
261 ErrorCategory::SandboxFailure => "Sandbox denied",
262 ErrorCategory::ResourceExhausted => "Resource limit reached",
263 ErrorCategory::Cancelled => "Operation cancelled",
264 ErrorCategory::ExecutionError => "Execution failed",
265 }
266 }
267}
268
269impl fmt::Display for ErrorCategory {
270 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271 f.write_str(self.user_label())
272 }
273}
274
275pub fn classify_anyhow_error(err: &anyhow::Error) -> ErrorCategory {
285 let msg = err.to_string().to_ascii_lowercase();
286 classify_error_message(&msg)
287}
288
289pub fn classify_error_message(msg: &str) -> ErrorCategory {
294 let msg = if msg.as_bytes().iter().any(|b| b.is_ascii_uppercase()) {
295 Cow::Owned(msg.to_ascii_lowercase())
296 } else {
297 Cow::Borrowed(msg)
298 };
299
300 if contains_any(
302 &msg,
303 &[
304 "policy violation",
305 "denied by policy",
306 "tool permission denied",
307 "safety validation failed",
308 "not allowed in plan mode",
309 "only available when plan mode is active",
310 "workspace boundary",
311 "blocked by policy",
312 ],
313 ) {
314 return ErrorCategory::PolicyViolation;
315 }
316
317 if contains_any(
319 &msg,
320 &["plan mode", "read-only mode", "plan_mode_violation"],
321 ) {
322 return ErrorCategory::PlanModeViolation;
323 }
324
325 if contains_any(
327 &msg,
328 &[
329 "invalid api key",
330 "authentication failed",
331 "unauthorized",
332 "401",
333 "invalid credentials",
334 ],
335 ) {
336 return ErrorCategory::Authentication;
337 }
338
339 if contains_any(
341 &msg,
342 &[
343 "weekly usage limit",
344 "daily usage limit",
345 "monthly spending limit",
346 "insufficient credits",
347 "quota exceeded",
348 "billing",
349 "payment required",
350 ],
351 ) {
352 return ErrorCategory::ResourceExhausted;
353 }
354
355 if contains_any(
357 &msg,
358 &[
359 "invalid argument",
360 "invalid parameters",
361 "malformed",
362 "missing required",
363 "schema validation",
364 "argument validation failed",
365 "unknown field",
366 "type mismatch",
367 ],
368 ) {
369 return ErrorCategory::InvalidParameters;
370 }
371
372 if contains_any(
374 &msg,
375 &[
376 "tool not found",
377 "unknown tool",
378 "unsupported tool",
379 "no such tool",
380 ],
381 ) {
382 return ErrorCategory::ToolNotFound;
383 }
384
385 if contains_any(
387 &msg,
388 &[
389 "no such file",
390 "no such directory",
391 "file not found",
392 "directory not found",
393 "resource not found",
394 "path not found",
395 "enoent",
396 ],
397 ) {
398 return ErrorCategory::ResourceNotFound;
399 }
400
401 if contains_any(
403 &msg,
404 &[
405 "permission denied",
406 "access denied",
407 "operation not permitted",
408 "eacces",
409 "eperm",
410 "forbidden",
411 "403",
412 ],
413 ) {
414 return ErrorCategory::PermissionDenied;
415 }
416
417 if contains_any(&msg, &["cancelled", "interrupted", "canceled"]) {
419 return ErrorCategory::Cancelled;
420 }
421
422 if contains_any(&msg, &["circuit breaker", "circuit open"]) {
424 return ErrorCategory::CircuitOpen;
425 }
426
427 if contains_any(&msg, &["sandbox denied", "sandbox failure"]) {
429 return ErrorCategory::SandboxFailure;
430 }
431
432 if contains_any(&msg, &["rate limit", "too many requests", "429", "throttl"]) {
434 return ErrorCategory::RateLimit;
435 }
436
437 if contains_any(&msg, &["timeout", "timed out", "deadline exceeded"]) {
439 return ErrorCategory::Timeout;
440 }
441
442 if contains_any(
444 &msg,
445 &[
446 "invalid response format: missing choices",
447 "invalid response format: missing message",
448 "missing choices in response",
449 "missing message in choice",
450 "no choices in response",
451 "invalid response from ",
452 "empty response body",
453 "response did not contain",
454 "unexpected response format",
455 "failed to parse response",
456 ],
457 ) {
458 return ErrorCategory::ServiceUnavailable;
459 }
460
461 if contains_any(
463 &msg,
464 &[
465 "network",
466 "connection reset",
467 "connection refused",
468 "broken pipe",
469 "dns",
470 "name resolution",
471 "service unavailable",
472 "temporarily unavailable",
473 "internal server error",
474 "bad gateway",
475 "gateway timeout",
476 "overloaded",
477 "try again",
478 "retry later",
479 "500",
480 "502",
481 "503",
482 "504",
483 "upstream connect error",
484 "tls handshake",
485 "socket hang up",
486 "econnreset",
487 "etimedout",
488 ],
489 ) {
490 return ErrorCategory::Network;
491 }
492
493 if contains_any(&msg, &["out of memory", "disk full", "no space left"]) {
495 return ErrorCategory::ResourceExhausted;
496 }
497
498 ErrorCategory::ExecutionError
500}
501
502pub fn is_retryable_llm_error_message(msg: &str) -> bool {
507 let category = classify_error_message(msg);
508 category.is_retryable()
509}
510
/// Returns `true` if `message` contains at least one of `markers` as a substring.
///
/// The comparison is case-sensitive; an empty `markers` slice never matches.
#[inline]
fn contains_any(message: &str, markers: &[&str]) -> bool {
    for needle in markers {
        if message.contains(*needle) {
            return true;
        }
    }
    false
}
515
516impl From<&crate::llm::LLMError> for ErrorCategory {
521 fn from(err: &crate::llm::LLMError) -> Self {
522 match err {
523 crate::llm::LLMError::Authentication { .. } => ErrorCategory::Authentication,
524 crate::llm::LLMError::RateLimit { metadata } => {
525 classify_llm_metadata(metadata.as_deref(), ErrorCategory::RateLimit)
526 }
527 crate::llm::LLMError::InvalidRequest { .. } => ErrorCategory::InvalidParameters,
528 crate::llm::LLMError::Network { .. } => ErrorCategory::Network,
529 crate::llm::LLMError::Provider { message, metadata } => {
530 let metadata_category =
531 classify_llm_metadata(metadata.as_deref(), ErrorCategory::ExecutionError);
532 if metadata_category != ErrorCategory::ExecutionError {
533 return metadata_category;
534 }
535
536 if let Some(meta) = metadata
538 && let Some(status) = meta.status
539 {
540 return match status {
541 401 => ErrorCategory::Authentication,
542 403 => ErrorCategory::PermissionDenied,
543 404 => ErrorCategory::ResourceNotFound,
544 429 => ErrorCategory::RateLimit,
545 400 => ErrorCategory::InvalidParameters,
546 500 | 502 | 503 | 504 => ErrorCategory::ServiceUnavailable,
547 408 => ErrorCategory::Timeout,
548 _ => classify_error_message(message),
549 };
550 }
551 classify_error_message(message)
553 }
554 }
555 }
556}
557
558fn classify_llm_metadata(
559 metadata: Option<&crate::llm::LLMErrorMetadata>,
560 fallback: ErrorCategory,
561) -> ErrorCategory {
562 let Some(metadata) = metadata else {
563 return fallback;
564 };
565
566 let mut hint = String::new();
567 if let Some(code) = &metadata.code {
568 hint.push_str(code);
569 hint.push(' ');
570 }
571 if let Some(message) = &metadata.message {
572 hint.push_str(message);
573 hint.push(' ');
574 }
575 if let Some(status) = metadata.status {
576 use std::fmt::Write;
577 let _ = write!(&mut hint, "{status}");
578 }
579
580 let classified = classify_error_message(&hint);
581 if classified == ErrorCategory::ExecutionError {
582 fallback
583 } else {
584 classified
585 }
586}
587
#[cfg(test)]
mod tests {
    use super::*;

    /// Every category, so that per-category invariants are checked for all variants.
    const ALL_CATEGORIES: [ErrorCategory; 16] = [
        ErrorCategory::Network,
        ErrorCategory::Timeout,
        ErrorCategory::RateLimit,
        ErrorCategory::ServiceUnavailable,
        ErrorCategory::CircuitOpen,
        ErrorCategory::Authentication,
        ErrorCategory::InvalidParameters,
        ErrorCategory::ToolNotFound,
        ErrorCategory::ResourceNotFound,
        ErrorCategory::PermissionDenied,
        ErrorCategory::PolicyViolation,
        ErrorCategory::PlanModeViolation,
        ErrorCategory::SandboxFailure,
        ErrorCategory::ResourceExhausted,
        ErrorCategory::Cancelled,
        ErrorCategory::ExecutionError,
    ];

    // --- classify_error_message ---

    #[test]
    fn policy_violation_takes_priority_over_permission() {
        assert_eq!(
            classify_error_message("tool permission denied by policy"),
            ErrorCategory::PolicyViolation
        );
    }

    #[test]
    fn rate_limit_classified_correctly() {
        assert_eq!(
            classify_error_message("provider returned 429 Too Many Requests"),
            ErrorCategory::RateLimit
        );
        assert_eq!(
            classify_error_message("rate limit exceeded"),
            ErrorCategory::RateLimit
        );
    }

    #[test]
    fn service_unavailable_is_network() {
        assert_eq!(
            classify_error_message("503 service unavailable"),
            ErrorCategory::Network
        );
    }

    #[test]
    fn authentication_errors() {
        assert_eq!(
            classify_error_message("invalid api key provided"),
            ErrorCategory::Authentication
        );
        assert_eq!(
            classify_error_message("401 unauthorized"),
            ErrorCategory::Authentication
        );
    }

    #[test]
    fn billing_errors_are_resource_exhausted() {
        assert_eq!(
            classify_error_message("you have reached your weekly usage limit"),
            ErrorCategory::ResourceExhausted
        );
        assert_eq!(
            classify_error_message("quota exceeded for this model"),
            ErrorCategory::ResourceExhausted
        );
    }

    #[test]
    fn timeout_errors() {
        assert_eq!(
            classify_error_message("connection timeout"),
            ErrorCategory::Timeout
        );
        assert_eq!(
            classify_error_message("request timed out after 30s"),
            ErrorCategory::Timeout
        );
    }

    #[test]
    fn network_errors() {
        assert_eq!(
            classify_error_message("connection reset by peer"),
            ErrorCategory::Network
        );
        assert_eq!(
            classify_error_message("dns name resolution failed"),
            ErrorCategory::Network
        );
    }

    #[test]
    fn tool_not_found() {
        assert_eq!(
            classify_error_message("unknown tool: ask_questions"),
            ErrorCategory::ToolNotFound
        );
    }

    #[test]
    fn resource_not_found() {
        assert_eq!(
            classify_error_message("no such file or directory: /tmp/missing"),
            ErrorCategory::ResourceNotFound
        );
    }

    #[test]
    fn permission_denied() {
        assert_eq!(
            classify_error_message("permission denied: /etc/shadow"),
            ErrorCategory::PermissionDenied
        );
    }

    #[test]
    fn cancelled_operations() {
        assert_eq!(
            classify_error_message("operation cancelled by user"),
            ErrorCategory::Cancelled
        );
    }

    #[test]
    fn plan_mode_violation() {
        // "not allowed in plan mode" is one of the policy markers, which are checked
        // before the plan-mode markers, so it classifies as a policy violation.
        assert_eq!(
            classify_error_message("not allowed in plan mode"),
            ErrorCategory::PolicyViolation
        );
        // A message that only hits the plan-mode markers gets the dedicated category.
        assert_eq!(
            classify_error_message("read-only mode is enabled"),
            ErrorCategory::PlanModeViolation
        );
    }

    #[test]
    fn sandbox_failure() {
        assert_eq!(
            classify_error_message("sandbox denied this operation"),
            ErrorCategory::SandboxFailure
        );
    }

    #[test]
    fn unknown_error_is_execution_error() {
        assert_eq!(
            classify_error_message("something went wrong"),
            ErrorCategory::ExecutionError
        );
    }

    #[test]
    fn invalid_parameters() {
        assert_eq!(
            classify_error_message("invalid argument: missing path field"),
            ErrorCategory::InvalidParameters
        );
    }

    // --- category predicates ---

    #[test]
    fn retryable_categories() {
        assert!(ErrorCategory::Network.is_retryable());
        assert!(ErrorCategory::Timeout.is_retryable());
        assert!(ErrorCategory::RateLimit.is_retryable());
        assert!(ErrorCategory::ServiceUnavailable.is_retryable());
        assert!(ErrorCategory::CircuitOpen.is_retryable());
    }

    #[test]
    fn non_retryable_categories() {
        assert!(!ErrorCategory::Authentication.is_retryable());
        assert!(!ErrorCategory::InvalidParameters.is_retryable());
        assert!(!ErrorCategory::PolicyViolation.is_retryable());
        assert!(!ErrorCategory::ResourceExhausted.is_retryable());
        assert!(!ErrorCategory::Cancelled.is_retryable());
    }

    #[test]
    fn retryability_agrees_with_is_retryable() {
        for cat in ALL_CATEGORIES {
            let retryable = matches!(cat.retryability(), Retryability::Retryable { .. });
            assert_eq!(
                retryable,
                cat.is_retryable(),
                "retryability() and is_retryable() disagree for {:?}",
                cat
            );
        }
    }

    #[test]
    fn permanent_error_detection() {
        assert!(ErrorCategory::Authentication.is_permanent());
        assert!(ErrorCategory::PolicyViolation.is_permanent());
        assert!(!ErrorCategory::Network.is_permanent());
        assert!(!ErrorCategory::Timeout.is_permanent());
    }

    #[test]
    fn llm_mistake_detection() {
        assert!(ErrorCategory::InvalidParameters.is_llm_mistake());
        assert!(!ErrorCategory::Network.is_llm_mistake());
        assert!(!ErrorCategory::Timeout.is_llm_mistake());
    }

    // --- From<&LLMError> ---

    #[test]
    fn llm_error_authentication_converts() {
        let err = crate::llm::LLMError::Authentication {
            message: "bad key".to_string(),
            metadata: None,
        };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::Authentication);
    }

    #[test]
    fn llm_error_rate_limit_converts() {
        let err = crate::llm::LLMError::RateLimit { metadata: None };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::RateLimit);
    }

    #[test]
    fn llm_error_quota_exhaustion_converts() {
        let err = crate::llm::LLMError::RateLimit {
            metadata: Some(crate::llm::LLMErrorMetadata::new(
                "openai",
                Some(429),
                Some("insufficient_quota".to_string()),
                None,
                None,
                None,
                Some("quota exceeded".to_string()),
            )),
        };

        assert_eq!(ErrorCategory::from(&err), ErrorCategory::ResourceExhausted);
    }

    #[test]
    fn llm_error_network_converts() {
        let err = crate::llm::LLMError::Network {
            message: "connection refused".to_string(),
            metadata: None,
        };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::Network);
    }

    #[test]
    fn llm_error_provider_with_status_code() {
        use crate::llm::LLMErrorMetadata;
        let err = crate::llm::LLMError::Provider {
            message: "error".to_string(),
            metadata: Some(LLMErrorMetadata::new(
                "openai",
                Some(503),
                None,
                None,
                None,
                None,
                None,
            )),
        };
        assert_eq!(ErrorCategory::from(&err), ErrorCategory::ServiceUnavailable);
    }

    #[test]
    fn minimax_invalid_response_is_service_unavailable() {
        assert_eq!(
            classify_error_message("Invalid response from MiniMax: missing choices"),
            ErrorCategory::ServiceUnavailable
        );
        assert_eq!(
            classify_error_message("Invalid response format: missing message"),
            ErrorCategory::ServiceUnavailable
        );
    }

    // --- is_retryable_llm_error_message ---

    #[test]
    fn retryable_llm_messages() {
        assert!(is_retryable_llm_error_message("429 too many requests"));
        assert!(is_retryable_llm_error_message("500 internal server error"));
        assert!(is_retryable_llm_error_message("connection timeout"));
        assert!(is_retryable_llm_error_message("network error"));
    }

    #[test]
    fn non_retryable_llm_messages() {
        assert!(!is_retryable_llm_error_message("invalid api key"));
        assert!(!is_retryable_llm_error_message(
            "weekly usage limit reached"
        ));
        assert!(!is_retryable_llm_error_message("permission denied"));
    }

    // --- per-category invariants ---

    #[test]
    fn recovery_suggestions_non_empty() {
        for cat in ALL_CATEGORIES {
            assert!(
                !cat.recovery_suggestions().is_empty(),
                "Missing recovery suggestions for {:?}",
                cat
            );
        }
    }

    #[test]
    fn user_labels_are_non_empty() {
        for cat in ALL_CATEGORIES {
            assert!(
                !cat.user_label().is_empty(),
                "Missing user label for {:?}",
                cat
            );
        }
    }

    #[test]
    fn display_matches_user_label() {
        for cat in ALL_CATEGORIES {
            assert_eq!(cat.to_string(), cat.user_label());
        }
    }
}