1use std::fmt;
3
4#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
6#[serde(rename_all = "snake_case")]
7pub enum ErrorCategory {
8 Network,
9 Authentication,
10 Authorization,
11 RateLimit,
12 Timeout,
13 InvalidInput,
14 Parse,
15 Tool,
16 State,
17 Internal,
18}
19
20#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
22#[serde(rename_all = "snake_case")]
23pub enum ErrorSeverity {
24 Info,
25 Warning,
26 Error,
27 Critical,
28}
29
30#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
32#[serde(rename_all = "snake_case")]
33pub enum ErrorRetryPolicy {
34 NetworkRetryable,
36 NotRetryable,
38}
39
40#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
42pub struct ErrorEnvelope {
43 pub category: ErrorCategory,
44 pub severity: ErrorSeverity,
45 pub recoverable: bool,
46 pub code: String,
47 pub message: String,
48 #[serde(default, skip_serializing_if = "Option::is_none")]
50 pub hint: Option<String>,
51}
52
53impl fmt::Display for ErrorCategory {
54 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55 let label = match self {
56 Self::Network => "network",
57 Self::Authentication => "authentication",
58 Self::Authorization => "authorization",
59 Self::RateLimit => "rate_limit",
60 Self::Timeout => "timeout",
61 Self::InvalidInput => "invalid_input",
62 Self::Parse => "parse",
63 Self::Tool => "tool",
64 Self::State => "state",
65 Self::Internal => "internal",
66 };
67 f.write_str(label)
68 }
69}
70
71impl fmt::Display for ErrorSeverity {
72 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73 let label = match self {
74 Self::Info => "info",
75 Self::Warning => "warning",
76 Self::Error => "error",
77 Self::Critical => "critical",
78 };
79 f.write_str(label)
80 }
81}
82
83impl fmt::Display for ErrorEnvelope {
84 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85 write!(f, "[{}] {}: {}", self.severity, self.code, self.message)
86 }
87}
88
89impl std::error::Error for ErrorEnvelope {}
90
91impl ErrorCategory {
92 #[must_use]
94 pub fn retry_policy(self) -> ErrorRetryPolicy {
95 if is_category_network_retryable(self) {
96 ErrorRetryPolicy::NetworkRetryable
97 } else {
98 ErrorRetryPolicy::NotRetryable
99 }
100 }
101}
102
103#[must_use]
105pub fn user_hint_for_category(category: ErrorCategory) -> &'static str {
106 match category {
107 ErrorCategory::Network => "Check your network or proxy, then retry the message.",
108 ErrorCategory::Timeout => "The request timed out; retry or reduce context with /compact.",
109 ErrorCategory::RateLimit => "Wait briefly and retry, or switch to a lighter model.",
110 ErrorCategory::InvalidInput => {
111 "Fix model/thinking settings or compact context — this request cannot be retried automatically."
112 }
113 ErrorCategory::Authentication => {
114 "Set a valid API key in DEEPSEEK_API_KEY or ~/.deepseek/config.toml."
115 }
116 ErrorCategory::Authorization => {
117 "This action is not allowed in the current trust or approval mode."
118 }
119 ErrorCategory::Parse => {
120 "The response could not be parsed; retry once or report if it persists."
121 }
122 ErrorCategory::Tool => "Review the tool output in the transcript and adjust the request.",
123 ErrorCategory::State => {
124 "The thread or resource may have ended; refresh or start a new turn."
125 }
126 ErrorCategory::Internal => {
127 "Retry the message; if it persists, check logs or restart the runtime."
128 }
129 }
130}
131
132#[must_use]
133pub fn is_category_network_retryable(category: ErrorCategory) -> bool {
134 matches!(
135 category,
136 ErrorCategory::Network | ErrorCategory::Timeout | ErrorCategory::RateLimit
137 ) || category == ErrorCategory::Internal
138}
139
140impl ErrorEnvelope {
141 #[must_use]
142 pub fn new(
143 category: ErrorCategory,
144 severity: ErrorSeverity,
145 recoverable: bool,
146 code: impl Into<String>,
147 message: impl Into<String>,
148 ) -> Self {
149 Self {
150 category,
151 severity,
152 recoverable,
153 code: code.into(),
154 message: message.into(),
155 hint: Some(user_hint_for_category(category).to_string()),
156 }
157 }
158
159 #[must_use]
161 pub fn is_network_retryable(&self) -> bool {
162 is_category_network_retryable(self.category)
163 }
164
165 #[must_use]
167 pub fn to_wire_error_body(&self, http_status: u16) -> serde_json::Value {
168 let category = self.category.to_string();
169 serde_json::json!({
170 "error": {
171 "message": self.message,
172 "status": http_status,
173 "category": category,
174 "class": category,
175 "code": self.code,
176 "recoverable": self.recoverable,
177 "retryable": self.is_network_retryable(),
178 "retry_policy": self.category.retry_policy().as_str(),
179 "severity": self.severity.to_string(),
180 "hint": self.hint,
181 }
182 })
183 }
184
185 #[must_use]
189 pub fn transient(message: impl Into<String>) -> Self {
190 Self::new(
191 ErrorCategory::Internal,
192 ErrorSeverity::Warning,
193 true,
194 "transient",
195 message,
196 )
197 }
198
199 #[must_use]
202 pub fn fatal(message: impl Into<String>) -> Self {
203 Self::new(
204 ErrorCategory::Internal,
205 ErrorSeverity::Error,
206 false,
207 "fatal",
208 message,
209 )
210 }
211
212 #[must_use]
214 pub fn fatal_auth(message: impl Into<String>) -> Self {
215 Self::new(
216 ErrorCategory::Authentication,
217 ErrorSeverity::Critical,
218 false,
219 "auth_fatal",
220 message,
221 )
222 }
223
224 #[must_use]
226 pub fn context_overflow(message: impl Into<String>) -> Self {
227 Self::new(
228 ErrorCategory::InvalidInput,
229 ErrorSeverity::Error,
230 true,
231 "context_overflow",
232 message,
233 )
234 }
235
236 #[must_use]
238 pub fn network(message: impl Into<String>) -> Self {
239 Self::new(
240 ErrorCategory::Network,
241 ErrorSeverity::Warning,
242 true,
243 "network_transient",
244 message,
245 )
246 }
247
248 #[must_use]
250 pub fn tool(message: impl Into<String>) -> Self {
251 Self::new(
252 ErrorCategory::Tool,
253 ErrorSeverity::Error,
254 true,
255 "tool_failed",
256 message,
257 )
258 }
259}
260
/// Reasons a response stream is closed early.
#[derive(Debug, Clone)]
pub enum StreamError {
    /// No data arrived for too long.
    Stall {
        /// Idle time, in seconds, reported in the resulting message.
        timeout_secs: u64,
    },
    /// The stream produced more content than allowed.
    Overflow {
        /// Maximum content size in bytes.
        limit_bytes: usize,
    },
    /// The stream ran for longer than allowed.
    DurationLimit {
        /// Maximum duration in seconds.
        limit_secs: u64,
    },
}
268
269impl StreamError {
270 #[must_use]
271 pub fn into_envelope(self) -> ErrorEnvelope {
272 match self {
273 Self::Stall { timeout_secs } => ErrorEnvelope::new(
274 ErrorCategory::Timeout,
275 ErrorSeverity::Warning,
276 true,
277 "stream_stall",
278 format!("Stream stalled: no data received for {timeout_secs}s, closing stream"),
279 ),
280 Self::Overflow { limit_bytes } => ErrorEnvelope::new(
281 ErrorCategory::Internal,
282 ErrorSeverity::Error,
283 true,
284 "stream_overflow",
285 format!("Stream exceeded maximum content size of {limit_bytes} bytes, closing"),
286 ),
287 Self::DurationLimit { limit_secs } => ErrorEnvelope::new(
288 ErrorCategory::Timeout,
289 ErrorSeverity::Error,
290 true,
291 "stream_duration_limit",
292 format!("Stream exceeded maximum duration of {limit_secs}s, closing"),
293 ),
294 }
295 }
296}
297
298impl fmt::Display for StreamError {
299 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300 match self {
301 Self::Stall { timeout_secs } => write!(f, "Stream stalled after {timeout_secs}s idle"),
302 Self::Overflow { limit_bytes } => {
303 write!(f, "Stream exceeded {limit_bytes} bytes limit")
304 }
305 Self::DurationLimit { limit_secs } => {
306 write!(f, "Stream exceeded {limit_secs}s duration limit")
307 }
308 }
309 }
310}
311
312impl std::error::Error for StreamError {}
313
314impl ErrorEnvelope {
315 #[must_use]
318 pub fn classify(message: impl Into<String>, recoverable: bool) -> Self {
319 let message = message.into();
320 let category = classify_error_message(&message);
321 let severity = match category {
322 ErrorCategory::Authentication => ErrorSeverity::Critical,
323 ErrorCategory::RateLimit | ErrorCategory::Timeout | ErrorCategory::Network => {
324 ErrorSeverity::Warning
325 }
326 ErrorCategory::InvalidInput | ErrorCategory::Authorization | ErrorCategory::Parse => {
327 ErrorSeverity::Error
328 }
329 ErrorCategory::Tool | ErrorCategory::State | ErrorCategory::Internal => {
330 if recoverable {
331 ErrorSeverity::Warning
332 } else {
333 ErrorSeverity::Error
334 }
335 }
336 };
337 Self::new(
338 category,
339 severity,
340 recoverable,
341 category.to_string(),
342 message,
343 )
344 }
345}
346
347impl ErrorRetryPolicy {
348 #[must_use]
349 pub fn as_str(self) -> &'static str {
350 match self {
351 Self::NetworkRetryable => "network_retryable",
352 Self::NotRetryable => "not_retryable",
353 }
354 }
355}
356
357#[must_use]
362pub fn classify_error_message(message: &str) -> ErrorCategory {
363 let lower = message.to_lowercase();
364
365 if lower.contains("maximum context length")
366 || lower.contains("context length")
367 || lower.contains("context_length")
368 || lower.contains("prompt is too long")
369 || (lower.contains("requested") && lower.contains("tokens") && lower.contains("maximum"))
370 || lower.contains("context window")
371 || lower.contains("reasoning_content")
372 || lower.contains("reasoning_effort")
373 || lower.contains("thinking mode")
374 || lower.contains("thinking.type")
375 {
376 return ErrorCategory::InvalidInput;
377 }
378 if lower.contains("rate limit")
379 || lower.contains("too many requests")
380 || lower.contains("429")
381 || lower.contains("quota")
382 {
383 return ErrorCategory::RateLimit;
384 }
385 if lower.contains("timeout") || lower.contains("timed out") {
386 return ErrorCategory::Timeout;
387 }
388 if lower.contains("auth") || lower.contains("unauthorized") || lower.contains("api key") {
389 return ErrorCategory::Authentication;
390 }
391 if lower.contains("permission") || lower.contains("forbidden") || lower.contains("denied") {
392 return ErrorCategory::Authorization;
393 }
394 if lower.contains("network")
395 || lower.contains("connection")
396 || lower.contains("dns")
397 || lower.contains("temporarily unavailable")
398 || lower.contains(" 502 ")
399 || lower.contains(" 503 ")
400 || lower.contains(" 504 ")
401 || lower.starts_with("502 ")
402 || lower.starts_with("503 ")
403 || lower.starts_with("504 ")
404 || lower.ends_with(" 502")
405 || lower.ends_with(" 503")
406 || lower.ends_with(" 504")
407 || lower == "502"
408 || lower == "503"
409 || lower == "504"
410 {
411 return ErrorCategory::Network;
412 }
413 if lower.contains("decision must")
414 || lower.contains("expected rfc 3339")
415 || lower.starts_with("invalid ")
416 || lower.contains("invalid request")
417 {
418 return ErrorCategory::InvalidInput;
419 }
420 if lower.contains("parse") || lower.contains("syntax") || lower.contains("malformed") {
421 return ErrorCategory::Parse;
422 }
423 if lower.contains("not found")
424 || lower.contains("unavailable")
425 || lower.contains("not available")
426 {
427 return ErrorCategory::State;
428 }
429 if lower.contains("tool") {
430 return ErrorCategory::Tool;
431 }
432
433 ErrorCategory::Internal
434}
435
436#[must_use]
441pub fn is_stream_failure_retryable(message: &str) -> bool {
442 is_category_network_retryable(classify_error_message(message))
443}
444
445impl From<zagens_tools::ToolError> for ErrorEnvelope {
446 fn from(value: zagens_tools::ToolError) -> Self {
447 match value {
448 zagens_tools::ToolError::InvalidInput { message } => Self::new(
449 ErrorCategory::InvalidInput,
450 ErrorSeverity::Error,
451 false,
452 "tool_invalid_input",
453 message,
454 ),
455 zagens_tools::ToolError::MissingField { field } => Self::new(
456 ErrorCategory::InvalidInput,
457 ErrorSeverity::Error,
458 false,
459 "tool_missing_field",
460 format!("Missing required field: {field}"),
461 ),
462 zagens_tools::ToolError::PathEscape { path } => Self::new(
463 ErrorCategory::Authorization,
464 ErrorSeverity::Error,
465 false,
466 "tool_path_escape",
467 format!("Path escapes workspace: {}", path.display()),
468 ),
469 zagens_tools::ToolError::ExecutionFailed { message } => Self::new(
470 ErrorCategory::Tool,
471 ErrorSeverity::Error,
472 true,
473 "tool_execution_failed",
474 message,
475 ),
476 zagens_tools::ToolError::Timeout { seconds } => Self::new(
477 ErrorCategory::Timeout,
478 ErrorSeverity::Warning,
479 true,
480 "tool_timeout",
481 format!("Tool timed out after {seconds}s"),
482 ),
483 zagens_tools::ToolError::NotAvailable { message } => Self::new(
484 ErrorCategory::State,
485 ErrorSeverity::Error,
486 false,
487 "tool_not_available",
488 message,
489 ),
490 zagens_tools::ToolError::PermissionDenied { message } => Self::new(
491 ErrorCategory::Authorization,
492 ErrorSeverity::Error,
493 false,
494 "tool_permission_denied",
495 message,
496 ),
497 }
498 }
499}
500
#[cfg(test)]
mod tests {
    use super::*;
    use zagens_tools::ToolError;

    /// Asserts that `message` is classified as `expected`.
    #[track_caller]
    fn assert_category(message: &str, expected: ErrorCategory) {
        assert_eq!(classify_error_message(message), expected);
    }

    // --- classifier: context length and thinking-mode constraints ---

    #[test]
    fn context_length_exact() {
        assert_category("maximum context length exceeded", ErrorCategory::InvalidInput);
    }

    #[test]
    fn context_length_underscore() {
        assert_category(
            "context_length_error: too many tokens",
            ErrorCategory::InvalidInput,
        );
    }

    #[test]
    fn context_length_variants() {
        assert_category(
            "context length is 128000 but messages used 250000",
            ErrorCategory::InvalidInput,
        );
        assert_category("prompt is too long for this model", ErrorCategory::InvalidInput);
    }

    // --- classifier: rate limits, timeouts, network ---

    #[test]
    fn rate_limit_variants() {
        assert_category(
            "too many requests, please try again later",
            ErrorCategory::RateLimit,
        );
        assert_category("HTTP 429: you have been rate limited", ErrorCategory::RateLimit);
        assert_category("quota exceeded", ErrorCategory::RateLimit);
    }

    #[test]
    fn timeout_wins_over_auth_substring() {
        assert_category("auth error: connection timed out", ErrorCategory::Timeout);
    }

    #[test]
    fn network_gateway_codes() {
        assert_category("server returned 502 Bad Gateway", ErrorCategory::Network);
        assert_category("503 Service Unavailable", ErrorCategory::Network);
        assert_category("service temporarily unavailable", ErrorCategory::Network);
    }

    #[test]
    fn status_502_embedded_in_token_not_network() {
        assert_category("error code ERR5021: bad input", ErrorCategory::Internal);
    }

    #[test]
    fn tool_not_found_is_state_not_tool() {
        assert_category(
            "tool execution failed: /bin/bash not found",
            ErrorCategory::State,
        );
    }

    // --- envelope constructors and display ---

    #[test]
    fn envelope_helpers() {
        let transient = ErrorEnvelope::transient("oops");
        assert_eq!(transient.category, ErrorCategory::Internal);
        assert!(transient.recoverable);

        let auth = ErrorEnvelope::fatal_auth("bad key");
        assert_eq!(auth.severity, ErrorSeverity::Critical);
        assert!(!auth.recoverable);
    }

    #[test]
    fn display_labels() {
        assert_eq!(ErrorCategory::RateLimit.to_string(), "rate_limit");
        assert_eq!(ErrorSeverity::Critical.to_string(), "critical");
        let rendered = ErrorEnvelope::network("lost").to_string();
        assert!(rendered.contains("lost"));
    }

    #[test]
    fn stream_overflow_envelope() {
        let overflow = StreamError::Overflow {
            limit_bytes: 1_000_000,
        };
        let envelope = overflow.into_envelope();
        assert_eq!(envelope.category, ErrorCategory::Internal);
        assert_eq!(envelope.severity, ErrorSeverity::Error);
    }

    // --- classifier: remaining rules ---

    #[test]
    fn reasoning_content_constraint_is_invalid_input_not_network() {
        assert_category(
            "400 Bad Request: reasoning_content is required for tool calls in thinking mode",
            ErrorCategory::InvalidInput,
        );
        assert_category("connection reset by peer", ErrorCategory::Network);
    }

    #[test]
    fn reasoning_effort_invalid() {
        assert_category("invalid reasoning_effort: maxx", ErrorCategory::InvalidInput);
    }

    #[test]
    fn thinking_mode_constraint() {
        assert_category(
            "thinking mode does not support this parameter",
            ErrorCategory::InvalidInput,
        );
    }

    #[test]
    fn rate_limit_exact() {
        assert_category("rate limit exceeded", ErrorCategory::RateLimit);
    }

    #[test]
    fn timeout_before_network_status_codes() {
        assert_category("504 Gateway Timeout", ErrorCategory::Timeout);
        assert_category("502", ErrorCategory::Network);
    }

    #[test]
    fn network_disconnect() {
        assert_category("connection reset by peer", ErrorCategory::Network);
    }

    #[test]
    fn auth_api_key() {
        assert_category("invalid api key provided", ErrorCategory::Authentication);
    }

    #[test]
    fn authorization_denied() {
        assert_category("access denied by policy", ErrorCategory::Authorization);
    }

    #[test]
    fn parse_malformed() {
        assert_category("malformed response from server", ErrorCategory::Parse);
    }

    #[test]
    fn state_not_found() {
        assert_category("thread not found", ErrorCategory::State);
    }

    #[test]
    fn tool_without_not_found_substring() {
        assert_category("a tool returned an error code 1", ErrorCategory::Tool);
    }

    #[test]
    fn empty_and_whitespace_fallback_internal() {
        assert_category("", ErrorCategory::Internal);
        assert_category(" ", ErrorCategory::Internal);
    }

    #[test]
    fn internal_fallback() {
        assert_category(
            "something completely unexpected happened",
            ErrorCategory::Internal,
        );
    }

    #[test]
    fn capitalization_irrelevant() {
        assert_category("NETWORK ERROR: Connection REFUSED", ErrorCategory::Network);
    }

    // --- classify(): severity selection ---

    #[test]
    fn classify_recoverable_internal_is_warning() {
        let envelope = ErrorEnvelope::classify("unknown hiccup", true);
        assert_eq!(envelope.category, ErrorCategory::Internal);
        assert_eq!(envelope.severity, ErrorSeverity::Warning);
        assert!(envelope.recoverable);
    }

    #[test]
    fn classify_auth_is_critical() {
        let envelope = ErrorEnvelope::classify("401 unauthorized", false);
        assert_eq!(envelope.category, ErrorCategory::Authentication);
        assert_eq!(envelope.severity, ErrorSeverity::Critical);
    }

    // --- conversions from stream and tool errors ---

    #[test]
    fn stream_stall_is_recoverable_warning() {
        let envelope = StreamError::Stall { timeout_secs: 60 }.into_envelope();
        assert_eq!(envelope.category, ErrorCategory::Timeout);
        assert_eq!(envelope.severity, ErrorSeverity::Warning);
        assert!(envelope.recoverable);
    }

    #[test]
    fn tool_timeout_is_recoverable_warning() {
        let envelope = ErrorEnvelope::from(ToolError::Timeout { seconds: 30 });
        assert_eq!(envelope.category, ErrorCategory::Timeout);
        assert_eq!(envelope.severity, ErrorSeverity::Warning);
        assert!(envelope.recoverable);
    }

    #[test]
    fn tool_path_escape_is_authorization() {
        let escape = ToolError::PathEscape {
            path: std::path::PathBuf::from("/etc/passwd"),
        };
        let envelope = ErrorEnvelope::from(escape);
        assert_eq!(envelope.category, ErrorCategory::Authorization);
    }

    // --- retry policy, hints and wire body ---

    #[test]
    fn stream_retry_policy_network_vs_invalid_input() {
        for retryable in ["connection reset by peer", "502 Bad Gateway"] {
            assert!(is_stream_failure_retryable(retryable));
        }
        for rejected in [
            "Missing reasoning_content on assistant tool message",
            "401 unauthorized",
        ] {
            assert!(!is_stream_failure_retryable(rejected));
        }
    }

    #[test]
    fn user_hints_differ_for_network_vs_invalid_input() {
        let network_hint = user_hint_for_category(ErrorCategory::Network);
        let invalid_hint = user_hint_for_category(ErrorCategory::InvalidInput);
        assert_ne!(network_hint, invalid_hint);
        assert!(network_hint.contains("network") || network_hint.contains("proxy"));
        assert!(invalid_hint.contains("compact") || invalid_hint.contains("thinking"));
    }

    #[test]
    fn wire_error_body_includes_hint_class_and_retry_policy() {
        let envelope = ErrorEnvelope::classify("connection reset by peer", true);
        let body = envelope.to_wire_error_body(503);
        let error = body.get("error").expect("error object");
        assert_eq!(error["category"], "network");
        assert_eq!(error["class"], "network");
        assert_eq!(error["retry_policy"], "network_retryable");
        assert_eq!(error["retryable"], true);
        assert!(error.get("hint").and_then(|hint| hint.as_str()).is_some());
    }

    #[test]
    fn invalid_input_wire_body_not_retryable() {
        let envelope = ErrorEnvelope::classify(
            "reasoning_content is required for tool calls in thinking mode",
            false,
        );
        let body = envelope.to_wire_error_body(400);
        let error = body.get("error").cloned().unwrap();
        assert_eq!(error["category"], "invalid_input");
        assert_eq!(error["retry_policy"], "not_retryable");
        assert_eq!(error["retryable"], false);
    }

    #[test]
    fn api_validation_messages_are_invalid_input() {
        assert_category(
            "decision must be 'approve' or 'deny'",
            ErrorCategory::InvalidInput,
        );
    }

    #[test]
    fn category_retry_policy_labels() {
        let network_policy = ErrorCategory::Network.retry_policy();
        assert_eq!(network_policy, ErrorRetryPolicy::NetworkRetryable);

        let invalid_policy = ErrorCategory::InvalidInput.retry_policy();
        assert_eq!(invalid_policy, ErrorRetryPolicy::NotRetryable);
    }
}