/// Reports whether `s` mentions one of the model families treated as GLM-like.
///
/// The check is a case-insensitive substring search: the input is lowercased
/// once and then scanned for `glm`, `zhipuai`, `zai`, `qwen` and `deepseek`.
/// Because it is substring based, any text that merely contains one of those
/// markers (anywhere) counts as a match.
pub fn contains_glm_model(s: &str) -> bool {
    const MODEL_MARKERS: [&str; 5] = ["glm", "zhipuai", "zai", "qwen", "deepseek"];

    let haystack = s.to_lowercase();
    MODEL_MARKERS
        .iter()
        .any(|marker| haystack.contains(*marker))
}
21
22pub fn is_glm_like_agent(s: &str) -> bool {
40 let s_lower = s.to_lowercase();
41
42 if !contains_glm_model(&s_lower) {
44 return false;
45 }
46
47 if s_lower.starts_with("opencode") {
49 return false;
50 }
51
52 s_lower.starts_with("ccs") || s_lower.contains("claude")
54}
55
/// Category assigned to a failed agent invocation.
///
/// Values are plain, copyable tags; the handling policy attached to each one
/// lives in the inherent methods of this type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AgentErrorKind {
    /// The API rate limit was exceeded.
    RateLimited,
    /// The token/context limit was exceeded.
    TokenExhausted,
    /// The API service is temporarily unavailable.
    ApiUnavailable,
    /// A network connectivity issue.
    NetworkError,
    /// An authentication failure.
    AuthFailure,
    /// The command could not be found.
    CommandNotFound,
    /// Disk space is exhausted.
    DiskFull,
    /// The process was terminated (possibly out of memory).
    ProcessKilled,
    /// The agent produced an invalid response.
    InvalidResponse,
    /// The request timed out.
    Timeout,
    /// A tool execution failed (e.g., a file write).
    ToolExecutionFailed,
    /// A known agent-specific issue.
    AgentSpecificQuirk,
    /// An agent-specific issue that may be transient.
    RetryableAgentQuirk,
    /// A transient error.
    Transient,
    /// A permanent error.
    Permanent,
}
95
96impl AgentErrorKind {
97 pub const fn should_retry(self) -> bool {
102 matches!(
103 self,
104 Self::ApiUnavailable
105 | Self::NetworkError
106 | Self::Timeout
107 | Self::InvalidResponse
108 | Self::RetryableAgentQuirk
109 | Self::Transient
110 )
111 }
112
113 pub const fn should_immediate_agent_fallback(self) -> bool {
119 matches!(self, Self::RateLimited)
120 }
121
122 pub const fn should_fallback(self) -> bool {
124 matches!(
125 self,
126 Self::TokenExhausted
127 | Self::AuthFailure
128 | Self::CommandNotFound
129 | Self::ProcessKilled
130 | Self::ToolExecutionFailed
131 | Self::AgentSpecificQuirk
132 )
133 }
134
135 pub const fn is_unrecoverable(self) -> bool {
137 matches!(self, Self::DiskFull | Self::Permanent)
138 }
139
140 pub const fn is_command_not_found(self) -> bool {
142 matches!(self, Self::CommandNotFound)
143 }
144
145 pub const fn is_network_error(self) -> bool {
147 matches!(self, Self::NetworkError | Self::Timeout)
148 }
149
150 pub const fn suggests_smaller_context(self) -> bool {
152 matches!(self, Self::TokenExhausted | Self::ProcessKilled)
153 }
154
155 pub const fn suggested_wait_ms(self) -> u64 {
157 match self {
158 Self::RateLimited => 0,
160 Self::ApiUnavailable => 3000, Self::NetworkError => 2000, Self::Timeout | Self::Transient | Self::RetryableAgentQuirk => 1000, Self::InvalidResponse => 500, _ => 0, }
166 }
167
168 pub const fn description(self) -> &'static str {
170 match self {
171 Self::RateLimited => "API rate limit exceeded",
172 Self::TokenExhausted => "Token/context limit exceeded",
173 Self::ApiUnavailable => "API service temporarily unavailable",
174 Self::NetworkError => "Network connectivity issue",
175 Self::AuthFailure => "Authentication failure",
176 Self::CommandNotFound => "Command not found",
177 Self::DiskFull => "Disk space exhausted",
178 Self::ProcessKilled => "Process terminated (possibly OOM)",
179 Self::InvalidResponse => "Invalid response from agent",
180 Self::Timeout => "Request timed out",
181 Self::ToolExecutionFailed => "Tool execution failed (e.g., file write)",
182 Self::AgentSpecificQuirk => "Known agent-specific issue",
183 Self::RetryableAgentQuirk => "Agent-specific issue (may be transient)",
184 Self::Transient => "Transient error",
185 Self::Permanent => "Permanent error",
186 }
187 }
188
189 pub const fn recovery_advice(self) -> &'static str {
191 match self {
192 Self::RateLimited => {
193 "Switching to next agent immediately. Rate limit indicates provider exhaustion."
194 }
195 Self::TokenExhausted => {
196 "Switching to alternative agent. Tip: Try RALPH_DEVELOPER_CONTEXT=0 or RALPH_REVIEWER_CONTEXT=0"
197 }
198 Self::ApiUnavailable => {
199 "API server issue. Will retry automatically. Tip: Check status page or try different provider."
200 }
201 Self::NetworkError => {
202 "Check your internet connection. Will retry automatically. Tip: Check firewall/VPN settings."
203 }
204 Self::AuthFailure => {
205 "Check API key or run 'agent auth' to authenticate. Tip: Verify credentials for this provider."
206 }
207 Self::CommandNotFound => {
208 "Agent binary not installed. See installation guidance below. Tip: Run 'ralph --list-available-agents'"
209 }
210 Self::DiskFull => "Free up disk space and try again. Tip: Check .agent directory size.",
211 Self::ProcessKilled => {
212 "Process was killed (possible OOM). Trying with smaller context. Tip: Reduce context with RALPH_*_CONTEXT=0"
213 }
214 Self::InvalidResponse => {
215 "Received malformed response. Retrying... Tip: May indicate parser mismatch with this agent."
216 }
217 Self::Timeout => {
218 "Request timed out. Will retry with longer timeout. Tip: Try reducing prompt size or context."
219 }
220 Self::ToolExecutionFailed => {
221 "Tool execution failed (file write/permissions). Switching agent. Tip: Check directory write permissions."
222 }
223 Self::AgentSpecificQuirk => {
224 "Known agent-specific issue. Switching to alternative agent. Tip: See docs/agent-compatibility.md"
225 }
226 Self::RetryableAgentQuirk => {
227 "Agent-specific issue that may be transient. Retrying... Tip: See docs/agent-compatibility.md"
228 }
229 Self::Transient => "Temporary issue. Will retry automatically.",
230 Self::Permanent => {
231 "Unrecoverable error. Check agent logs (.agent/logs/) and see docs/agent-compatibility.md for help."
232 }
233 }
234 }
235
236 pub fn classify_with_agent(
248 exit_code: i32,
249 stderr: &str,
250 agent_name: Option<&str>,
251 model_flag: Option<&str>,
252 ) -> Self {
253 let stderr_lower = stderr.to_lowercase();
254
255 if let Some(err) = Self::check_api_errors(&stderr_lower) {
258 return err;
259 }
260
261 if let Some(err) = Self::check_network_errors(&stderr_lower) {
262 return err;
263 }
264
265 if let Some(err) = Self::check_resource_errors(exit_code, &stderr_lower) {
266 return err;
267 }
268
269 if let Some(err) = Self::check_tool_failures(&stderr_lower) {
270 return err;
271 }
272
273 let is_problematic_agent =
279 agent_name.is_some_and(is_glm_like_agent) || model_flag.is_some_and(is_glm_like_agent);
280
281 if is_problematic_agent && exit_code == 1 {
282 let has_known_problematic_pattern = stderr_lower.contains("permission")
284 || stderr_lower.contains("denied")
285 || stderr_lower.contains("unauthorized")
286 || stderr_lower.contains("auth")
287 || stderr_lower.contains("token")
288 || stderr_lower.contains("limit")
289 || stderr_lower.contains("quota")
290 || stderr_lower.contains("disk")
291 || stderr_lower.contains("space")
292 || (stderr_lower.contains("glm") && stderr_lower.contains("failed"))
294 || (stderr_lower.contains("ccs") && stderr_lower.contains("failed"));
295
296 if has_known_problematic_pattern {
297 return Self::AgentSpecificQuirk;
299 }
300
301 return Self::RetryableAgentQuirk;
303 }
304
305 if let Some(err) = Self::check_agent_specific_quirks(&stderr_lower, exit_code) {
306 return err;
307 }
308
309 if let Some(err) = Self::check_command_not_found(exit_code, &stderr_lower) {
310 return err;
311 }
312
313 if exit_code == 1 && stderr_lower.contains("error") {
316 return Self::Transient;
319 }
320
321 Self::Permanent
322 }
323
324 fn check_api_errors(stderr_lower: &str) -> Option<Self> {
326 if stderr_lower.contains("rate limit")
328 || stderr_lower.contains("too many requests")
329 || stderr_lower.contains("429")
330 || stderr_lower.contains("quota exceeded")
331 {
332 return Some(Self::RateLimited);
333 }
334
335 if stderr_lower.contains("unauthorized")
339 || stderr_lower.contains("authentication")
340 || stderr_lower.contains("401")
341 || stderr_lower.contains("api key")
342 || stderr_lower.contains("invalid token")
343 || stderr_lower.contains("forbidden")
344 || stderr_lower.contains("403")
345 || stderr_lower.contains("access denied")
346 {
347 return Some(Self::AuthFailure);
348 }
349
350 if stderr_lower.contains("context length")
355 || stderr_lower.contains("maximum context")
356 || stderr_lower.contains("max context")
357 || stderr_lower.contains("context window")
358 || stderr_lower.contains("maximum tokens")
359 || stderr_lower.contains("max tokens")
360 || stderr_lower.contains("too many tokens")
361 || stderr_lower.contains("token limit")
362 || stderr_lower.contains("context_length_exceeded")
363 || stderr_lower.contains("input too large")
364 || stderr_lower.contains("prompt is too long")
365 || (stderr_lower.contains("too long")
366 && !stderr_lower.contains("argument list too long"))
367 {
368 return Some(Self::TokenExhausted);
369 }
370
371 None
372 }
373
374 fn check_network_errors(stderr_lower: &str) -> Option<Self> {
376 if stderr_lower.contains("connection refused")
378 || stderr_lower.contains("network unreachable")
379 || stderr_lower.contains("dns resolution")
380 || stderr_lower.contains("name resolution")
381 || stderr_lower.contains("no route to host")
382 || stderr_lower.contains("network is down")
383 || stderr_lower.contains("host unreachable")
384 || stderr_lower.contains("connection reset")
385 || stderr_lower.contains("broken pipe")
386 || stderr_lower.contains("econnrefused")
387 || stderr_lower.contains("enetunreach")
388 {
389 return Some(Self::NetworkError);
390 }
391
392 if stderr_lower.contains("service unavailable")
394 || stderr_lower.contains("503")
395 || stderr_lower.contains("502")
396 || stderr_lower.contains("504")
397 || stderr_lower.contains("500")
398 || stderr_lower.contains("internal server error")
399 || stderr_lower.contains("bad gateway")
400 || stderr_lower.contains("gateway timeout")
401 || stderr_lower.contains("overloaded")
402 || stderr_lower.contains("maintenance")
403 {
404 return Some(Self::ApiUnavailable);
405 }
406
407 if stderr_lower.contains("timeout")
409 || stderr_lower.contains("timed out")
410 || stderr_lower.contains("request timeout")
411 || stderr_lower.contains("deadline exceeded")
412 {
413 return Some(Self::Timeout);
414 }
415
416 None
417 }
418
419 fn check_resource_errors(exit_code: i32, stderr_lower: &str) -> Option<Self> {
421 if stderr_lower.contains("no space left")
423 || stderr_lower.contains("disk full")
424 || stderr_lower.contains("enospc")
425 || stderr_lower.contains("out of disk")
426 || stderr_lower.contains("insufficient storage")
427 {
428 return Some(Self::DiskFull);
429 }
430
431 if exit_code == 7
436 || stderr_lower.contains("argument list too long")
437 || stderr_lower.contains("e2big")
438 {
439 return Some(Self::ToolExecutionFailed);
440 }
441
442 if exit_code == 137
445 || exit_code == 139
446 || exit_code == -9
447 || stderr_lower.contains("killed")
448 || stderr_lower.contains("oom")
449 || stderr_lower.contains("out of memory")
450 || stderr_lower.contains("memory exhausted")
451 || stderr_lower.contains("cannot allocate")
452 || stderr_lower.contains("segmentation fault")
453 || stderr_lower.contains("sigsegv")
454 || stderr_lower.contains("sigkill")
455 {
456 return Some(Self::ProcessKilled);
457 }
458
459 None
460 }
461
462 fn check_tool_failures(stderr_lower: &str) -> Option<Self> {
464 if stderr_lower.contains("invalid json")
466 || stderr_lower.contains("json parse")
467 || stderr_lower.contains("unexpected token")
468 || stderr_lower.contains("malformed")
469 || stderr_lower.contains("truncated response")
470 || stderr_lower.contains("incomplete response")
471 {
472 return Some(Self::InvalidResponse);
473 }
474
475 if stderr_lower.contains("write error")
478 || stderr_lower.contains("cannot write")
479 || stderr_lower.contains("failed to write")
480 || stderr_lower.contains("unable to create file")
481 || stderr_lower.contains("file creation failed")
482 || stderr_lower.contains("i/o error")
483 || stderr_lower.contains("io error")
484 || stderr_lower.contains("tool failed")
485 || stderr_lower.contains("tool execution failed")
486 || stderr_lower.contains("tool call failed")
487 {
488 return Some(Self::ToolExecutionFailed);
489 }
490
491 if stderr_lower.contains("permission denied")
496 || stderr_lower.contains("operation not permitted")
497 || stderr_lower.contains("insufficient permissions")
498 || stderr_lower.contains("eacces")
499 || stderr_lower.contains("eperm")
500 {
501 return Some(Self::ToolExecutionFailed);
502 }
503
504 None
505 }
506
507 fn check_agent_specific_quirks(stderr_lower: &str, exit_code: i32) -> Option<Self> {
509 if stderr_lower.contains("ccs") || stderr_lower.contains("glm") {
513 if exit_code == 1 {
515 return Some(Self::AgentSpecificQuirk);
516 }
517 if stderr_lower.contains("ccs") && stderr_lower.contains("failed") {
519 return Some(Self::AgentSpecificQuirk);
520 }
521 if stderr_lower.contains("glm")
523 && (stderr_lower.contains("permission")
524 || stderr_lower.contains("denied")
525 || stderr_lower.contains("unauthorized"))
526 {
527 return Some(Self::AgentSpecificQuirk);
528 }
529 }
530
531 if stderr_lower.contains("glm") && exit_code == 1 {
533 return Some(Self::AgentSpecificQuirk);
534 }
535
536 None
537 }
538
539 fn check_command_not_found(exit_code: i32, stderr_lower: &str) -> Option<Self> {
541 if exit_code == 127
544 || exit_code == 126
545 || stderr_lower.contains("command not found")
546 || stderr_lower.contains("not found")
547 || stderr_lower.contains("no such file")
548 {
549 return Some(Self::CommandNotFound);
550 }
551
552 None
553 }
554}
555
#[cfg(test)]
mod tests {
    use super::*;

    const GLM_AGENT: Option<&str> = Some("ccs/glm");

    /// Classifies a failure without any agent or model hint.
    fn classify(exit_code: i32, stderr: &str) -> AgentErrorKind {
        AgentErrorKind::classify_with_agent(exit_code, stderr, None, None)
    }

    #[test]
    fn test_is_glm_like_agent() {
        // GLM-like models driven through ccs or claude.
        let glm_like = [
            "ccs/glm",
            "ccs/zai",
            "ccs/zhipuai",
            "ccs/qwen",
            "ccs/deepseek",
            "CCS/GLM",
            "claude -m glm-4",
        ];
        for name in glm_like {
            assert!(is_glm_like_agent(name), "{name:?} should be GLM-like");
        }

        let not_glm_like = [
            // opencode-prefixed agents are excluded even with a GLM model.
            "opencode/opencode/glm-4.7-free",
            "opencode/zai/glm-4.7",
            "opencode run -m glm",
            // No GLM-like model mentioned.
            "claude",
            "codex",
            "ccs/work",
            "ccs/personal",
            // Bare model names without a ccs/claude launcher.
            "glm-4.7-free",
            "zai/glm-4.7",
        ];
        for name in not_glm_like {
            assert!(!is_glm_like_agent(name), "{name:?} should not be GLM-like");
        }
    }

    #[test]
    fn test_agent_error_kind_should_retry() {
        let retryable = [
            AgentErrorKind::ApiUnavailable,
            AgentErrorKind::NetworkError,
            AgentErrorKind::Timeout,
            AgentErrorKind::InvalidResponse,
            AgentErrorKind::Transient,
            AgentErrorKind::RetryableAgentQuirk,
        ];
        for kind in retryable {
            assert!(kind.should_retry(), "{kind:?} should be retried");
        }

        let not_retryable = [
            AgentErrorKind::RateLimited,
            AgentErrorKind::AuthFailure,
            AgentErrorKind::CommandNotFound,
            AgentErrorKind::Permanent,
        ];
        for kind in not_retryable {
            assert!(!kind.should_retry(), "{kind:?} should not be retried");
        }
    }

    #[test]
    fn test_agent_error_kind_should_immediate_agent_fallback() {
        assert!(AgentErrorKind::RateLimited.should_immediate_agent_fallback());

        let no_immediate_fallback = [
            AgentErrorKind::ApiUnavailable,
            AgentErrorKind::NetworkError,
            AgentErrorKind::Timeout,
            AgentErrorKind::AuthFailure,
            AgentErrorKind::TokenExhausted,
            AgentErrorKind::CommandNotFound,
            AgentErrorKind::Permanent,
            AgentErrorKind::Transient,
        ];
        for kind in no_immediate_fallback {
            assert!(
                !kind.should_immediate_agent_fallback(),
                "{kind:?} should not trigger immediate agent fallback"
            );
        }
    }

    #[test]
    fn test_agent_error_kind_should_fallback() {
        let fallback = [
            AgentErrorKind::TokenExhausted,
            AgentErrorKind::AuthFailure,
            AgentErrorKind::CommandNotFound,
            AgentErrorKind::ProcessKilled,
            AgentErrorKind::ToolExecutionFailed,
            AgentErrorKind::AgentSpecificQuirk,
        ];
        for kind in fallback {
            assert!(kind.should_fallback(), "{kind:?} should fall back");
        }

        let no_fallback = [AgentErrorKind::RateLimited, AgentErrorKind::Permanent];
        for kind in no_fallback {
            assert!(!kind.should_fallback(), "{kind:?} should not fall back");
        }
    }

    #[test]
    fn test_agent_error_kind_is_unrecoverable() {
        for kind in [AgentErrorKind::DiskFull, AgentErrorKind::Permanent] {
            assert!(kind.is_unrecoverable(), "{kind:?} should be unrecoverable");
        }

        for kind in [AgentErrorKind::RateLimited, AgentErrorKind::AuthFailure] {
            assert!(!kind.is_unrecoverable(), "{kind:?} should be recoverable");
        }
    }

    #[test]
    fn test_agent_error_kind_classify() {
        use AgentErrorKind::*;

        // (exit code, stderr, expected kind) without any agent hint.
        let generic_cases = [
            // Rate limiting.
            (1, "rate limit exceeded", RateLimited),
            (1, "error 429", RateLimited),
            // Authentication.
            (1, "unauthorized", AuthFailure),
            (1, "error 401", AuthFailure),
            (1, "invalid token", AuthFailure),
            // Missing command.
            (127, "", CommandNotFound),
            (1, "command not found", CommandNotFound),
            // Killed process.
            (137, "", ProcessKilled),
            (1, "out of memory", ProcessKilled),
            // Write and tool failures.
            (1, "write error", ToolExecutionFailed),
            (1, "tool failed", ToolExecutionFailed),
            (1, "failed to write", ToolExecutionFailed),
            // Permission failures.
            (1, "permission denied", ToolExecutionFailed),
            (1, "operation not permitted", ToolExecutionFailed),
            (1, "insufficient permissions", ToolExecutionFailed),
            // Oversized argument list.
            (7, "argument list too long", ToolExecutionFailed),
            (
                7,
                "opencode: Argument list too long (prompt exceeds OS limit)",
                ToolExecutionFailed,
            ),
            // "access denied" is an auth failure, not a tool failure.
            (1, "access denied", AuthFailure),
            // Quirks detected from stderr alone.
            (1, "glm error", AgentSpecificQuirk),
            (1, "ccs glm failed", AgentSpecificQuirk),
            // Anything else with exit code 1 and "error".
            (1, "some random error", Transient),
        ];
        for (exit_code, stderr, expected) in generic_cases {
            assert_eq!(
                classify(exit_code, stderr),
                expected,
                "exit code {exit_code}, stderr {stderr:?}"
            );
        }

        // (stderr, expected kind) for exit code 1 with a GLM-like agent.
        let glm_agent_cases = [
            // Unknown failure from a GLM-like agent.
            ("some random error", RetryableAgentQuirk),
            // Generic checks still take precedence over the GLM handling.
            ("permission denied", ToolExecutionFailed),
            ("token limit exceeded", TokenExhausted),
            ("disk full", DiskFull),
            // Known problematic pattern.
            ("glm failed", AgentSpecificQuirk),
        ];
        for (stderr, expected) in glm_agent_cases {
            assert_eq!(
                AgentErrorKind::classify_with_agent(1, stderr, GLM_AGENT, None),
                expected,
                "GLM-like agent, stderr {stderr:?}"
            );
        }
    }

    #[test]
    fn test_opencode_error_classification_not_treated_as_glm() {
        // (stderr, agent name, expected kind), always with exit code 1.
        let cases = [
            (
                "some error occurred",
                "opencode/opencode/glm-4.7-free",
                AgentErrorKind::Transient,
            ),
            (
                "something happened",
                "opencode/opencode/glm-4.7-free",
                AgentErrorKind::Permanent,
            ),
            (
                "rate limit exceeded",
                "opencode/zai/glm-4.7",
                AgentErrorKind::RateLimited,
            ),
        ];
        for (stderr, agent, expected) in cases {
            assert_eq!(
                AgentErrorKind::classify_with_agent(1, stderr, Some(agent), None),
                expected,
                "agent {agent:?}, stderr {stderr:?}"
            );
        }
    }

    #[test]
    fn test_agent_error_kind_description_and_advice() {
        let kind = AgentErrorKind::RateLimited;
        assert!(!kind.description().is_empty());
        assert!(!kind.recovery_advice().is_empty());
    }

    #[test]
    fn test_agent_error_kind_suggested_wait_ms() {
        for kind in [AgentErrorKind::RateLimited, AgentErrorKind::Permanent] {
            assert_eq!(kind.suggested_wait_ms(), 0, "{kind:?} should not wait");
        }

        for kind in [AgentErrorKind::ApiUnavailable, AgentErrorKind::NetworkError] {
            assert!(kind.suggested_wait_ms() > 0, "{kind:?} should wait");
        }
    }

    #[test]
    fn test_agent_error_kind_suggests_smaller_context() {
        assert!(AgentErrorKind::TokenExhausted.suggests_smaller_context());
        assert!(AgentErrorKind::ProcessKilled.suggests_smaller_context());
        assert!(!AgentErrorKind::RateLimited.suggests_smaller_context());
    }
}