/// Reports whether `s` mentions a GLM-family model or provider.
///
/// The check is a case-insensitive substring search for any of `glm`,
/// `zhipuai`, `zai`, `qwen`, or `deepseek`.
pub fn contains_glm_model(s: &str) -> bool {
    const MARKERS: [&str; 5] = ["glm", "zhipuai", "zai", "qwen", "deepseek"];

    let haystack = s.to_lowercase();
    MARKERS.iter().any(|marker| haystack.contains(*marker))
}
21
22pub fn is_glm_like_agent(s: &str) -> bool {
40 let s_lower = s.to_lowercase();
41
42 if !contains_glm_model(&s_lower) {
44 return false;
45 }
46
47 if s_lower.starts_with("opencode") {
49 return false;
50 }
51
52 s_lower.starts_with("ccs") || s_lower.contains("claude")
54}
55
/// Category assigned to a failed agent invocation.
///
/// The variant order is part of the type's layout (`as` casts observe it),
/// so new variants should be appended rather than inserted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AgentErrorKind {
    /// API rate limit exceeded.
    RateLimited,
    /// Token/context limit exceeded.
    TokenExhausted,
    /// API service temporarily unavailable.
    ApiUnavailable,
    /// Network connectivity issue.
    NetworkError,
    /// Authentication failure.
    AuthFailure,
    /// Command not found.
    CommandNotFound,
    /// Disk space exhausted.
    DiskFull,
    /// Process terminated (possibly OOM).
    ProcessKilled,
    /// Invalid response from agent.
    InvalidResponse,
    /// Request timed out.
    Timeout,
    /// Tool execution failed (e.g., file write).
    ToolExecutionFailed,
    /// Known agent-specific issue.
    AgentSpecificQuirk,
    /// Agent-specific issue that may be transient.
    RetryableAgentQuirk,
    /// Transient error.
    Transient,
    /// Permanent error.
    Permanent,
}
95
96impl AgentErrorKind {
97 pub const fn should_retry(self) -> bool {
99 matches!(
100 self,
101 Self::RateLimited
102 | Self::ApiUnavailable
103 | Self::NetworkError
104 | Self::Timeout
105 | Self::InvalidResponse
106 | Self::RetryableAgentQuirk
107 | Self::Transient
108 )
109 }
110
111 pub const fn should_fallback(self) -> bool {
113 matches!(
114 self,
115 Self::TokenExhausted
116 | Self::AuthFailure
117 | Self::CommandNotFound
118 | Self::ProcessKilled
119 | Self::ToolExecutionFailed
120 | Self::AgentSpecificQuirk
121 )
122 }
123
124 pub const fn is_unrecoverable(self) -> bool {
126 matches!(self, Self::DiskFull | Self::Permanent)
127 }
128
129 pub const fn is_command_not_found(self) -> bool {
131 matches!(self, Self::CommandNotFound)
132 }
133
134 pub const fn is_network_error(self) -> bool {
136 matches!(self, Self::NetworkError | Self::Timeout)
137 }
138
139 pub const fn suggests_smaller_context(self) -> bool {
141 matches!(self, Self::TokenExhausted | Self::ProcessKilled)
142 }
143
144 pub const fn suggested_wait_ms(self) -> u64 {
146 match self {
147 Self::RateLimited => 5000, Self::ApiUnavailable => 3000, Self::NetworkError => 2000, Self::Timeout | Self::Transient | Self::RetryableAgentQuirk => 1000, Self::InvalidResponse => 500, _ => 0, }
154 }
155
156 pub const fn description(self) -> &'static str {
158 match self {
159 Self::RateLimited => "API rate limit exceeded",
160 Self::TokenExhausted => "Token/context limit exceeded",
161 Self::ApiUnavailable => "API service temporarily unavailable",
162 Self::NetworkError => "Network connectivity issue",
163 Self::AuthFailure => "Authentication failure",
164 Self::CommandNotFound => "Command not found",
165 Self::DiskFull => "Disk space exhausted",
166 Self::ProcessKilled => "Process terminated (possibly OOM)",
167 Self::InvalidResponse => "Invalid response from agent",
168 Self::Timeout => "Request timed out",
169 Self::ToolExecutionFailed => "Tool execution failed (e.g., file write)",
170 Self::AgentSpecificQuirk => "Known agent-specific issue",
171 Self::RetryableAgentQuirk => "Agent-specific issue (may be transient)",
172 Self::Transient => "Transient error",
173 Self::Permanent => "Permanent error",
174 }
175 }
176
177 pub const fn recovery_advice(self) -> &'static str {
179 match self {
180 Self::RateLimited => {
181 "Will retry after delay. Tip: Consider reducing request frequency or using a different provider."
182 }
183 Self::TokenExhausted => {
184 "Switching to alternative agent. Tip: Try RALPH_DEVELOPER_CONTEXT=0 or RALPH_REVIEWER_CONTEXT=0"
185 }
186 Self::ApiUnavailable => {
187 "API server issue. Will retry automatically. Tip: Check status page or try different provider."
188 }
189 Self::NetworkError => {
190 "Check your internet connection. Will retry automatically. Tip: Check firewall/VPN settings."
191 }
192 Self::AuthFailure => {
193 "Check API key or run 'agent auth' to authenticate. Tip: Verify credentials for this provider."
194 }
195 Self::CommandNotFound => {
196 "Agent binary not installed. See installation guidance below. Tip: Run 'ralph --list-available-agents'"
197 }
198 Self::DiskFull => "Free up disk space and try again. Tip: Check .agent directory size.",
199 Self::ProcessKilled => {
200 "Process was killed (possible OOM). Trying with smaller context. Tip: Reduce context with RALPH_*_CONTEXT=0"
201 }
202 Self::InvalidResponse => {
203 "Received malformed response. Retrying... Tip: May indicate parser mismatch with this agent."
204 }
205 Self::Timeout => {
206 "Request timed out. Will retry with longer timeout. Tip: Try reducing prompt size or context."
207 }
208 Self::ToolExecutionFailed => {
209 "Tool execution failed (file write/permissions). Switching agent. Tip: Check directory write permissions."
210 }
211 Self::AgentSpecificQuirk => {
212 "Known agent-specific issue. Switching to alternative agent. Tip: See docs/agent-compatibility.md"
213 }
214 Self::RetryableAgentQuirk => {
215 "Agent-specific issue that may be transient. Retrying... Tip: See docs/agent-compatibility.md"
216 }
217 Self::Transient => "Temporary issue. Will retry automatically.",
218 Self::Permanent => {
219 "Unrecoverable error. Check agent logs (.agent/logs/) and see docs/agent-compatibility.md for help."
220 }
221 }
222 }
223
224 pub fn classify_with_agent(
236 exit_code: i32,
237 stderr: &str,
238 agent_name: Option<&str>,
239 model_flag: Option<&str>,
240 ) -> Self {
241 let stderr_lower = stderr.to_lowercase();
242
243 if let Some(err) = Self::check_api_errors(&stderr_lower) {
246 return err;
247 }
248
249 if let Some(err) = Self::check_network_errors(&stderr_lower) {
250 return err;
251 }
252
253 if let Some(err) = Self::check_resource_errors(exit_code, &stderr_lower) {
254 return err;
255 }
256
257 if let Some(err) = Self::check_tool_failures(&stderr_lower) {
258 return err;
259 }
260
261 let is_problematic_agent =
267 agent_name.is_some_and(is_glm_like_agent) || model_flag.is_some_and(is_glm_like_agent);
268
269 if is_problematic_agent && exit_code == 1 {
270 let has_known_problematic_pattern = stderr_lower.contains("permission")
272 || stderr_lower.contains("denied")
273 || stderr_lower.contains("unauthorized")
274 || stderr_lower.contains("auth")
275 || stderr_lower.contains("token")
276 || stderr_lower.contains("limit")
277 || stderr_lower.contains("quota")
278 || stderr_lower.contains("disk")
279 || stderr_lower.contains("space")
280 || (stderr_lower.contains("glm") && stderr_lower.contains("failed"))
282 || (stderr_lower.contains("ccs") && stderr_lower.contains("failed"));
283
284 if has_known_problematic_pattern {
285 return Self::AgentSpecificQuirk;
287 }
288
289 return Self::RetryableAgentQuirk;
291 }
292
293 if let Some(err) = Self::check_agent_specific_quirks(&stderr_lower, exit_code) {
294 return err;
295 }
296
297 if let Some(err) = Self::check_command_not_found(exit_code, &stderr_lower) {
298 return err;
299 }
300
301 if exit_code == 1 && stderr_lower.contains("error") {
304 return Self::Transient;
307 }
308
309 Self::Permanent
310 }
311
312 fn check_api_errors(stderr_lower: &str) -> Option<Self> {
314 if stderr_lower.contains("rate limit")
316 || stderr_lower.contains("too many requests")
317 || stderr_lower.contains("429")
318 || stderr_lower.contains("quota exceeded")
319 {
320 return Some(Self::RateLimited);
321 }
322
323 if stderr_lower.contains("token")
328 || stderr_lower.contains("context length")
329 || stderr_lower.contains("maximum context")
330 || stderr_lower.contains("input too large")
331 || (stderr_lower.contains("too long")
332 && !stderr_lower.contains("argument list too long"))
333 {
334 return Some(Self::TokenExhausted);
335 }
336
337 if stderr_lower.contains("unauthorized")
339 || stderr_lower.contains("authentication")
340 || stderr_lower.contains("401")
341 || stderr_lower.contains("api key")
342 || stderr_lower.contains("invalid token")
343 || stderr_lower.contains("forbidden")
344 || stderr_lower.contains("403")
345 || stderr_lower.contains("access denied")
346 {
347 return Some(Self::AuthFailure);
348 }
349
350 None
351 }
352
353 fn check_network_errors(stderr_lower: &str) -> Option<Self> {
355 if stderr_lower.contains("connection refused")
357 || stderr_lower.contains("network unreachable")
358 || stderr_lower.contains("dns resolution")
359 || stderr_lower.contains("name resolution")
360 || stderr_lower.contains("no route to host")
361 || stderr_lower.contains("network is down")
362 || stderr_lower.contains("host unreachable")
363 || stderr_lower.contains("connection reset")
364 || stderr_lower.contains("broken pipe")
365 || stderr_lower.contains("econnrefused")
366 || stderr_lower.contains("enetunreach")
367 {
368 return Some(Self::NetworkError);
369 }
370
371 if stderr_lower.contains("service unavailable")
373 || stderr_lower.contains("503")
374 || stderr_lower.contains("502")
375 || stderr_lower.contains("504")
376 || stderr_lower.contains("500")
377 || stderr_lower.contains("internal server error")
378 || stderr_lower.contains("bad gateway")
379 || stderr_lower.contains("gateway timeout")
380 || stderr_lower.contains("overloaded")
381 || stderr_lower.contains("maintenance")
382 {
383 return Some(Self::ApiUnavailable);
384 }
385
386 if stderr_lower.contains("timeout")
388 || stderr_lower.contains("timed out")
389 || stderr_lower.contains("request timeout")
390 || stderr_lower.contains("deadline exceeded")
391 {
392 return Some(Self::Timeout);
393 }
394
395 None
396 }
397
398 fn check_resource_errors(exit_code: i32, stderr_lower: &str) -> Option<Self> {
400 if stderr_lower.contains("no space left")
402 || stderr_lower.contains("disk full")
403 || stderr_lower.contains("enospc")
404 || stderr_lower.contains("out of disk")
405 || stderr_lower.contains("insufficient storage")
406 {
407 return Some(Self::DiskFull);
408 }
409
410 if exit_code == 7
415 || stderr_lower.contains("argument list too long")
416 || stderr_lower.contains("e2big")
417 {
418 return Some(Self::ToolExecutionFailed);
419 }
420
421 if exit_code == 137
424 || exit_code == 139
425 || exit_code == -9
426 || stderr_lower.contains("killed")
427 || stderr_lower.contains("oom")
428 || stderr_lower.contains("out of memory")
429 || stderr_lower.contains("memory exhausted")
430 || stderr_lower.contains("cannot allocate")
431 || stderr_lower.contains("segmentation fault")
432 || stderr_lower.contains("sigsegv")
433 || stderr_lower.contains("sigkill")
434 {
435 return Some(Self::ProcessKilled);
436 }
437
438 None
439 }
440
441 fn check_tool_failures(stderr_lower: &str) -> Option<Self> {
443 if stderr_lower.contains("invalid json")
445 || stderr_lower.contains("json parse")
446 || stderr_lower.contains("unexpected token")
447 || stderr_lower.contains("malformed")
448 || stderr_lower.contains("truncated response")
449 || stderr_lower.contains("incomplete response")
450 {
451 return Some(Self::InvalidResponse);
452 }
453
454 if stderr_lower.contains("write error")
457 || stderr_lower.contains("cannot write")
458 || stderr_lower.contains("failed to write")
459 || stderr_lower.contains("unable to create file")
460 || stderr_lower.contains("file creation failed")
461 || stderr_lower.contains("i/o error")
462 || stderr_lower.contains("io error")
463 || stderr_lower.contains("tool failed")
464 || stderr_lower.contains("tool execution failed")
465 || stderr_lower.contains("tool call failed")
466 {
467 return Some(Self::ToolExecutionFailed);
468 }
469
470 if stderr_lower.contains("permission denied")
475 || stderr_lower.contains("operation not permitted")
476 || stderr_lower.contains("insufficient permissions")
477 || stderr_lower.contains("eacces")
478 || stderr_lower.contains("eperm")
479 {
480 return Some(Self::ToolExecutionFailed);
481 }
482
483 None
484 }
485
486 fn check_agent_specific_quirks(stderr_lower: &str, exit_code: i32) -> Option<Self> {
488 if stderr_lower.contains("ccs") || stderr_lower.contains("glm") {
492 if exit_code == 1 {
494 return Some(Self::AgentSpecificQuirk);
495 }
496 if stderr_lower.contains("ccs") && stderr_lower.contains("failed") {
498 return Some(Self::AgentSpecificQuirk);
499 }
500 if stderr_lower.contains("glm")
502 && (stderr_lower.contains("permission")
503 || stderr_lower.contains("denied")
504 || stderr_lower.contains("unauthorized"))
505 {
506 return Some(Self::AgentSpecificQuirk);
507 }
508 }
509
510 if stderr_lower.contains("glm") && exit_code == 1 {
512 return Some(Self::AgentSpecificQuirk);
513 }
514
515 None
516 }
517
518 fn check_command_not_found(exit_code: i32, stderr_lower: &str) -> Option<Self> {
520 if exit_code == 127
523 || exit_code == 126
524 || stderr_lower.contains("command not found")
525 || stderr_lower.contains("not found")
526 || stderr_lower.contains("no such file")
527 {
528 return Some(Self::CommandNotFound);
529 }
530
531 None
532 }
533}
534
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifies a failure with no agent name and no model flag.
    fn classify(exit_code: i32, stderr: &str) -> AgentErrorKind {
        AgentErrorKind::classify_with_agent(exit_code, stderr, None, None)
    }

    /// Classifies a failure attributed to the given agent name (no model flag).
    fn classify_for(agent: &str, exit_code: i32, stderr: &str) -> AgentErrorKind {
        AgentErrorKind::classify_with_agent(exit_code, stderr, Some(agent), None)
    }

    #[test]
    fn test_is_glm_like_agent() {
        // GLM-family models reached through ccs or claude.
        let glm_like = [
            "ccs/glm",
            "ccs/zai",
            "ccs/zhipuai",
            "ccs/qwen",
            "ccs/deepseek",
            "CCS/GLM",
            "claude -m glm-4",
        ];
        for name in glm_like {
            assert!(is_glm_like_agent(name), "expected GLM-like: {name:?}");
        }

        let not_glm_like = [
            // opencode agents are excluded even with a GLM model.
            "opencode/opencode/glm-4.7-free",
            "opencode/zai/glm-4.7",
            "opencode run -m glm",
            // No GLM-family model mentioned.
            "claude",
            "codex",
            "ccs/work",
            "ccs/personal",
            // GLM model, but neither ccs nor claude.
            "glm-4.7-free",
            "zai/glm-4.7",
        ];
        for name in not_glm_like {
            assert!(!is_glm_like_agent(name), "expected not GLM-like: {name:?}");
        }
    }

    #[test]
    fn test_agent_error_kind_should_retry() {
        let retryable = [
            AgentErrorKind::RateLimited,
            AgentErrorKind::ApiUnavailable,
            AgentErrorKind::NetworkError,
            AgentErrorKind::Timeout,
            AgentErrorKind::InvalidResponse,
            AgentErrorKind::Transient,
            AgentErrorKind::RetryableAgentQuirk,
        ];
        for kind in retryable {
            assert!(kind.should_retry(), "{kind:?} should be retryable");
        }

        let not_retryable = [
            AgentErrorKind::AuthFailure,
            AgentErrorKind::CommandNotFound,
            AgentErrorKind::Permanent,
        ];
        for kind in not_retryable {
            assert!(!kind.should_retry(), "{kind:?} should not be retryable");
        }
    }

    #[test]
    fn test_agent_error_kind_should_fallback() {
        let fallback = [
            AgentErrorKind::TokenExhausted,
            AgentErrorKind::AuthFailure,
            AgentErrorKind::CommandNotFound,
            AgentErrorKind::ProcessKilled,
            AgentErrorKind::ToolExecutionFailed,
            AgentErrorKind::AgentSpecificQuirk,
        ];
        for kind in fallback {
            assert!(kind.should_fallback(), "{kind:?} should trigger fallback");
        }

        let no_fallback = [AgentErrorKind::RateLimited, AgentErrorKind::Permanent];
        for kind in no_fallback {
            assert!(!kind.should_fallback(), "{kind:?} should not trigger fallback");
        }
    }

    #[test]
    fn test_agent_error_kind_is_unrecoverable() {
        for kind in [AgentErrorKind::DiskFull, AgentErrorKind::Permanent] {
            assert!(kind.is_unrecoverable(), "{kind:?} should be unrecoverable");
        }

        for kind in [AgentErrorKind::RateLimited, AgentErrorKind::AuthFailure] {
            assert!(!kind.is_unrecoverable(), "{kind:?} should be recoverable");
        }
    }

    #[test]
    fn test_agent_error_kind_classify() {
        // (exit code, stderr, expected kind) with no agent information.
        let generic_cases = [
            // Rate limiting
            (1, "rate limit exceeded", AgentErrorKind::RateLimited),
            (1, "error 429", AgentErrorKind::RateLimited),
            // Authentication
            (1, "unauthorized", AgentErrorKind::AuthFailure),
            (1, "error 401", AgentErrorKind::AuthFailure),
            // Missing command
            (127, "", AgentErrorKind::CommandNotFound),
            (1, "command not found", AgentErrorKind::CommandNotFound),
            // Killed process
            (137, "", AgentErrorKind::ProcessKilled),
            (1, "out of memory", AgentErrorKind::ProcessKilled),
            // Tool / write failures
            (1, "write error", AgentErrorKind::ToolExecutionFailed),
            (1, "tool failed", AgentErrorKind::ToolExecutionFailed),
            (1, "failed to write", AgentErrorKind::ToolExecutionFailed),
            // Permission failures
            (1, "permission denied", AgentErrorKind::ToolExecutionFailed),
            (1, "operation not permitted", AgentErrorKind::ToolExecutionFailed),
            (1, "insufficient permissions", AgentErrorKind::ToolExecutionFailed),
            // Oversized argument list
            (7, "argument list too long", AgentErrorKind::ToolExecutionFailed),
            (
                7,
                "opencode: Argument list too long (prompt exceeds OS limit)",
                AgentErrorKind::ToolExecutionFailed,
            ),
            // "access denied" is an auth failure, not a permission failure
            (1, "access denied", AgentErrorKind::AuthFailure),
            // Agent quirks detected from stderr alone
            (1, "glm error", AgentErrorKind::AgentSpecificQuirk),
            (1, "ccs glm failed", AgentErrorKind::AgentSpecificQuirk),
            // Generic exit-1 error
            (1, "some random error", AgentErrorKind::Transient),
        ];
        for (exit_code, stderr, expected) in generic_cases {
            assert_eq!(
                classify(exit_code, stderr),
                expected,
                "exit code {exit_code}, stderr {stderr:?}"
            );
        }

        // Same exit code, but attributed to a GLM-like agent.
        let ccs_glm_cases = [
            ("some random error", AgentErrorKind::RetryableAgentQuirk),
            // Specific categories still win over the agent heuristic.
            ("permission denied", AgentErrorKind::ToolExecutionFailed),
            ("token limit exceeded", AgentErrorKind::TokenExhausted),
            ("disk full", AgentErrorKind::DiskFull),
            ("glm failed", AgentErrorKind::AgentSpecificQuirk),
        ];
        for (stderr, expected) in ccs_glm_cases {
            assert_eq!(
                classify_for("ccs/glm", 1, stderr),
                expected,
                "agent ccs/glm, stderr {stderr:?}"
            );
        }
    }

    #[test]
    fn test_opencode_error_classification_not_treated_as_glm() {
        let opencode_cases = [
            (
                "opencode/opencode/glm-4.7-free",
                "some error occurred",
                AgentErrorKind::Transient,
            ),
            (
                "opencode/opencode/glm-4.7-free",
                "something happened",
                AgentErrorKind::Permanent,
            ),
            (
                "opencode/zai/glm-4.7",
                "rate limit exceeded",
                AgentErrorKind::RateLimited,
            ),
        ];
        for (agent, stderr, expected) in opencode_cases {
            assert_eq!(
                classify_for(agent, 1, stderr),
                expected,
                "agent {agent:?}, stderr {stderr:?}"
            );
        }
    }

    #[test]
    fn test_agent_error_kind_description_and_advice() {
        let kind = AgentErrorKind::RateLimited;
        assert!(!kind.description().is_empty());
        assert!(!kind.recovery_advice().is_empty());
    }

    #[test]
    fn test_agent_error_kind_suggested_wait_ms() {
        assert_eq!(AgentErrorKind::RateLimited.suggested_wait_ms(), 5000);
        assert_eq!(AgentErrorKind::Permanent.suggested_wait_ms(), 0);
    }

    #[test]
    fn test_agent_error_kind_suggests_smaller_context() {
        assert!(AgentErrorKind::TokenExhausted.suggests_smaller_context());
        assert!(AgentErrorKind::ProcessKilled.suggests_smaller_context());
        assert!(!AgentErrorKind::RateLimited.suggests_smaller_context());
    }
}