/// Reports whether `s` mentions one of the model families that this module
/// groups together with GLM.
///
/// The check is case-insensitive (the input is lowercased first) and matches
/// the marker anywhere in the string. Recognised markers are `glm`,
/// `zhipuai`, `zai`, `qwen` and `deepseek`.
pub fn contains_glm_model(s: &str) -> bool {
    const MODEL_MARKERS: [&str; 5] = ["glm", "zhipuai", "zai", "qwen", "deepseek"];

    let haystack = s.to_lowercase();
    MODEL_MARKERS
        .iter()
        .any(|&marker| haystack.contains(marker))
}
21
22pub fn is_glm_like_agent(s: &str) -> bool {
40 let s_lower = s.to_lowercase();
41
42 if !contains_glm_model(&s_lower) {
44 return false;
45 }
46
47 if s_lower.starts_with("opencode") {
49 return false;
50 }
51
52 s_lower.starts_with("ccs") || s_lower.contains("claude")
54}
55
/// Category assigned to a failed agent invocation.
///
/// The category drives the recovery decision: see `should_retry`,
/// `should_fallback` and `is_unrecoverable` on this type.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AgentErrorKind {
    /// API rate limit exceeded.
    RateLimited,
    /// Token/context limit exceeded.
    TokenExhausted,
    /// API service temporarily unavailable.
    ApiUnavailable,
    /// Network connectivity issue.
    NetworkError,
    /// Authentication failure.
    AuthFailure,
    /// The agent command could not be found.
    CommandNotFound,
    /// Disk space exhausted.
    DiskFull,
    /// Process terminated (possibly by the OOM killer).
    ProcessKilled,
    /// Invalid response from the agent.
    InvalidResponse,
    /// Request timed out.
    Timeout,
    /// Tool execution failed (e.g. a file write).
    ToolExecutionFailed,
    /// Known agent-specific issue.
    AgentSpecificQuirk,
    /// Agent-specific issue that may be transient.
    RetryableAgentQuirk,
    /// Transient error.
    Transient,
    /// Permanent error.
    Permanent,
}
95
96impl AgentErrorKind {
97 pub const fn should_retry(self) -> bool {
99 matches!(
100 self,
101 Self::RateLimited
102 | Self::ApiUnavailable
103 | Self::NetworkError
104 | Self::Timeout
105 | Self::InvalidResponse
106 | Self::RetryableAgentQuirk
107 | Self::Transient
108 )
109 }
110
111 pub const fn should_fallback(self) -> bool {
113 matches!(
114 self,
115 Self::TokenExhausted
116 | Self::AuthFailure
117 | Self::CommandNotFound
118 | Self::ProcessKilled
119 | Self::ToolExecutionFailed
120 | Self::AgentSpecificQuirk
121 )
122 }
123
124 pub const fn is_unrecoverable(self) -> bool {
126 matches!(self, Self::DiskFull | Self::Permanent)
127 }
128
129 pub const fn is_command_not_found(self) -> bool {
131 matches!(self, Self::CommandNotFound)
132 }
133
134 pub const fn is_network_error(self) -> bool {
136 matches!(self, Self::NetworkError | Self::Timeout)
137 }
138
139 pub const fn suggests_smaller_context(self) -> bool {
141 matches!(self, Self::TokenExhausted | Self::ProcessKilled)
142 }
143
144 pub const fn suggested_wait_ms(self) -> u64 {
146 match self {
147 Self::RateLimited => 5000, Self::ApiUnavailable => 3000, Self::NetworkError => 2000, Self::Timeout | Self::Transient | Self::RetryableAgentQuirk => 1000, Self::InvalidResponse => 500, _ => 0, }
154 }
155
156 pub const fn description(self) -> &'static str {
158 match self {
159 Self::RateLimited => "API rate limit exceeded",
160 Self::TokenExhausted => "Token/context limit exceeded",
161 Self::ApiUnavailable => "API service temporarily unavailable",
162 Self::NetworkError => "Network connectivity issue",
163 Self::AuthFailure => "Authentication failure",
164 Self::CommandNotFound => "Command not found",
165 Self::DiskFull => "Disk space exhausted",
166 Self::ProcessKilled => "Process terminated (possibly OOM)",
167 Self::InvalidResponse => "Invalid response from agent",
168 Self::Timeout => "Request timed out",
169 Self::ToolExecutionFailed => "Tool execution failed (e.g., file write)",
170 Self::AgentSpecificQuirk => "Known agent-specific issue",
171 Self::RetryableAgentQuirk => "Agent-specific issue (may be transient)",
172 Self::Transient => "Transient error",
173 Self::Permanent => "Permanent error",
174 }
175 }
176
177 pub const fn recovery_advice(self) -> &'static str {
179 match self {
180 Self::RateLimited => {
181 "Will retry after delay. Tip: Consider reducing request frequency or using a different provider."
182 }
183 Self::TokenExhausted => {
184 "Switching to alternative agent. Tip: Try RALPH_DEVELOPER_CONTEXT=0 or RALPH_REVIEWER_CONTEXT=0"
185 }
186 Self::ApiUnavailable => {
187 "API server issue. Will retry automatically. Tip: Check status page or try different provider."
188 }
189 Self::NetworkError => {
190 "Check your internet connection. Will retry automatically. Tip: Check firewall/VPN settings."
191 }
192 Self::AuthFailure => {
193 "Check API key or run 'agent auth' to authenticate. Tip: Verify credentials for this provider."
194 }
195 Self::CommandNotFound => {
196 "Agent binary not installed. See installation guidance below. Tip: Run 'ralph --list-available-agents'"
197 }
198 Self::DiskFull => "Free up disk space and try again. Tip: Check .agent directory size.",
199 Self::ProcessKilled => {
200 "Process was killed (possible OOM). Trying with smaller context. Tip: Reduce context with RALPH_*_CONTEXT=0"
201 }
202 Self::InvalidResponse => {
203 "Received malformed response. Retrying... Tip: May indicate parser mismatch with this agent."
204 }
205 Self::Timeout => {
206 "Request timed out. Will retry with longer timeout. Tip: Try reducing prompt size or context."
207 }
208 Self::ToolExecutionFailed => {
209 "Tool execution failed (file write/permissions). Switching agent. Tip: Check directory write permissions."
210 }
211 Self::AgentSpecificQuirk => {
212 "Known agent-specific issue. Switching to alternative agent. Tip: See docs/agent-compatibility.md"
213 }
214 Self::RetryableAgentQuirk => {
215 "Agent-specific issue that may be transient. Retrying... Tip: See docs/agent-compatibility.md"
216 }
217 Self::Transient => "Temporary issue. Will retry automatically.",
218 Self::Permanent => {
219 "Unrecoverable error. Check agent logs (.agent/logs/) and see docs/agent-compatibility.md for help."
220 }
221 }
222 }
223
224 pub fn classify_with_agent(
236 exit_code: i32,
237 stderr: &str,
238 agent_name: Option<&str>,
239 model_flag: Option<&str>,
240 ) -> Self {
241 let stderr_lower = stderr.to_lowercase();
242
243 if let Some(err) = Self::check_api_errors(&stderr_lower) {
246 return err;
247 }
248
249 if let Some(err) = Self::check_network_errors(&stderr_lower) {
250 return err;
251 }
252
253 if let Some(err) = Self::check_resource_errors(exit_code, &stderr_lower) {
254 return err;
255 }
256
257 if let Some(err) = Self::check_tool_failures(&stderr_lower) {
258 return err;
259 }
260
261 let is_problematic_agent =
267 agent_name.is_some_and(is_glm_like_agent) || model_flag.is_some_and(is_glm_like_agent);
268
269 if is_problematic_agent && exit_code == 1 {
270 let has_known_problematic_pattern = stderr_lower.contains("permission")
272 || stderr_lower.contains("denied")
273 || stderr_lower.contains("unauthorized")
274 || stderr_lower.contains("auth")
275 || stderr_lower.contains("token")
276 || stderr_lower.contains("limit")
277 || stderr_lower.contains("quota")
278 || stderr_lower.contains("disk")
279 || stderr_lower.contains("space")
280 || (stderr_lower.contains("glm") && stderr_lower.contains("failed"))
282 || (stderr_lower.contains("ccs") && stderr_lower.contains("failed"));
283
284 if has_known_problematic_pattern {
285 return Self::AgentSpecificQuirk;
287 }
288
289 return Self::RetryableAgentQuirk;
291 }
292
293 if let Some(err) = Self::check_agent_specific_quirks(&stderr_lower, exit_code) {
294 return err;
295 }
296
297 if let Some(err) = Self::check_command_not_found(exit_code, &stderr_lower) {
298 return err;
299 }
300
301 if exit_code == 1 && stderr_lower.contains("error") {
304 return Self::Transient;
307 }
308
309 Self::Permanent
310 }
311
312 fn check_api_errors(stderr_lower: &str) -> Option<Self> {
314 if stderr_lower.contains("rate limit")
316 || stderr_lower.contains("too many requests")
317 || stderr_lower.contains("429")
318 || stderr_lower.contains("quota exceeded")
319 {
320 return Some(Self::RateLimited);
321 }
322
323 if stderr_lower.contains("token")
326 || stderr_lower.contains("context length")
327 || stderr_lower.contains("maximum context")
328 || stderr_lower.contains("too long")
329 || stderr_lower.contains("input too large")
330 {
331 return Some(Self::TokenExhausted);
332 }
333
334 if stderr_lower.contains("unauthorized")
336 || stderr_lower.contains("authentication")
337 || stderr_lower.contains("401")
338 || stderr_lower.contains("api key")
339 || stderr_lower.contains("invalid token")
340 || stderr_lower.contains("forbidden")
341 || stderr_lower.contains("403")
342 || stderr_lower.contains("access denied")
343 {
344 return Some(Self::AuthFailure);
345 }
346
347 None
348 }
349
350 fn check_network_errors(stderr_lower: &str) -> Option<Self> {
352 if stderr_lower.contains("connection refused")
354 || stderr_lower.contains("network unreachable")
355 || stderr_lower.contains("dns resolution")
356 || stderr_lower.contains("name resolution")
357 || stderr_lower.contains("no route to host")
358 || stderr_lower.contains("network is down")
359 || stderr_lower.contains("host unreachable")
360 || stderr_lower.contains("connection reset")
361 || stderr_lower.contains("broken pipe")
362 || stderr_lower.contains("econnrefused")
363 || stderr_lower.contains("enetunreach")
364 {
365 return Some(Self::NetworkError);
366 }
367
368 if stderr_lower.contains("service unavailable")
370 || stderr_lower.contains("503")
371 || stderr_lower.contains("502")
372 || stderr_lower.contains("504")
373 || stderr_lower.contains("500")
374 || stderr_lower.contains("internal server error")
375 || stderr_lower.contains("bad gateway")
376 || stderr_lower.contains("gateway timeout")
377 || stderr_lower.contains("overloaded")
378 || stderr_lower.contains("maintenance")
379 {
380 return Some(Self::ApiUnavailable);
381 }
382
383 if stderr_lower.contains("timeout")
385 || stderr_lower.contains("timed out")
386 || stderr_lower.contains("request timeout")
387 || stderr_lower.contains("deadline exceeded")
388 {
389 return Some(Self::Timeout);
390 }
391
392 None
393 }
394
395 fn check_resource_errors(exit_code: i32, stderr_lower: &str) -> Option<Self> {
397 if stderr_lower.contains("no space left")
399 || stderr_lower.contains("disk full")
400 || stderr_lower.contains("enospc")
401 || stderr_lower.contains("out of disk")
402 || stderr_lower.contains("insufficient storage")
403 {
404 return Some(Self::DiskFull);
405 }
406
407 if exit_code == 137
410 || exit_code == 139
411 || exit_code == -9
412 || stderr_lower.contains("killed")
413 || stderr_lower.contains("oom")
414 || stderr_lower.contains("out of memory")
415 || stderr_lower.contains("memory exhausted")
416 || stderr_lower.contains("cannot allocate")
417 || stderr_lower.contains("segmentation fault")
418 || stderr_lower.contains("sigsegv")
419 || stderr_lower.contains("sigkill")
420 {
421 return Some(Self::ProcessKilled);
422 }
423
424 None
425 }
426
427 fn check_tool_failures(stderr_lower: &str) -> Option<Self> {
429 if stderr_lower.contains("invalid json")
431 || stderr_lower.contains("json parse")
432 || stderr_lower.contains("unexpected token")
433 || stderr_lower.contains("malformed")
434 || stderr_lower.contains("truncated response")
435 || stderr_lower.contains("incomplete response")
436 {
437 return Some(Self::InvalidResponse);
438 }
439
440 if stderr_lower.contains("write error")
443 || stderr_lower.contains("cannot write")
444 || stderr_lower.contains("failed to write")
445 || stderr_lower.contains("unable to create file")
446 || stderr_lower.contains("file creation failed")
447 || stderr_lower.contains("i/o error")
448 || stderr_lower.contains("io error")
449 || stderr_lower.contains("tool failed")
450 || stderr_lower.contains("tool execution failed")
451 || stderr_lower.contains("tool call failed")
452 {
453 return Some(Self::ToolExecutionFailed);
454 }
455
456 if stderr_lower.contains("permission denied")
461 || stderr_lower.contains("operation not permitted")
462 || stderr_lower.contains("insufficient permissions")
463 || stderr_lower.contains("eacces")
464 || stderr_lower.contains("eperm")
465 {
466 return Some(Self::ToolExecutionFailed);
467 }
468
469 None
470 }
471
472 fn check_agent_specific_quirks(stderr_lower: &str, exit_code: i32) -> Option<Self> {
474 if stderr_lower.contains("ccs") || stderr_lower.contains("glm") {
478 if exit_code == 1 {
480 return Some(Self::AgentSpecificQuirk);
481 }
482 if stderr_lower.contains("ccs") && stderr_lower.contains("failed") {
484 return Some(Self::AgentSpecificQuirk);
485 }
486 if stderr_lower.contains("glm")
488 && (stderr_lower.contains("permission")
489 || stderr_lower.contains("denied")
490 || stderr_lower.contains("unauthorized"))
491 {
492 return Some(Self::AgentSpecificQuirk);
493 }
494 }
495
496 if stderr_lower.contains("glm") && exit_code == 1 {
498 return Some(Self::AgentSpecificQuirk);
499 }
500
501 None
502 }
503
504 fn check_command_not_found(exit_code: i32, stderr_lower: &str) -> Option<Self> {
506 if exit_code == 127
509 || exit_code == 126
510 || stderr_lower.contains("command not found")
511 || stderr_lower.contains("not found")
512 || stderr_lower.contains("no such file")
513 {
514 return Some(Self::CommandNotFound);
515 }
516
517 None
518 }
519}
520
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifies a failure without any agent or model hint.
    fn classify(exit_code: i32, stderr: &str) -> AgentErrorKind {
        AgentErrorKind::classify_with_agent(exit_code, stderr, None, None)
    }

    /// Classifies a failure with an agent name and no model flag.
    fn classify_for(agent: &str, exit_code: i32, stderr: &str) -> AgentErrorKind {
        AgentErrorKind::classify_with_agent(exit_code, stderr, Some(agent), None)
    }

    #[test]
    fn test_is_glm_like_agent() {
        // CCS or Claude agents running a GLM-like model.
        let glm_like = [
            "ccs/glm",
            "ccs/zai",
            "ccs/zhipuai",
            "ccs/qwen",
            "ccs/deepseek",
            "CCS/GLM",
            "claude -m glm-4",
        ];
        for agent in glm_like {
            assert!(is_glm_like_agent(agent), "expected GLM-like: {:?}", agent);
        }

        let not_glm_like = [
            // OpenCode agents are excluded even with a GLM model.
            "opencode/opencode/glm-4.7-free",
            "opencode/zai/glm-4.7",
            "opencode run -m glm",
            // Agents without a GLM-like model.
            "claude",
            "codex",
            "ccs/work",
            "ccs/personal",
            // Bare model names without a CCS/Claude agent.
            "glm-4.7-free",
            "zai/glm-4.7",
        ];
        for agent in not_glm_like {
            assert!(!is_glm_like_agent(agent), "expected not GLM-like: {:?}", agent);
        }
    }

    #[test]
    fn test_agent_error_kind_should_retry() {
        let retryable = [
            AgentErrorKind::RateLimited,
            AgentErrorKind::ApiUnavailable,
            AgentErrorKind::NetworkError,
            AgentErrorKind::Timeout,
            AgentErrorKind::InvalidResponse,
            AgentErrorKind::Transient,
            AgentErrorKind::RetryableAgentQuirk,
        ];
        for kind in retryable {
            assert!(kind.should_retry(), "{:?} should retry", kind);
        }

        let not_retryable = [
            AgentErrorKind::AuthFailure,
            AgentErrorKind::CommandNotFound,
            AgentErrorKind::Permanent,
        ];
        for kind in not_retryable {
            assert!(!kind.should_retry(), "{:?} should not retry", kind);
        }
    }

    #[test]
    fn test_agent_error_kind_should_fallback() {
        let fallback = [
            AgentErrorKind::TokenExhausted,
            AgentErrorKind::AuthFailure,
            AgentErrorKind::CommandNotFound,
            AgentErrorKind::ProcessKilled,
            AgentErrorKind::ToolExecutionFailed,
            AgentErrorKind::AgentSpecificQuirk,
        ];
        for kind in fallback {
            assert!(kind.should_fallback(), "{:?} should fall back", kind);
        }

        let no_fallback = [AgentErrorKind::RateLimited, AgentErrorKind::Permanent];
        for kind in no_fallback {
            assert!(!kind.should_fallback(), "{:?} should not fall back", kind);
        }
    }

    #[test]
    fn test_agent_error_kind_is_unrecoverable() {
        for kind in [AgentErrorKind::DiskFull, AgentErrorKind::Permanent] {
            assert!(kind.is_unrecoverable(), "{:?} should be unrecoverable", kind);
        }

        for kind in [AgentErrorKind::RateLimited, AgentErrorKind::AuthFailure] {
            assert!(!kind.is_unrecoverable(), "{:?} should be recoverable", kind);
        }
    }

    #[test]
    fn test_agent_error_kind_classify() {
        // (exit code, stderr, expected kind) without any agent hint.
        let plain_cases = [
            (1, "rate limit exceeded", AgentErrorKind::RateLimited),
            (1, "error 429", AgentErrorKind::RateLimited),
            (1, "unauthorized", AgentErrorKind::AuthFailure),
            (1, "error 401", AgentErrorKind::AuthFailure),
            (127, "", AgentErrorKind::CommandNotFound),
            (1, "command not found", AgentErrorKind::CommandNotFound),
            (137, "", AgentErrorKind::ProcessKilled),
            (1, "out of memory", AgentErrorKind::ProcessKilled),
            (1, "write error", AgentErrorKind::ToolExecutionFailed),
            (1, "tool failed", AgentErrorKind::ToolExecutionFailed),
            (1, "failed to write", AgentErrorKind::ToolExecutionFailed),
            (1, "permission denied", AgentErrorKind::ToolExecutionFailed),
            (1, "operation not permitted", AgentErrorKind::ToolExecutionFailed),
            (1, "insufficient permissions", AgentErrorKind::ToolExecutionFailed),
            (1, "access denied", AgentErrorKind::AuthFailure),
            (1, "glm error", AgentErrorKind::AgentSpecificQuirk),
            (1, "ccs glm failed", AgentErrorKind::AgentSpecificQuirk),
            (1, "some random error", AgentErrorKind::Transient),
        ];
        for (exit_code, stderr, expected) in plain_cases {
            assert_eq!(
                classify(exit_code, stderr),
                expected,
                "exit code {} with stderr {:?}",
                exit_code,
                stderr
            );
        }

        // The same exit code 1, but reported by a GLM-like CCS agent. Generic
        // patterns still win over the agent-specific handling.
        let ccs_glm_cases = [
            ("some random error", AgentErrorKind::RetryableAgentQuirk),
            ("permission denied", AgentErrorKind::ToolExecutionFailed),
            ("token limit exceeded", AgentErrorKind::TokenExhausted),
            ("disk full", AgentErrorKind::DiskFull),
            ("glm failed", AgentErrorKind::AgentSpecificQuirk),
        ];
        for (stderr, expected) in ccs_glm_cases {
            assert_eq!(
                classify_for("ccs/glm", 1, stderr),
                expected,
                "ccs/glm with stderr {:?}",
                stderr
            );
        }
    }

    #[test]
    fn test_opencode_error_classification_not_treated_as_glm() {
        // (agent, stderr, expected kind), all with exit code 1.
        let cases = [
            (
                "opencode/opencode/glm-4.7-free",
                "some error occurred",
                AgentErrorKind::Transient,
            ),
            (
                "opencode/opencode/glm-4.7-free",
                "something happened",
                AgentErrorKind::Permanent,
            ),
            (
                "opencode/zai/glm-4.7",
                "rate limit exceeded",
                AgentErrorKind::RateLimited,
            ),
        ];
        for (agent, stderr, expected) in cases {
            assert_eq!(
                classify_for(agent, 1, stderr),
                expected,
                "agent {:?} with stderr {:?}",
                agent,
                stderr
            );
        }
    }

    #[test]
    fn test_agent_error_kind_description_and_advice() {
        let kind = AgentErrorKind::RateLimited;
        assert!(!kind.description().is_empty());
        assert!(!kind.recovery_advice().is_empty());
    }

    #[test]
    fn test_agent_error_kind_suggested_wait_ms() {
        let expectations = [
            (AgentErrorKind::RateLimited, 5000),
            (AgentErrorKind::Permanent, 0),
        ];
        for (kind, expected_ms) in expectations {
            assert_eq!(kind.suggested_wait_ms(), expected_ms, "{:?}", kind);
        }
    }

    #[test]
    fn test_agent_error_kind_suggests_smaller_context() {
        for kind in [AgentErrorKind::TokenExhausted, AgentErrorKind::ProcessKilled] {
            assert!(kind.suggests_smaller_context(), "{:?}", kind);
        }
        assert!(!AgentErrorKind::RateLimited.suggests_smaller_context());
    }
}