1use std::fmt;
17
18use anyhow::anyhow;
19
20use crate::contracts::Runner;
21use crate::redaction::RedactedString;
22
/// Coarse classification of a runner failure, as produced by `RunnerError::classify`.
///
/// Each variant carries a reason enum recording why the failure landed in that bucket.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RunnerFailureClass {
    /// The failure matched a transient condition (rate limit, temporary outage, transient I/O).
    Retryable(RetryableReason),
    /// The failure needs the user to act (authentication problem or missing binary).
    RequiresUserInput(UserInputReason),
    /// The failure matched none of the retryable or user-actionable conditions.
    NonRetryable(NonRetryableReason),
}
33
/// Why a failure was classified as `RunnerFailureClass::Retryable`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RetryableReason {
    /// The failure text looked like a rate-limit response (e.g. "429", "rate limit").
    RateLimited,
    /// The runner timed out, or the failure text looked like a 502/503/504-style outage.
    TemporaryUnavailable,
    /// An I/O error whose kind is treated as transient (timeouts, connection resets, ...).
    TransientIo,
}
44
/// Why a failure was classified as `RunnerFailureClass::RequiresUserInput`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum UserInputReason {
    /// The failure text looked like an authentication problem (e.g. "401", "unauthorized").
    Auth,
    /// The runner binary was not found (`RunnerError::BinaryMissing`).
    MissingBinary,
}
53
/// Why a failure was classified as `RunnerFailureClass::NonRetryable`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NonRetryableReason {
    /// The runner process could not be spawned (`RunnerError::SpawnFailed`).
    InvalidInvocation,
    /// Fallback for everything else: interrupts, signals, non-transient I/O errors, and
    /// failures whose text matched no known pattern.
    FatalExit,
}
62
/// Errors raised while spawning or running an external runner process.
///
/// Use `RunnerError::classify` to map an error to a `RunnerFailureClass`.
#[derive(Debug, thiserror::Error)]
pub enum RunnerError {
    /// The runner binary could not be found.
    #[error("runner binary not found: {bin}")]
    BinaryMissing {
        /// Binary name or path shown in the error message.
        bin: String,
        /// Underlying I/O error, exposed as the error source.
        #[source]
        source: std::io::Error,
    },

    /// The runner process failed to spawn.
    #[error("runner failed to spawn: {bin}")]
    SpawnFailed {
        /// Binary name or path shown in the error message.
        bin: String,
        /// Underlying I/O error, exposed as the error source.
        #[source]
        source: std::io::Error,
    },

    /// The runner exited with a non-zero status code.
    ///
    /// The captured output is embedded in the display message.
    #[error("runner exited non-zero (code={code})\nstdout: {stdout}\nstderr: {stderr}")]
    NonZeroExit {
        /// Exit code reported by the process.
        code: i32,
        /// Captured standard output; held as a `RedactedString` (the module's tests check
        /// that secrets are masked when the error is formatted).
        stdout: RedactedString,
        /// Captured standard error; held as a `RedactedString`.
        stderr: RedactedString,
        /// Session identifier, if one is available. Not part of the display message.
        /// NOTE(review): how callers use this is not visible here — confirm.
        session_id: Option<String>,
    },

    /// The runner was terminated by a signal.
    #[error("runner terminated by signal (signal={signal:?})\nstdout: {stdout}\nstderr: {stderr}")]
    TerminatedBySignal {
        /// Signal number, when known.
        signal: Option<i32>,
        /// Captured standard output; held as a `RedactedString`.
        stdout: RedactedString,
        /// Captured standard error; held as a `RedactedString`.
        stderr: RedactedString,
        /// Session identifier, if one is available. Not part of the display message.
        session_id: Option<String>,
    },

    /// The run was interrupted.
    #[error("runner interrupted")]
    Interrupted,

    /// The run timed out.
    #[error("runner timed out")]
    Timeout,

    /// Any other I/O error; `std::io::Error` converts into this variant via `From`.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),

    /// Catch-all wrapping an `anyhow::Error`; `anyhow::Error` converts into this variant
    /// via `From`.
    #[error("other error: {0}")]
    Other(#[from] anyhow::Error),
}
107
108fn runner_label(runner: &Runner) -> String {
109 match runner {
110 Runner::Codex => "codex".to_string(),
111 Runner::Opencode => "opencode".to_string(),
112 Runner::Gemini => "gemini".to_string(),
113 Runner::Cursor => "cursor".to_string(),
114 Runner::Claude => "claude".to_string(),
115 Runner::Kimi => "kimi".to_string(),
116 Runner::Pi => "pi".to_string(),
117 Runner::Plugin(id) => format!("plugin:{}", id),
118 }
119}
120
/// Reports whether `text` looks like a rate-limit or throttling failure.
///
/// The check is case-insensitive. It matches the HTTP status code `429` or one of several
/// well-known phrases. The status code only counts when it is not embedded in a longer run
/// of digits, so unrelated numbers such as `14290` or `4291` do not trigger a match.
fn looks_like_rate_limit(text: &str) -> bool {
    // True when `code` occurs in `haystack` with no ASCII digit directly before or after it.
    fn has_standalone_code(haystack: &str, code: &str) -> bool {
        haystack.match_indices(code).any(|(start, hit)| {
            let (before, after) = (&haystack[..start], &haystack[start + hit.len()..]);
            !before.ends_with(|c: char| c.is_ascii_digit())
                && !after.starts_with(|c: char| c.is_ascii_digit())
        })
    }

    const PHRASES: [&str; 4] = [
        "rate limit",
        "too many requests",
        "quota exceeded",
        "throttled",
    ];

    let lower = text.to_lowercase();
    has_standalone_code(&lower, "429") || PHRASES.iter().any(|phrase| lower.contains(phrase))
}
130
/// Reports whether `text` looks like a temporary service outage.
///
/// The check is case-insensitive. It matches the HTTP status codes `502`, `503` and `504`
/// or one of several well-known phrases. A status code only counts when it is not embedded
/// in a longer run of digits, so unrelated numbers such as `15030` or `1504` do not
/// trigger a match.
fn looks_like_temporary_unavailable(text: &str) -> bool {
    // True when `code` occurs in `haystack` with no ASCII digit directly before or after it.
    fn has_standalone_code(haystack: &str, code: &str) -> bool {
        haystack.match_indices(code).any(|(start, hit)| {
            let (before, after) = (&haystack[..start], &haystack[start + hit.len()..]);
            !before.ends_with(|c: char| c.is_ascii_digit())
                && !after.starts_with(|c: char| c.is_ascii_digit())
        })
    }

    const STATUS_CODES: [&str; 3] = ["503", "502", "504"];
    const PHRASES: [&str; 3] = [
        "service unavailable",
        "temporarily unavailable",
        "gateway timeout",
    ];

    let lower = text.to_lowercase();
    STATUS_CODES
        .iter()
        .any(|code| has_standalone_code(&lower, code))
        || PHRASES.iter().any(|phrase| lower.contains(phrase))
}
141
142fn looks_like_auth_required(_runner: &Runner, text: &str) -> bool {
144 let lower = text.to_lowercase();
145 lower.contains("401")
146 || lower.contains("unauthorized")
147 || lower.contains("invalid api key")
148 || lower.contains("not logged in")
149 || lower.contains("authentication failed")
150 || lower.contains("access denied")
151}
152
153fn classify_textual_failure(
155 runner: &Runner,
156 _code: i32,
157 stdout: &str,
158 stderr: &str,
159) -> RunnerFailureClass {
160 let combined = format!("{} {}", stdout, stderr);
161 let text = combined.to_lowercase();
162
163 if looks_like_rate_limit(&text) {
164 return RunnerFailureClass::Retryable(RetryableReason::RateLimited);
165 }
166 if looks_like_temporary_unavailable(&text) {
167 return RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable);
168 }
169 if looks_like_auth_required(runner, &text) {
170 return RunnerFailureClass::RequiresUserInput(UserInputReason::Auth);
171 }
172
173 RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit)
174}
175
176impl RunnerError {
177 pub(crate) fn classify(&self, runner: &Runner) -> RunnerFailureClass {
181 match self {
182 RunnerError::BinaryMissing { .. } => {
183 RunnerFailureClass::RequiresUserInput(UserInputReason::MissingBinary)
184 }
185 RunnerError::SpawnFailed { .. } => {
186 RunnerFailureClass::NonRetryable(NonRetryableReason::InvalidInvocation)
188 }
189 RunnerError::Interrupted => {
190 RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit)
191 }
192 RunnerError::Timeout => {
193 RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable)
195 }
196 RunnerError::Io(e) => {
197 use std::io::ErrorKind;
198 match e.kind() {
199 ErrorKind::TimedOut
200 | ErrorKind::ConnectionReset
201 | ErrorKind::ConnectionAborted
202 | ErrorKind::ConnectionRefused
203 | ErrorKind::NotConnected
204 | ErrorKind::UnexpectedEof
205 | ErrorKind::WouldBlock => {
206 RunnerFailureClass::Retryable(RetryableReason::TransientIo)
207 }
208 _ => RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit),
209 }
210 }
211 RunnerError::NonZeroExit {
212 code,
213 stdout,
214 stderr,
215 ..
216 } => classify_textual_failure(runner, *code, &stdout.to_string(), &stderr.to_string()),
217 RunnerError::TerminatedBySignal { .. } => {
218 RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit)
220 }
221 RunnerError::Other(err) => {
222 let msg = format!("{:#}", err).to_lowercase();
223 if looks_like_rate_limit(&msg) {
224 RunnerFailureClass::Retryable(RetryableReason::RateLimited)
225 } else if looks_like_temporary_unavailable(&msg) {
226 RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable)
227 } else if looks_like_auth_required(runner, &msg) {
228 RunnerFailureClass::RequiresUserInput(UserInputReason::Auth)
229 } else {
230 RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit)
231 }
232 }
233 }
234 }
235}
236
237pub(crate) fn runner_execution_error(runner: &Runner, bin: &str, step: &str) -> RunnerError {
238 RunnerError::Other(anyhow!(
239 "Runner execution failed (runner={}, bin={}): {}.",
240 runner_label(runner),
241 bin,
242 step
243 ))
244}
245
246pub(crate) fn runner_execution_error_with_source(
247 runner: &Runner,
248 bin: &str,
249 step: &str,
250 source: impl fmt::Display,
251) -> RunnerError {
252 RunnerError::Other(anyhow!(
253 "Runner execution failed (runner={}, bin={}): {}: {}.",
254 runner_label(runner),
255 bin,
256 step,
257 source
258 ))
259}
260
/// Unit tests for error formatting, the textual heuristics, and `RunnerError::classify`.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Error formatting -------------------------------------------------------------

    #[test]
    fn runner_error_nonzero_exit_redacts_output() {
        let err = RunnerError::NonZeroExit {
            code: 1,
            stdout: "out: API_KEY=secret123".into(),
            stderr: "err: bearer abc123def456".into(),
            session_id: None,
        };
        let msg = format!("{err}");
        assert!(msg.contains("API_KEY=[REDACTED]"));
        assert!(msg.contains("bearer [REDACTED]"));
        assert!(!msg.contains("secret123"));
        assert!(!msg.contains("abc123def456"));
    }

    #[test]
    fn runner_execution_error_includes_context() {
        let err = runner_execution_error(&Runner::Gemini, "gemini", "capture child stdout");
        let msg = format!("{err}");
        assert!(msg.contains("runner=gemini"));
        assert!(msg.contains("bin=gemini"));
        assert!(msg.contains("capture child stdout"));
    }

    #[test]
    fn runner_execution_error_with_source_includes_source() {
        let err = runner_execution_error_with_source(
            &Runner::Gemini,
            "gemini",
            "wait for child",
            "broken pipe",
        );
        let msg = format!("{err}");
        assert!(msg.contains("runner=gemini"));
        assert!(msg.contains("bin=gemini"));
        assert!(msg.contains("wait for child: broken pipe."));
    }

    // --- looks_like_rate_limit --------------------------------------------------------

    #[test]
    fn looks_like_rate_limit_detects_429() {
        assert!(looks_like_rate_limit("Error 429"));
        assert!(looks_like_rate_limit("HTTP 429"));
        assert!(!looks_like_rate_limit("Error 500"));
    }

    #[test]
    fn looks_like_rate_limit_detects_variations() {
        assert!(looks_like_rate_limit("rate limit exceeded"));
        assert!(looks_like_rate_limit("Rate Limit Exceeded"));
        assert!(looks_like_rate_limit("too many requests"));
        assert!(looks_like_rate_limit("Too Many Requests"));
        assert!(looks_like_rate_limit("quota exceeded"));
        assert!(looks_like_rate_limit("API throttled"));
    }

    #[test]
    fn looks_like_rate_limit_negative_cases() {
        assert!(!looks_like_rate_limit("success"));
        assert!(!looks_like_rate_limit("internal server error"));
        assert!(!looks_like_rate_limit(""));
    }

    // --- looks_like_temporary_unavailable ---------------------------------------------

    #[test]
    fn looks_like_temporary_unavailable_detects_503() {
        assert!(looks_like_temporary_unavailable("Error 503"));
        assert!(looks_like_temporary_unavailable("HTTP 503"));
    }

    #[test]
    fn looks_like_temporary_unavailable_detects_gateway_errors() {
        assert!(looks_like_temporary_unavailable("502 Bad Gateway"));
        assert!(looks_like_temporary_unavailable("504 Gateway Timeout"));
    }

    #[test]
    fn looks_like_temporary_unavailable_detects_variations() {
        assert!(looks_like_temporary_unavailable("service unavailable"));
        assert!(looks_like_temporary_unavailable("Service Unavailable"));
        assert!(looks_like_temporary_unavailable("temporarily unavailable"));
        assert!(looks_like_temporary_unavailable("gateway timeout"));
    }

    #[test]
    fn looks_like_temporary_unavailable_negative_cases() {
        assert!(!looks_like_temporary_unavailable("success"));
        assert!(!looks_like_temporary_unavailable("Error 404"));
        assert!(!looks_like_temporary_unavailable(""));
    }

    // --- looks_like_auth_required -----------------------------------------------------

    #[test]
    fn looks_like_auth_required_detects_401() {
        let runner = Runner::Gemini;
        assert!(looks_like_auth_required(&runner, "Error 401"));
        assert!(looks_like_auth_required(&runner, "HTTP 401"));
    }

    #[test]
    fn looks_like_auth_required_detects_variations() {
        let runner = Runner::Gemini;
        assert!(looks_like_auth_required(&runner, "unauthorized"));
        assert!(looks_like_auth_required(&runner, "Unauthorized"));
        assert!(looks_like_auth_required(&runner, "invalid api key"));
        assert!(looks_like_auth_required(&runner, "not logged in"));
        assert!(looks_like_auth_required(&runner, "authentication failed"));
        assert!(looks_like_auth_required(&runner, "access denied"));
    }

    #[test]
    fn looks_like_auth_required_negative_cases() {
        let runner = Runner::Gemini;
        assert!(!looks_like_auth_required(&runner, "success"));
        assert!(!looks_like_auth_required(&runner, "Error 500"));
        assert!(!looks_like_auth_required(&runner, ""));
    }

    // --- classify: non-zero exits (text-based) ----------------------------------------

    #[test]
    fn classify_returns_retryable_for_rate_limit() {
        let err = RunnerError::NonZeroExit {
            code: 1,
            stdout: "rate limit exceeded".into(),
            stderr: "".into(),
            session_id: None,
        };
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::Retryable(RetryableReason::RateLimited) => {}
            other => panic!("Expected RateLimited, got {:?}", other),
        }
    }

    #[test]
    fn classify_returns_retryable_for_503() {
        let err = RunnerError::NonZeroExit {
            code: 1,
            stdout: "".into(),
            stderr: "HTTP 503 Service Unavailable".into(),
            session_id: None,
        };
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable) => {}
            other => panic!("Expected TemporaryUnavailable, got {:?}", other),
        }
    }

    #[test]
    fn classify_returns_requires_user_input_for_auth() {
        let err = RunnerError::NonZeroExit {
            code: 1,
            stdout: "401 Unauthorized".into(),
            stderr: "".into(),
            session_id: None,
        };
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::RequiresUserInput(UserInputReason::Auth) => {}
            other => panic!("Expected Auth, got {:?}", other),
        }
    }

    #[test]
    fn classify_returns_non_retryable_for_fatal_exit() {
        let err = RunnerError::NonZeroExit {
            code: 1,
            stdout: "some random error".into(),
            stderr: "no matching pattern".into(),
            session_id: None,
        };
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit) => {}
            other => panic!("Expected FatalExit, got {:?}", other),
        }
    }

    // --- classify: structural variants ------------------------------------------------

    #[test]
    fn classify_binary_missing_requires_user_input() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
        let err = RunnerError::BinaryMissing {
            bin: "test".to_string(),
            source: io_err,
        };
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::RequiresUserInput(UserInputReason::MissingBinary) => {}
            other => panic!("Expected MissingBinary, got {:?}", other),
        }
    }

    #[test]
    fn classify_spawn_failed_is_invalid_invocation() {
        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "permission denied");
        let err = RunnerError::SpawnFailed {
            bin: "test".to_string(),
            source: io_err,
        };
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::NonRetryable(NonRetryableReason::InvalidInvocation) => {}
            other => panic!("Expected InvalidInvocation, got {:?}", other),
        }
    }

    #[test]
    fn classify_terminated_by_signal_is_non_retryable() {
        let err = RunnerError::TerminatedBySignal {
            signal: Some(9),
            stdout: "".into(),
            stderr: "".into(),
            session_id: None,
        };
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit) => {}
            other => panic!("Expected FatalExit, got {:?}", other),
        }
    }

    #[test]
    fn classify_timeout_is_retryable() {
        let err = RunnerError::Timeout;
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::Retryable(RetryableReason::TemporaryUnavailable) => {}
            other => panic!("Expected TemporaryUnavailable, got {:?}", other),
        }
    }

    #[test]
    fn classify_interrupted_is_non_retryable() {
        let err = RunnerError::Interrupted;
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit) => {}
            other => panic!("Expected FatalExit, got {:?}", other),
        }
    }

    // --- classify: I/O errors ---------------------------------------------------------

    #[test]
    fn classify_io_transient_errors_are_retryable() {
        use std::io::ErrorKind;

        let transient_kinds = [
            ErrorKind::TimedOut,
            ErrorKind::ConnectionReset,
            ErrorKind::ConnectionAborted,
            ErrorKind::ConnectionRefused,
            ErrorKind::NotConnected,
            ErrorKind::UnexpectedEof,
            ErrorKind::WouldBlock,
        ];

        for kind in &transient_kinds {
            let io_err = std::io::Error::new(*kind, "transient error");
            let err = RunnerError::Io(io_err);
            let runner = Runner::Gemini;
            match err.classify(&runner) {
                RunnerFailureClass::Retryable(RetryableReason::TransientIo) => {}
                other => panic!("Expected TransientIo for {:?}, got {:?}", kind, other),
            }
        }
    }

    #[test]
    fn classify_io_other_errors_are_non_retryable() {
        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "permission denied");
        let err = RunnerError::Io(io_err);
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit) => {}
            other => panic!("Expected FatalExit, got {:?}", other),
        }
    }

    // --- classify: anyhow-wrapped errors ----------------------------------------------

    #[test]
    fn classify_other_error_with_rate_limit_pattern() {
        let err = RunnerError::Other(anyhow!("429 rate limit exceeded"));
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::Retryable(RetryableReason::RateLimited) => {}
            other => panic!("Expected RateLimited, got {:?}", other),
        }
    }

    #[test]
    fn classify_other_error_with_auth_pattern() {
        let err = RunnerError::Other(anyhow!("401 invalid api key"));
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::RequiresUserInput(UserInputReason::Auth) => {}
            other => panic!("Expected Auth, got {:?}", other),
        }
    }

    #[test]
    fn classify_other_error_without_pattern_is_non_retryable() {
        let err = RunnerError::Other(anyhow!("some generic error"));
        let runner = Runner::Gemini;
        match err.classify(&runner) {
            RunnerFailureClass::NonRetryable(NonRetryableReason::FatalExit) => {}
            other => panic!("Expected FatalExit, got {:?}", other),
        }
    }
}