// tt_shared/error.rs

//! Error types returned by provider adapters. The core layer maps these to
//! HTTP status codes and decides retry strategy — adapters do not retry.

use thiserror::Error;

6#[derive(Debug, Error)]
7pub enum ProviderError {
8    #[error("unauthorized: {0}")]
9    Unauthorized(String),
10
11    #[error("rate limited (retry after {retry_after_ms} ms)")]
12    RateLimited { retry_after_ms: u64 },
13
14    #[error("model not found: {model}")]
15    ModelNotFound { model: String },
16
17    #[error("invalid request: {0}")]
18    InvalidRequest(String),
19
20    #[error("upstream provider error (status {status}): {message}")]
21    ProviderUpstream { status: u16, message: String },
22
23    #[error("timeout after {ms} ms")]
24    Timeout { ms: u64 },
25
26    #[error("network error: {0}")]
27    Network(#[from] reqwest::Error),
28
29    #[error("deserialize error: {0}")]
30    Deserialize(String),
31
32    #[error("unsupported feature: {0}")]
33    Unsupported(String),
34
35    #[error("internal error: {0}")]
36    Internal(String),
37}
38
39impl ProviderError {
40    /// True if the error is retriable. The core layer applies backoff + jitter.
41    pub fn is_retriable(&self) -> bool {
42        match self {
43            ProviderError::RateLimited { .. } => true,
44            ProviderError::Timeout { .. } => true,
45            ProviderError::Network(_) => true,
46            ProviderError::ProviderUpstream { status, .. } => *status >= 500,
47            _ => false,
48        }
49    }
50
51    /// True if the error means we should try a fallback provider.
52    ///
53    /// Transient/load and server-side conditions are fallback-eligible:
54    /// upstream *server* errors (5xx), timeouts, model-not-found, and rate
55    /// limiting (429). A 429 is also [`Self::is_retriable`], so the dispatch
56    /// loop first exhausts the same-provider retry budget (honoring
57    /// `retry_after_ms`); only then does failover advance to a candidate that
58    /// may have spare quota — a common real-world recovery during a primary
59    /// provider's capacity crunch.
60    ///
61    /// A *deterministic* client error (400 invalid request, 403 forbidden, 422
62    /// unprocessable) is NOT eligible: it would fail identically on every
63    /// provider, so failing over just burns extra upstream calls + spend.
64    pub fn is_fallback_eligible(&self) -> bool {
65        match self {
66            ProviderError::ModelNotFound { .. } => true,
67            ProviderError::Timeout { .. } => true,
68            ProviderError::RateLimited { .. } => true,
69            ProviderError::ProviderUpstream { status, .. } => *status >= 500,
70            _ => false,
71        }
72    }
73}
74
/// Unit tests pinning the retry / failover classification of each error kind.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn upstream_5xx_is_fallback_eligible() {
        assert!(ProviderError::ProviderUpstream {
            status: 500,
            message: "boom".into()
        }
        .is_fallback_eligible());
        assert!(ProviderError::ProviderUpstream {
            status: 503,
            message: "unavailable".into()
        }
        .is_fallback_eligible());
    }

    #[test]
    fn upstream_4xx_is_not_fallback_eligible() {
        for status in [400u16, 403, 404, 422] {
            assert!(
                !ProviderError::ProviderUpstream {
                    status,
                    message: "client error".into()
                }
                .is_fallback_eligible(),
                "status {status} must not fail over"
            );
        }
    }

    #[test]
    fn model_not_found_and_timeout_still_fallback_eligible() {
        assert!(ProviderError::ModelNotFound { model: "x".into() }.is_fallback_eligible());
        assert!(ProviderError::Timeout { ms: 1000 }.is_fallback_eligible());
    }

    #[test]
    fn rate_limited_is_fallback_eligible() {
        // A sustained 429 on the primary should fail over to a provider with
        // spare quota (after same-provider retries exhaust). 429 is also
        // retriable, so the dispatch loop retries it in place first.
        let e = ProviderError::RateLimited { retry_after_ms: 0 };
        assert!(e.is_fallback_eligible());
        assert!(e.is_retriable());
    }

    #[test]
    fn invalid_request_and_unauthorized_not_fallback_eligible() {
        assert!(!ProviderError::InvalidRequest("bad".into()).is_fallback_eligible());
        assert!(!ProviderError::Unauthorized("nope".into()).is_fallback_eligible());
    }

    #[test]
    fn retriable_classification() {
        // Transient conditions are retried in place.
        assert!(ProviderError::Timeout { ms: 1000 }.is_retriable());

        // Upstream errors are retriable from status 500 upward, and not below.
        for status in [500u16, 502, 503] {
            assert!(
                ProviderError::ProviderUpstream {
                    status,
                    message: "server error".into()
                }
                .is_retriable(),
                "status {status} must be retriable"
            );
        }
        for status in [400u16, 403, 404, 422, 499] {
            assert!(
                !ProviderError::ProviderUpstream {
                    status,
                    message: "client error".into()
                }
                .is_retriable(),
                "status {status} must not be retriable"
            );
        }

        // Model-not-found fails over but is never retried on the same provider.
        assert!(!ProviderError::ModelNotFound { model: "x".into() }.is_retriable());

        // Deterministic failures are not retried.
        assert!(!ProviderError::InvalidRequest("bad".into()).is_retriable());
        assert!(!ProviderError::Unauthorized("nope".into()).is_retriable());
    }
}