Skip to main content

defect_core/llm/
error.rs

1//! Provider error types.
2
3use std::time::Duration;
4
5use thiserror::Error;
6
7use crate::error::BoxError;
8
/// A unified error for provider operation failures.
///
/// The top-level struct separates cross-cutting diagnostic information (e.g.
/// `request_id`) from classification information (`kind`) to avoid duplicating
/// `request_id` in every variant.
///
/// The `Display` output is exactly that of `kind`; `request_id` is not part of the
/// rendered message and has to be read from the field.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct ProviderError {
    /// Semantic classification of the failure; this is what `Display` renders.
    pub kind: ProviderErrorKind,
    /// The request ID returned by the server (e.g. Anthropic `request-id` header / OpenAI
    /// `x-request-id`). This is the primary signal for debugging; populate it whenever
    /// possible.
    pub request_id: Option<String>,
}
24
25impl ProviderError {
26    pub fn new(kind: ProviderErrorKind) -> Self {
27        Self {
28            kind,
29            request_id: None,
30        }
31    }
32
33    pub fn with_request_id(mut self, request_id: impl Into<String>) -> Self {
34        self.request_id = Some(request_id.into());
35        self
36    }
37
38    /// Returns a retry hint for this error.
39    pub fn retry_hint(&self) -> RetryHint {
40        use ProviderErrorKind::*;
41        match &self.kind {
42            AuthMissing { .. }
43            | AuthMalformed { .. }
44            | AuthRejected { .. }
45            | ModelNotFound { .. }
46            | BadRequest { .. }
47            | InvalidToolSchema { .. }
48            | InputBlocked { .. }
49            | OutputBlocked { .. }
50            | ProtocolViolation { .. }
51            | MaxTokensInvalid { .. }
52            | QuotaExceeded { .. }
53            | Canceled
54            | Other(_) => RetryHint::No,
55
56            AuthExpired => RetryHint::AfterAction(RetryAction::RefreshAuth),
57            ContextOverflow { .. } => RetryHint::AfterAction(RetryAction::ReduceContext),
58
59            RateLimit {
60                retry_after: Some(d),
61                ..
62            } => RetryHint::After(*d),
63            RateLimit {
64                retry_after: None, ..
65            } => RetryHint::Backoff,
66
67            ServerError { .. }
68            | ServerStreamAborted { .. }
69            | Malformed(_)
70            | Transport(_)
71            | Timeout { .. } => RetryHint::Backoff,
72        }
73    }
74
75    /// Convenience check: whether the agent should automatically retry.
76    pub fn is_retryable(&self) -> bool {
77        !matches!(self.retry_hint(), RetryHint::No)
78    }
79}
80
81/// Semantic classification of provider errors.
82///
83/// Fallback principle: if a category of errors repeatedly falls into
84/// [`ProviderErrorKind::Other`],
85/// prefer to **extract it as a new variant** rather than letting `Other` become the
86/// default.
87#[derive(Debug, Error)]
88pub enum ProviderErrorKind {
89    // ---------- Authentication ----------
90    /// Missing credential.
91    #[error("missing credential{}", var_hint.as_deref().map(|h| format!(" (hint: {h})")).unwrap_or_default())]
92    AuthMissing { var_hint: Option<String> },
93
94    /// Malformed credential.
95    #[error("malformed credential{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
96    AuthMalformed { hint: Option<String> },
97
98    /// Credential rejected by the server (401).
99    #[error("credential rejected by server{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
100    AuthRejected { hint: Option<String> },
101
102    /// OAuth/STS token expired.
103    #[error("auth token expired")]
104    AuthExpired,
105
106    // Quota
107    /// Request-level rate limiting.
108    #[error("rate limit hit ({scope:?}){}", retry_after.map(|d| format!(", retry after {}s", d.as_secs())).unwrap_or_default())]
109    RateLimit {
110        retry_after: Option<Duration>,
111        scope: RateLimitScope,
112    },
113
114    /// Quota exhausted / monthly allowance depleted.
115    #[error("quota exceeded{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
116    QuotaExceeded { hint: Option<String> },
117
118    // ---------- Input ----------
119    /// Context window overflow.
120    #[error("context overflow{}", match (used, limit) {
121        (Some(u), Some(l)) => format!(" ({u} > {l})"),
122        _ => String::new(),
123    })]
124    ContextOverflow {
125        used: Option<u64>,
126        limit: Option<u64>,
127    },
128
129    /// The requested `max_tokens` exceeds the model's limit or was rejected by the
130    /// server.
131    #[error("max_tokens invalid{}", match (requested, limit) {
132        (Some(r), Some(l)) => format!(" ({r} > {l})"),
133        _ => String::new(),
134    })]
135    MaxTokensInvalid {
136        requested: Option<u64>,
137        limit: Option<u64>,
138    },
139
140    /// Model ID does not exist or is unavailable.
141    #[error("model not found: {model}")]
142    ModelNotFound { model: String },
143
144    /// Request body rejected by the wire service validation (schema error, conflicting
145    /// mutually exclusive fields).
146    #[error("bad request{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
147    BadRequest { hint: Option<String> },
148
149    /// The tool schema referenced in the request was rejected by the server.
150    #[error("invalid tool schema for {tool}{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
151    InvalidToolSchema { tool: String, hint: Option<String> },
152
153    // ---------- Security / Compliance ----------
154    /// Input triggered a safety filter.
155    #[error("input blocked{}", policy.as_deref().map(|p| format!(" by {p}")).unwrap_or_default())]
156    InputBlocked { policy: Option<String> },
157
158    /// Model output blocked by safety filter.
159    #[error("output blocked{}", policy.as_deref().map(|p| format!(" by {p}")).unwrap_or_default())]
160    OutputBlocked { policy: Option<String> },
161
162    // ---------- Protocol / Server Faults ----------
163    /// A 5xx or server-reported internal error.
164    #[error("server error{}{}",
165        status.map(|s| format!(" ({s})")).unwrap_or_default(),
166        hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
167    ServerError {
168        status: Option<u16>,
169        hint: Option<String>,
170    },
171
172    /// The server aborted the stream during generation.
173    #[error("server aborted stream{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
174    ServerStreamAborted { hint: Option<String> },
175
176    /// Failed to parse wire JSON / SSE.
177    #[error("malformed wire response: {0}")]
178    Malformed(#[source] BoxError),
179
180    /// The server responded with a wire type or field not defined in the protocol
181    /// specification.
182    #[error("protocol violation: {hint}")]
183    ProtocolViolation { hint: String },
184
185    // ---------- transport ----------
186    /// Transport-layer error (DNS, TCP, TLS, HTTP).
187    #[error("transport error: {0}")]
188    Transport(#[source] BoxError),
189
190    /// Request timed out.
191    #[error("request timeout at {phase:?}")]
192    Timeout { phase: TimeoutPhase },
193
194    // ---------- control flow ----------
195    /// Canceled by the user or upper layer.
196    #[error("canceled")]
197    Canceled,
198
199    // ---------- Catch-all ----------
200    /// Catch-all variant; prefer to extract cases from here when adding new categories.
201    #[error("other provider error: {0}")]
202    Other(#[source] BoxError),
203}
204
/// Which limit a rate-limit error was counted against.
///
/// Rendered with `Debug` formatting inside the `RateLimit` error message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RateLimitScope {
    /// Requests per minute.
    Rpm,
    /// Tokens per minute.
    Tpm,
    /// Reported by the server but not further subdivided.
    Unspecified,
}
214
/// The phase of a request in which a timeout occurred.
///
/// Rendered with `Debug` formatting inside the `Timeout` error message
/// (`"request timeout at {phase:?}"`).
// NOTE(review): the per-variant descriptions below are inferred from the variant
// names only; confirm against the transport layer that produces them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TimeoutPhase {
    /// Presumably: while establishing the connection.
    Connect,
    /// Presumably: while waiting for / reading the response headers.
    ReadHeaders,
    /// Presumably: while reading the response body.
    ReadBody,
    /// Presumably: no data arrived within the idle window.
    Idle,
    /// Presumably: the overall request deadline was exceeded.
    Total,
}
223
/// Retry hints for errors, as returned by `ProviderError::retry_hint`.
///
/// Every variant other than [`RetryHint::No`] makes `ProviderError::is_retryable`
/// return `true`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RetryHint {
    /// Not retryable.
    No,
    /// Retry immediately once.
    ///
    /// Not currently produced by `ProviderError::retry_hint` in this file.
    Immediate,
    /// Retry after the server-suggested duration (used for rate limits that carry
    /// a `retry_after` value).
    After(Duration),
    /// Retry with backoff (no server suggestion).
    Backoff,
    /// Retry after performing a prerequisite action.
    AfterAction(RetryAction),
}
238
/// A prerequisite action to perform before retrying, carried by
/// `RetryHint::AfterAction`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RetryAction {
    /// Refresh the credential; hinted for `ProviderErrorKind::AuthExpired`.
    RefreshAuth,
    /// Switch to a different model.
    ///
    /// Not currently produced by `ProviderError::retry_hint` in this file.
    SwitchModel,
    /// Shrink the request context; hinted for `ProviderErrorKind::ContextOverflow`.
    ReduceContext,
}
244}