Skip to main content

defect_agent/llm/
error.rs

1//! Provider error types.
2
3use std::time::Duration;
4
5use thiserror::Error;
6
7use crate::error::BoxError;
8
/// A unified error for provider operation failures.
///
/// The top-level struct separates cross-cutting diagnostic information (e.g.
/// `request_id`) from classification information (`kind`) to avoid duplicating
/// `request_id` in every variant.
///
/// The `Display` output is exactly that of `kind`; `request_id` is not part of the
/// rendered message and has to be read from the field (or from the `Debug` output).
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct ProviderError {
    /// Semantic classification of the failure; also the sole source of the `Display`
    /// text.
    pub kind: ProviderErrorKind,
    /// The request ID returned by the server (e.g. Anthropic `request-id` header / OpenAI
    /// `x-request-id`). This is the primary signal for debugging; populate it whenever
    /// possible.
    pub request_id: Option<String>,
}
24
25impl ProviderError {
26    pub fn new(kind: ProviderErrorKind) -> Self {
27        Self {
28            kind,
29            request_id: None,
30        }
31    }
32
33    pub fn with_request_id(mut self, request_id: impl Into<String>) -> Self {
34        self.request_id = Some(request_id.into());
35        self
36    }
37
38    /// Returns a retry hint for this error.
39    pub fn retry_hint(&self) -> RetryHint {
40        use ProviderErrorKind::*;
41        match &self.kind {
42            AuthMissing { .. }
43            | AuthMalformed { .. }
44            | AuthRejected { .. }
45            | ModelNotFound { .. }
46            | BadRequest { .. }
47            | InvalidToolSchema { .. }
48            | InputBlocked { .. }
49            | OutputBlocked { .. }
50            | ProtocolViolation { .. }
51            | MaxTokensInvalid { .. }
52            | QuotaExceeded { .. }
53            | Canceled
54            | Other(_) => RetryHint::No,
55
56            AuthExpired => RetryHint::AfterAction(RetryAction::RefreshAuth),
57            ContextOverflow { .. } => RetryHint::AfterAction(RetryAction::ReduceContext),
58
59            RateLimit {
60                retry_after: Some(d),
61                ..
62            } => RetryHint::After(*d),
63            RateLimit {
64                retry_after: None, ..
65            } => RetryHint::Backoff,
66
67            ServerError { .. }
68            | ServerStreamAborted { .. }
69            | Malformed(_)
70            | Transport(_)
71            | Timeout { .. } => RetryHint::Backoff,
72        }
73    }
74
75    /// Convenience check: whether the agent should automatically retry.
76    pub fn is_retryable(&self) -> bool {
77        !matches!(self.retry_hint(), RetryHint::No)
78    }
79}
80
81/// Semantic classification of provider errors.
82///
83/// Fallback principle: if a category of errors repeatedly falls into
84/// [`ProviderErrorKind::Other`],
85/// prefer to **extract it as a new variant** rather than letting `Other` become the
86/// default.
87#[non_exhaustive]
88#[derive(Debug, Error)]
89pub enum ProviderErrorKind {
90    // ---------- Authentication ----------
91    /// Missing credential.
92    #[error("missing credential{}", var_hint.as_deref().map(|h| format!(" (hint: {h})")).unwrap_or_default())]
93    AuthMissing { var_hint: Option<String> },
94
95    /// Malformed credential.
96    #[error("malformed credential{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
97    AuthMalformed { hint: Option<String> },
98
99    /// Credential rejected by the server (401).
100    #[error("credential rejected by server{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
101    AuthRejected { hint: Option<String> },
102
103    /// OAuth/STS token expired.
104    #[error("auth token expired")]
105    AuthExpired,
106
107    // Quota
108    /// Request-level rate limiting.
109    #[error("rate limit hit ({scope:?}){}", retry_after.map(|d| format!(", retry after {}s", d.as_secs())).unwrap_or_default())]
110    RateLimit {
111        retry_after: Option<Duration>,
112        scope: RateLimitScope,
113    },
114
115    /// Quota exhausted / monthly allowance depleted.
116    #[error("quota exceeded{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
117    QuotaExceeded { hint: Option<String> },
118
119    // ---------- Input ----------
120    /// Context window overflow.
121    #[error("context overflow{}", match (used, limit) {
122        (Some(u), Some(l)) => format!(" ({u} > {l})"),
123        _ => String::new(),
124    })]
125    ContextOverflow {
126        used: Option<u64>,
127        limit: Option<u64>,
128    },
129
130    /// The requested `max_tokens` exceeds the model's limit or was rejected by the
131    /// server.
132    #[error("max_tokens invalid{}", match (requested, limit) {
133        (Some(r), Some(l)) => format!(" ({r} > {l})"),
134        _ => String::new(),
135    })]
136    MaxTokensInvalid {
137        requested: Option<u64>,
138        limit: Option<u64>,
139    },
140
141    /// Model ID does not exist or is unavailable.
142    #[error("model not found: {model}")]
143    ModelNotFound { model: String },
144
145    /// Request body rejected by the wire service validation (schema error, conflicting
146    /// mutually exclusive fields).
147    #[error("bad request{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
148    BadRequest { hint: Option<String> },
149
150    /// The tool schema referenced in the request was rejected by the server.
151    #[error("invalid tool schema for {tool}{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
152    InvalidToolSchema { tool: String, hint: Option<String> },
153
154    // ---------- Security / Compliance ----------
155    /// Input triggered a safety filter.
156    #[error("input blocked{}", policy.as_deref().map(|p| format!(" by {p}")).unwrap_or_default())]
157    InputBlocked { policy: Option<String> },
158
159    /// Model output blocked by safety filter.
160    #[error("output blocked{}", policy.as_deref().map(|p| format!(" by {p}")).unwrap_or_default())]
161    OutputBlocked { policy: Option<String> },
162
163    // ---------- Protocol / Server Faults ----------
164    /// A 5xx or server-reported internal error.
165    #[error("server error{}{}",
166        status.map(|s| format!(" ({s})")).unwrap_or_default(),
167        hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
168    ServerError {
169        status: Option<u16>,
170        hint: Option<String>,
171    },
172
173    /// The server aborted the stream during generation.
174    #[error("server aborted stream{}", hint.as_deref().map(|h| format!(": {h}")).unwrap_or_default())]
175    ServerStreamAborted { hint: Option<String> },
176
177    /// Failed to parse wire JSON / SSE.
178    #[error("malformed wire response: {0}")]
179    Malformed(#[source] BoxError),
180
181    /// The server responded with a wire type or field not defined in the protocol
182    /// specification.
183    #[error("protocol violation: {hint}")]
184    ProtocolViolation { hint: String },
185
186    // ---------- transport ----------
187    /// Transport-layer error (DNS, TCP, TLS, HTTP).
188    #[error("transport error: {0}")]
189    Transport(#[source] BoxError),
190
191    /// Request timed out.
192    #[error("request timeout at {phase:?}")]
193    Timeout { phase: TimeoutPhase },
194
195    // ---------- control flow ----------
196    /// Canceled by the user or upper layer.
197    #[error("canceled")]
198    Canceled,
199
200    // ---------- Catch-all ----------
201    /// Catch-all variant; prefer to extract cases from here when adding new categories.
202    #[error("other provider error: {0}")]
203    Other(#[source] BoxError),
204}
205
/// Which limit a [`ProviderErrorKind::RateLimit`] error was counted against.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RateLimitScope {
    /// Requests per minute.
    Rpm,
    /// Tokens per minute.
    Tpm,
    /// Reported by the server but not further subdivided.
    Unspecified,
}
215
/// The phase of a request in which a [`ProviderErrorKind::Timeout`] occurred.
///
/// The phase is rendered with its `Debug` name in the error message.
///
/// NOTE(review): the per-variant descriptions below are inferred from the variant
/// names only; confirm against the code that constructs these values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TimeoutPhase {
    /// Presumably: while establishing the connection.
    Connect,
    /// Presumably: while waiting for the response headers.
    ReadHeaders,
    /// Presumably: while reading the response body.
    ReadBody,
    /// Presumably: no data arrived for too long on an open response/stream.
    Idle,
    /// Presumably: the overall deadline for the whole request elapsed.
    Total,
}
224
/// Retry hints for errors, as returned by [`ProviderError::retry_hint`].
///
/// Every value other than [`RetryHint::No`] counts as retryable for
/// [`ProviderError::is_retryable`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RetryHint {
    /// Not retryable.
    No,
    /// Retry immediately once.
    ///
    /// Not currently produced by [`ProviderError::retry_hint`].
    Immediate,
    /// Retry after the server-suggested duration.
    After(Duration),
    /// Retry with backoff (no server suggestion).
    Backoff,
    /// Retry after performing a prerequisite action.
    AfterAction(RetryAction),
}
239
/// The prerequisite action carried by [`RetryHint::AfterAction`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RetryAction {
    /// Refresh the credential; hinted for [`ProviderErrorKind::AuthExpired`].
    RefreshAuth,
    /// Switch to a different model.
    ///
    /// Not currently produced by [`ProviderError::retry_hint`].
    SwitchModel,
    /// Shrink the request context; hinted for [`ProviderErrorKind::ContextOverflow`].
    ReduceContext,
}
245}