Skip to main content

zeph_llm/
error.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Error type for all LLM provider operations.

6/// Errors that can occur in any [`crate::provider::LlmProvider`] operation.
7///
8/// Use the predicate methods ([`is_rate_limited`](Self::is_rate_limited),
9/// [`is_context_length_error`](Self::is_context_length_error),
10/// [`is_invalid_input`](Self::is_invalid_input),
11/// [`is_beta_header_rejected`](Self::is_beta_header_rejected)) to classify errors
12/// before deciding whether to retry, fall back, or propagate.
13#[non_exhaustive]
14#[derive(Debug, thiserror::Error)]
15pub enum LlmError {
16    /// Underlying HTTP transport error (connection refused, TLS failure, etc.).
17    #[error("HTTP request failed: {0}")]
18    Http(#[from] reqwest::Error),
19
20    /// The API returned a response that could not be decoded as valid JSON.
21    #[error("JSON parse failed: {0}")]
22    Json(#[from] serde_json::Error),
23
24    /// An I/O error occurred (e.g. reading or writing a cache file).
25    #[error("I/O error: {0}")]
26    Io(#[from] std::io::Error),
27
28    /// The provider returned HTTP 429 (too many requests). Callers should back off and retry.
29    #[error("rate limited")]
30    RateLimited,
31
32    /// The provider is temporarily unavailable (HTTP 5xx or connection error).
33    #[error("provider unavailable")]
34    Unavailable,
35
36    /// The provider returned a successful HTTP status but no content in the response body.
37    #[error("empty response from {provider}")]
38    EmptyResponse { provider: String },
39
40    /// A Server-Sent Events frame could not be parsed.
41    #[error("SSE parse error: {0}")]
42    SseParse(String),
43
44    /// [`crate::provider::LlmProvider::embed`] was called on a provider that does not
45    /// support embedding generation.
46    #[error("embedding not supported by {provider}")]
47    EmbedUnsupported { provider: String },
48
49    /// `Candle` model weights or tokenizer could not be loaded from disk or `HuggingFace` Hub.
50    #[error("model loading failed: {0}")]
51    ModelLoad(String),
52
53    /// The `Candle` inference worker returned an error or timed out.
54    #[error("inference failed: {0}")]
55    Inference(String),
56
57    /// The [`crate::router::RouterProvider`] has no providers configured.
58    #[error("no route configured")]
59    NoRoute,
60
61    /// All providers in a router have been exhausted without a successful response.
62    #[error("no providers available")]
63    NoProviders,
64
65    /// A Candle tensor operation failed.
66    #[cfg(feature = "candle")]
67    #[error("candle error: {0}")]
68    Candle(#[from] candle_core::Error),
69
70    /// [`crate::provider::LlmProvider::chat_typed`] could not parse the model's response
71    /// as the requested type, even after a retry.
72    #[error("structured output parse failed: {0}")]
73    StructuredParse(String),
74
75    /// The speech-to-text backend rejected the audio or returned an error.
76    #[error("transcription failed: {0}")]
77    TranscriptionFailed(String),
78
79    /// The prompt exceeds the model's maximum context window. Do not retry with the same input
80    /// on another provider — the same input will fail there too. Summarize or truncate first.
81    #[error("context length exceeded")]
82    ContextLengthExceeded,
83
84    /// The request exceeded the configured per-call timeout.
85    #[error("LLM request timed out")]
86    Timeout,
87
88    /// A beta header sent in the request was rejected by the API (e.g. `compact-2026-01-12`
89    /// deprecated or not yet available). The provider has already disabled the feature
90    /// internally; the caller should retry without it.
91    #[error("beta header rejected by API: {header}")]
92    BetaHeaderRejected { header: String },
93
94    /// The input itself is invalid (HTTP 400). Retrying with the same input on another
95    /// provider will not help — the router should break the fallback loop immediately.
96    #[error("invalid input for {provider}: {message}")]
97    InvalidInput { provider: String, message: String },
98
99    /// A provider returned a non-success HTTP status that does not map to any more specific variant.
100    ///
101    /// This covers non-retriable API failures such as authentication errors (401/403),
102    /// server errors (500/503), and unexpected 4xx responses that are not `InvalidInput`,
103    /// `RateLimited`, or `ContextLengthExceeded`. Callers should not retry on this error.
104    #[error("{provider} API request failed (status {status})")]
105    ApiError { provider: String, status: u16 },
106
107    /// Catch-all for provider-specific errors that do not yet have a typed variant.
108    ///
109    /// # Deprecation
110    ///
111    /// Prefer adding a typed variant or propagating a specific source error. This variant
112    /// exists for backward compatibility and will be removed once all callsites are migrated.
113    #[error("{0}")]
114    Other(String),
115}
116
117impl LlmError {
118    /// Returns true if this error indicates the context/prompt is too long for the model.
119    ///
120    /// Providers must return [`LlmError::ContextLengthExceeded`] directly; this predicate
121    /// does not inspect error message strings.
122    #[must_use]
123    pub fn is_context_length_error(&self) -> bool {
124        matches!(self, Self::ContextLengthExceeded)
125    }
126
127    /// Returns true if this error indicates that a beta header was rejected by the API.
128    #[must_use]
129    pub fn is_beta_header_rejected(&self) -> bool {
130        matches!(self, Self::BetaHeaderRejected { .. })
131    }
132
133    /// Returns true if this error indicates that the input itself is invalid (HTTP 400).
134    ///
135    /// Callers (e.g. the router fallback loop) should not retry with a different provider
136    /// when this is true — the same input will fail there too.
137    #[must_use]
138    pub fn is_invalid_input(&self) -> bool {
139        matches!(self, Self::InvalidInput { .. })
140    }
141
142    #[must_use]
143    pub fn is_rate_limited(&self) -> bool {
144        matches!(self, Self::RateLimited)
145    }
146}
147
/// Check whether a raw API error body text indicates a context-length error.
///
/// Used at the provider transport layer to convert HTTP 400 bodies into
/// [`LlmError::ContextLengthExceeded`] before the error reaches callers.
///
/// The body is lowercased once and then searched for a fixed set of known
/// phrases, so matching is case-insensitive and substring-based. An empty body
/// never matches.
pub(crate) fn body_is_context_length_error(body: &str) -> bool {
    // Known phrases, all lowercase because they are compared against the
    // lowercased body.
    const MARKERS: [&str; 6] = [
        "maximum number of tokens",
        "context length exceeded",
        "maximum context length",
        "context_length_exceeded",
        "prompt is too long",
        "input too long",
    ];

    let lower = body.to_lowercase();
    MARKERS.iter().any(|marker| lower.contains(*marker))
}

162pub type Result<T> = std::result::Result<T, LlmError>;
163
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn context_length_exceeded_variant_is_detected() {
        assert!(LlmError::ContextLengthExceeded.is_context_length_error());
    }

    #[test]
    fn other_variant_is_not_context_length_error() {
        // The `Other` path no longer triggers context-length classification.
        // Providers must return `ContextLengthExceeded` directly.
        assert!(
            !LlmError::Other("maximum number of tokens exceeded".into()).is_context_length_error()
        );
        assert!(
            !LlmError::Other("context length exceeded for model".into()).is_context_length_error()
        );
    }

    #[test]
    fn unrelated_error_is_not_detected() {
        assert!(!LlmError::Unavailable.is_context_length_error());
        assert!(!LlmError::RateLimited.is_context_length_error());
        assert!(!LlmError::Other("some unrelated error".into()).is_context_length_error());
    }

    #[test]
    fn context_length_exceeded_display() {
        assert_eq!(
            LlmError::ContextLengthExceeded.to_string(),
            "context length exceeded"
        );
    }

    #[test]
    fn beta_header_rejected_is_detected() {
        let e = LlmError::BetaHeaderRejected {
            header: "compact-2026-01-12".into(),
        };
        assert!(e.is_beta_header_rejected());
    }

    #[test]
    fn other_error_is_not_beta_header_rejected() {
        assert!(!LlmError::Unavailable.is_beta_header_rejected());
        assert!(!LlmError::ContextLengthExceeded.is_beta_header_rejected());
        assert!(!LlmError::Other("400 bad request".into()).is_beta_header_rejected());
    }

    #[test]
    fn beta_header_rejected_display() {
        let e = LlmError::BetaHeaderRejected {
            header: "compact-2026-01-12".into(),
        };
        assert!(e.to_string().contains("compact-2026-01-12"));
    }

    #[test]
    fn invalid_input_is_detected() {
        let e = LlmError::InvalidInput {
            provider: "openai".into(),
            message: "maximum sequence length exceeded".into(),
        };
        assert!(e.is_invalid_input());
    }

    #[test]
    fn other_errors_are_not_invalid_input() {
        assert!(!LlmError::Unavailable.is_invalid_input());
        assert!(!LlmError::RateLimited.is_invalid_input());
        assert!(!LlmError::Other("400 bad request".into()).is_invalid_input());
    }

    #[test]
    fn invalid_input_display_includes_provider_and_message() {
        let e = LlmError::InvalidInput {
            provider: "openai".into(),
            message: "input too long".into(),
        };
        let s = e.to_string();
        assert!(s.contains("openai"));
        assert!(s.contains("input too long"));
    }

    #[test]
    fn rate_limited_is_detected() {
        assert!(LlmError::RateLimited.is_rate_limited());
    }

    #[test]
    fn other_errors_are_not_rate_limited() {
        // Classification is by variant only, never by message text.
        assert!(!LlmError::Unavailable.is_rate_limited());
        assert!(!LlmError::Timeout.is_rate_limited());
        assert!(!LlmError::Other("rate limited".into()).is_rate_limited());
    }

    #[test]
    fn api_error_display() {
        let e = LlmError::ApiError {
            provider: "claude".into(),
            status: 503,
        };
        let s = e.to_string();
        assert!(s.contains("claude"));
        assert!(s.contains("503"));
    }

    #[test]
    fn body_is_context_length_error_detects_known_messages() {
        assert!(body_is_context_length_error(
            "maximum number of tokens exceeded"
        ));
        assert!(body_is_context_length_error(
            "This model's maximum context length is 4096 tokens. context_length_exceeded"
        ));
        assert!(body_is_context_length_error(
            "context length exceeded for model"
        ));
        assert!(body_is_context_length_error("prompt is too long"));
        assert!(body_is_context_length_error(
            "input too long for this model"
        ));
    }

    #[test]
    fn body_is_context_length_error_is_case_insensitive() {
        // The body is lowercased before matching, so casing must not matter.
        assert!(body_is_context_length_error("Prompt Is Too Long"));
        assert!(body_is_context_length_error("CONTEXT_LENGTH_EXCEEDED"));
    }

    #[test]
    fn body_is_context_length_error_ignores_unrelated_messages() {
        assert!(!body_is_context_length_error("some unrelated error"));
        assert!(!body_is_context_length_error("rate limit exceeded"));
        assert!(!body_is_context_length_error("authentication failed"));
    }
}
284}