liter_llm/
error.rs

1use std::time::Duration;
2
3use serde::{Deserialize, Serialize};
4
5/// Error response from an OpenAI-compatible API.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct ErrorResponse {
8    pub error: ApiError,
9}
10
11/// Inner error object.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ApiError {
14    pub message: String,
15    #[serde(rename = "type")]
16    pub error_type: String,
17    #[serde(default)]
18    pub param: Option<String>,
19    #[serde(default)]
20    pub code: Option<String>,
21}
22
23/// All errors that can occur when using `liter-llm`.
24#[derive(Debug, thiserror::Error)]
25#[non_exhaustive]
26pub enum LiterLlmError {
27    #[error("authentication failed: {message}")]
28    Authentication { message: String },
29
30    #[error("rate limited: {message}")]
31    RateLimited {
32        message: String,
33        retry_after: Option<Duration>,
34    },
35
36    #[error("bad request: {message}")]
37    BadRequest { message: String },
38
39    #[error("context window exceeded: {message}")]
40    ContextWindowExceeded { message: String },
41
42    #[error("content policy violation: {message}")]
43    ContentPolicy { message: String },
44
45    #[error("not found: {message}")]
46    NotFound { message: String },
47
48    #[error("server error: {message}")]
49    ServerError { message: String },
50
51    #[error("service unavailable: {message}")]
52    ServiceUnavailable { message: String },
53
54    #[error("request timeout")]
55    Timeout,
56
57    #[cfg(any(feature = "native-http", feature = "wasm-http"))]
58    #[error(transparent)]
59    Network(#[from] reqwest::Error),
60
61    /// A catch-all for errors that occur during streaming response processing.
62    ///
63    /// This variant covers multiple sub-conditions including UTF-8 decoding
64    /// failures, CRC/checksum mismatches (AWS EventStream), JSON parse errors
65    /// in individual SSE chunks, and buffer overflow conditions.  The `message`
66    /// field contains a human-readable description of the specific failure.
67    #[error("streaming error: {message}")]
68    Streaming { message: String },
69
70    #[error("provider {provider} does not support {endpoint}")]
71    EndpointNotSupported { endpoint: String, provider: String },
72
73    #[error("invalid header {name:?}: {reason}")]
74    InvalidHeader { name: String, reason: String },
75
76    #[error("serialization error: {0}")]
77    Serialization(#[from] serde_json::Error),
78
79    #[error("budget exceeded: {message}")]
80    BudgetExceeded { message: String, model: Option<String> },
81
82    #[error("hook rejected: {message}")]
83    HookRejected { message: String },
84
85    /// An internal logic error (e.g. unexpected Tower response variant).
86    ///
87    /// This should never surface in normal operation — if it does, it
88    /// indicates a bug in the library.
89    #[error("internal error: {message}")]
90    InternalError { message: String },
91}
92
93impl LiterLlmError {
94    /// Returns `true` for errors that are worth retrying on a different service
95    /// or deployment (transient failures).
96    ///
97    /// Used by [`crate::tower::fallback::FallbackService`] and
98    /// [`crate::tower::router::Router`] to decide whether to route to an
99    /// alternative endpoint.
100    #[must_use]
101    pub fn is_transient(&self) -> bool {
102        match self {
103            Self::RateLimited { .. } | Self::ServiceUnavailable { .. } | Self::Timeout | Self::ServerError { .. } => {
104                true
105            }
106            #[cfg(any(feature = "native-http", feature = "wasm-http"))]
107            Self::Network(_) => true,
108            _ => false,
109        }
110    }
111
112    /// Return the OpenTelemetry `error.type` string for this error variant.
113    ///
114    /// Used by the tracing middleware to record the `error.type` span attribute
115    /// on failed requests per the GenAI semantic conventions.
116    #[must_use]
117    pub fn error_type(&self) -> &'static str {
118        match self {
119            Self::Authentication { .. } => "Authentication",
120            Self::RateLimited { .. } => "RateLimited",
121            Self::BadRequest { .. } => "BadRequest",
122            Self::ContextWindowExceeded { .. } => "ContextWindowExceeded",
123            Self::ContentPolicy { .. } => "ContentPolicy",
124            Self::NotFound { .. } => "NotFound",
125            Self::ServerError { .. } => "ServerError",
126            Self::ServiceUnavailable { .. } => "ServiceUnavailable",
127            Self::Timeout => "Timeout",
128            #[cfg(any(feature = "native-http", feature = "wasm-http"))]
129            Self::Network(_) => "Network",
130            Self::Streaming { .. } => "Streaming",
131            Self::EndpointNotSupported { .. } => "EndpointNotSupported",
132            Self::InvalidHeader { .. } => "InvalidHeader",
133            Self::Serialization(_) => "Serialization",
134            Self::BudgetExceeded { .. } => "BudgetExceeded",
135            Self::HookRejected { .. } => "HookRejected",
136            Self::InternalError { .. } => "InternalError",
137        }
138    }
139
140    /// Create from an HTTP status code, an API error response body, and an
141    /// optional `Retry-After` duration already parsed from the response header.
142    ///
143    /// The `retry_after` value is forwarded into [`LiterLlmError::RateLimited`]
144    /// so callers can honour the server-requested delay without re-parsing the
145    /// header.
146    pub fn from_status(status: u16, body: &str, retry_after: Option<Duration>) -> Self {
147        let parsed = serde_json::from_str::<ErrorResponse>(body).ok();
148        let code = parsed.as_ref().and_then(|r| r.error.code.clone());
149        let message = parsed.map(|r| r.error.message).unwrap_or_else(|| body.to_string());
150
151        match status {
152            401 | 403 => Self::Authentication { message },
153            429 => Self::RateLimited { message, retry_after },
154            400 | 422 => {
155                // Check the structured `code` field first — it is more reliable
156                // than substring matching on the human-readable message.
157                if code.as_deref() == Some("context_length_exceeded") {
158                    Self::ContextWindowExceeded { message }
159                } else if code.as_deref() == Some("content_policy_violation")
160                    || code.as_deref() == Some("content_filter")
161                {
162                    Self::ContentPolicy { message }
163                }
164                // Fall back to message-based heuristics for providers that do not
165                // populate the `code` field.
166                else if message.contains("context_length_exceeded")
167                    || message.contains("context window")
168                    || message.contains("maximum context length")
169                {
170                    Self::ContextWindowExceeded { message }
171                } else if message.contains("content_policy") || message.contains("content_filter") {
172                    Self::ContentPolicy { message }
173                } else {
174                    Self::BadRequest { message }
175                }
176            }
177            404 => Self::NotFound { message },
178            405 | 413 => Self::BadRequest { message },
179            408 => Self::Timeout,
180            500 => Self::ServerError { message },
181            502..=504 => Self::ServiceUnavailable { message },
182            // Map remaining 4xx codes to BadRequest (client errors) and
183            // everything else (5xx, unknown) to ServerError.
184            400..=499 => Self::BadRequest { message },
185            _ => Self::ServerError { message },
186        }
187    }
188}
189
190pub type Result<T> = std::result::Result<T, LiterLlmError>;
liter_llm/error.rs

liter_llm/
error.rs