axon/backends/
error.rs

1//! Typed transport errors for native Rust LLM backends — Fase 24.b.
2//!
3//! Mirror of the v1.16.1 named subclasses on the Python side
4//! (`axon.runtime.runtime_errors`):
5//!
6//!   * [`BackendError::RateLimit`]    — HTTP 429, retries exhausted.
7//!   * [`BackendError::Auth`]         — HTTP 401 / 403, fail-fast.
8//!   * [`BackendError::ContextLength`]— HTTP 400 with a context-overrun shape.
9//!   * [`BackendError::SafetyBreach`] — provider's content filter fired.
10//!   * [`BackendError::ModelNotFound`]— HTTP 404 / 400 with model-not-found shape.
11//!   * [`BackendError::Generic`]      — unmapped 4xx / 5xx / transport.
12//!
13//! Adopters can `match` on these variants without parsing message strings.
14//! For retry-policy decisions the legacy
15//! [`crate::backend_error::BackendErrorKind`] taxonomy is still consulted —
16//! the new typed variants carry a [`Self::kind`] accessor that translates,
17//! so existing `resilient_backend` / `circuit_breaker` infra keeps working.
18
19use std::fmt;
20
21use crate::backend_error::BackendErrorKind;
22
/// Typed transport error from a native Rust backend.
///
/// Each variant carries enough context to render a useful message for the
/// adopter without sacrificing programmatic dispatch — the message body
/// repeats the provider name + model name + status code so log-only
/// consumers don't lose information.
///
/// `PartialEq` / `Eq` are derived (every field is a plain string, integer or
/// `Option` of one) so adopters and tests can compare errors structurally
/// with `==` / `assert_eq!` in addition to pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BackendError {
    /// Provider rate limit hit (HTTP 429); retries exhausted or unavailable.
    RateLimit {
        /// Name of the provider that returned the error.
        provider: String,
        /// Model the request was addressed to.
        model: String,
        /// Delay requested by the provider, in whole seconds, when known.
        retry_after_seconds: Option<u64>,
        /// Leading portion of the provider's response body.
        body_preview: String,
    },
    /// Provider rejected the request as unauthenticated (HTTP 401 / 403).
    Auth {
        /// Name of the provider that returned the error.
        provider: String,
        /// Model the request was addressed to.
        model: String,
        /// Name of the environment variable expected to hold the API key,
        /// when known; rendered as a hint in the `Display` message.
        api_key_env: Option<String>,
        /// HTTP status code returned by the provider.
        status: u16,
        /// Leading portion of the provider's response body.
        body_preview: String,
    },
    /// Compiled prompt exceeds the model's context window (HTTP 400 with a
    /// `context_length_exceeded` / `maximum context` / `too long` shape).
    ContextLength {
        /// Name of the provider that returned the error.
        provider: String,
        /// Model the request was addressed to.
        model: String,
        /// Leading portion of the provider's response body.
        body_preview: String,
    },
    /// Provider's content filter blocked the request or response.
    SafetyBreach {
        /// Name of the provider that returned the error.
        provider: String,
        /// Model the request was addressed to.
        model: String,
        /// Finish reason reported by the provider (e.g. `content_filter`).
        finish_reason: String,
        /// Leading portion of the provider's response body.
        body_preview: String,
    },
    /// Provider does not recognise the requested model identifier.
    ModelNotFound {
        /// Name of the provider that returned the error.
        provider: String,
        /// Model identifier the provider did not recognise.
        model: String,
        /// HTTP status code returned by the provider.
        status: u16,
        /// Leading portion of the provider's response body.
        body_preview: String,
    },
    /// Unmapped HTTP error or transport-layer failure.
    Generic {
        /// Name of the provider that returned the error.
        provider: String,
        /// Model the request was addressed to.
        model: String,
        /// HTTP status code, or `None` when the failure happened below the
        /// HTTP layer (rendered as "transport error").
        status: Option<u16>,
        /// Free-form description appended to the rendered message.
        message: String,
    },
}
75
76impl BackendError {
77    /// Provider name reported by the error (always set).
78    pub fn provider(&self) -> &str {
79        match self {
80            Self::RateLimit { provider, .. }
81            | Self::Auth { provider, .. }
82            | Self::ContextLength { provider, .. }
83            | Self::SafetyBreach { provider, .. }
84            | Self::ModelNotFound { provider, .. }
85            | Self::Generic { provider, .. } => provider,
86        }
87    }
88
89    /// Resolved model name reported by the error (always set; may be the
90    /// provider's default if the request omitted one).
91    pub fn model(&self) -> &str {
92        match self {
93            Self::RateLimit { model, .. }
94            | Self::Auth { model, .. }
95            | Self::ContextLength { model, .. }
96            | Self::SafetyBreach { model, .. }
97            | Self::ModelNotFound { model, .. }
98            | Self::Generic { model, .. } => model,
99        }
100    }
101
102    /// Translate into the legacy [`BackendErrorKind`] taxonomy so that
103    /// existing infra in `resilient_backend.rs` / `circuit_breaker.rs` /
104    /// `retry_policy.rs` continues to drive retry / CB decisions without
105    /// changes during the Fase 24 transition (D6 — dual presence).
106    pub fn kind(&self) -> BackendErrorKind {
107        match self {
108            Self::RateLimit { retry_after_seconds, .. } => BackendErrorKind::RateLimit {
109                retry_after: retry_after_seconds.map(std::time::Duration::from_secs),
110            },
111            Self::Auth { .. } => BackendErrorKind::AuthError,
112            Self::ContextLength { .. } => BackendErrorKind::Unknown, // 400 — fail-fast, not retryable
113            Self::SafetyBreach { .. } => BackendErrorKind::Unknown, // not retryable
114            Self::ModelNotFound { .. } => BackendErrorKind::Unknown, // 404 — fail-fast
115            Self::Generic { status, .. } => match status {
116                Some(s) if (500..600).contains(s) => BackendErrorKind::ServerError { status: *s },
117                Some(429) => BackendErrorKind::RateLimit { retry_after: None },
118                Some(401) | Some(403) => BackendErrorKind::AuthError,
119                Some(408) => BackendErrorKind::Timeout,
120                Some(_) => BackendErrorKind::Unknown,
121                None => BackendErrorKind::NetworkError,
122            },
123        }
124    }
125
126    /// Whether the transport layer should retry this error before giving
127    /// up. Consults [`Self::kind`] + the legacy `is_retryable` predicate
128    /// so the policy stays in lockstep with `resilient_backend`.
129    pub fn is_retryable(&self) -> bool {
130        self.kind().is_retryable()
131    }
132
133    /// Stable category label for logs / metrics. Preserved across variants
134    /// so that operators can filter by error class without enum-matching.
135    pub fn category(&self) -> &'static str {
136        match self {
137            Self::RateLimit { .. } => "rate_limit",
138            Self::Auth { .. } => "auth_error",
139            Self::ContextLength { .. } => "context_length_exceeded",
140            Self::SafetyBreach { .. } => "safety_breach",
141            Self::ModelNotFound { .. } => "model_not_found",
142            Self::Generic { .. } => "model_call_error",
143        }
144    }
145}
146
147impl fmt::Display for BackendError {
148    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149        match self {
150            Self::RateLimit {
151                provider,
152                model,
153                retry_after_seconds,
154                body_preview,
155            } => {
156                let retry_after_part = retry_after_seconds
157                    .map(|s| format!(", retry_after={}s", s))
158                    .unwrap_or_default();
159                write!(
160                    f,
161                    "Rate limit on provider {provider:?} (model={model:?}, status=429{retry_after_part}). \
162                     Retries exhausted. Body: {body_preview}"
163                )
164            }
165            Self::Auth {
166                provider,
167                model,
168                api_key_env,
169                status,
170                body_preview,
171            } => {
172                let env_hint = api_key_env
173                    .as_ref()
174                    .map(|env| format!(" (env var: {env})"))
175                    .unwrap_or_default();
176                write!(
177                    f,
178                    "Authentication failed on provider {provider:?}{env_hint}, \
179                     status={status}. Verify the API key is set, valid, and has \
180                     access to model {model:?}. Body: {body_preview}"
181                )
182            }
183            Self::ContextLength {
184                provider,
185                model,
186                body_preview,
187            } => write!(
188                f,
189                "Prompt exceeds context window of model {model:?} on provider \
190                 {provider:?} (status=400). Body: {body_preview}"
191            ),
192            Self::SafetyBreach {
193                provider,
194                model,
195                finish_reason,
196                body_preview,
197            } => write!(
198                f,
199                "Provider {provider:?} content filter blocked the request \
200                 (model={model:?}, finish_reason={finish_reason:?}). Body: {body_preview}"
201            ),
202            Self::ModelNotFound {
203                provider,
204                model,
205                status,
206                body_preview,
207            } => write!(
208                f,
209                "Model {model:?} not found at provider {provider:?} (status={status}). \
210                 Either the slug is mistyped or the model was deprecated. Body: {body_preview}"
211            ),
212            Self::Generic {
213                provider,
214                model,
215                status,
216                message,
217            } => {
218                let status_part = status
219                    .map(|s| format!("HTTP {s}"))
220                    .unwrap_or_else(|| "transport error".to_string());
221                write!(
222                    f,
223                    "Provider {provider:?} returned {status_part} for model \
224                     {model:?}. {message}"
225                )
226            }
227        }
228    }
229}
230
231impl std::error::Error for BackendError {}
232
233/// Helper: classify an HTTP status + response body into the right typed
234/// variant. Mirrors `_categorise_http_error` from `axon.server.model_clients`
235/// (Python v1.16.1).
236pub fn categorise_http(
237    provider: &str,
238    model: &str,
239    status: u16,
240    headers: &reqwest::header::HeaderMap,
241    body: &str,
242    api_key_env: Option<&str>,
243) -> BackendError {
244    let body_preview: String = body.chars().take(200).collect();
245    let body_lower = body.to_lowercase();
246
247    if status == 429 {
248        let retry_after = headers
249            .get("retry-after")
250            .and_then(|v| v.to_str().ok())
251            .and_then(|s| s.trim().parse::<u64>().ok());
252        return BackendError::RateLimit {
253            provider: provider.to_string(),
254            model: model.to_string(),
255            retry_after_seconds: retry_after,
256            body_preview,
257        };
258    }
259
260    if status == 401 || status == 403 {
261        return BackendError::Auth {
262            provider: provider.to_string(),
263            model: model.to_string(),
264            api_key_env: api_key_env.map(str::to_string),
265            status,
266            body_preview,
267        };
268    }
269
270    if status == 404 {
271        return BackendError::ModelNotFound {
272            provider: provider.to_string(),
273            model: model.to_string(),
274            status,
275            body_preview,
276        };
277    }
278
279    if status == 400 {
280        // Context-overrun shape: providers vary in wording. Substring match
281        // mirrors the Python implementation in v1.16.1.
282        if body_lower.contains("context_length")
283            || body_lower.contains("context length")
284            || body_lower.contains("maximum context")
285            || body_lower.contains("too long")
286        {
287            return BackendError::ContextLength {
288                provider: provider.to_string(),
289                model: model.to_string(),
290                body_preview,
291            };
292        }
293        if body_lower.contains("model_not_found")
294            || body_lower.contains("model not found")
295            || body_lower.contains("no such model")
296        {
297            return BackendError::ModelNotFound {
298                provider: provider.to_string(),
299                model: model.to_string(),
300                status,
301                body_preview,
302            };
303        }
304    }
305
306    BackendError::Generic {
307        provider: provider.to_string(),
308        model: model.to_string(),
309        status: Some(status),
310        message: format!("Body: {body_preview}"),
311    }
312}
313
// Unit tests for `categorise_http` and the `BackendError` accessors.
#[cfg(test)]
mod tests {
    use super::*;
    use reqwest::header::HeaderMap;

    /// Header map with no entries, for cases where headers are irrelevant.
    fn empty_headers() -> HeaderMap {
        HeaderMap::new()
    }

    #[test]
    fn ratelimit_carries_retry_after() {
        let mut h = HeaderMap::new();
        h.insert("retry-after", "60".parse().unwrap());
        let err = categorise_http("anthropic", "claude-x", 429, &h, "rate-limited", None);
        assert!(matches!(
            err,
            BackendError::RateLimit {
                retry_after_seconds: Some(60),
                ..
            }
        ));
        assert_eq!(err.category(), "rate_limit");
        assert!(err.is_retryable());
    }

    #[test]
    fn ratelimit_without_header_is_still_classified() {
        let err = categorise_http("openai", "gpt-x", 429, &empty_headers(), "no body", None);
        match err {
            BackendError::RateLimit {
                retry_after_seconds,
                ..
            } => {
                assert!(retry_after_seconds.is_none());
            }
            _ => panic!("expected RateLimit"),
        }
    }

    #[test]
    fn auth_401_with_env_hint() {
        let err = categorise_http(
            "kimi",
            "kimi-k2.6",
            401,
            &empty_headers(),
            "unauthorized",
            Some("AXON_KIMI_API_KEY"),
        );
        match err {
            BackendError::Auth {
                api_key_env,
                status,
                ..
            } => {
                assert_eq!(api_key_env.as_deref(), Some("AXON_KIMI_API_KEY"));
                assert_eq!(status, 401);
            }
            _ => panic!("expected Auth"),
        }
    }

    #[test]
    fn auth_403_also_classified_as_auth() {
        let err = categorise_http("openai", "gpt-x", 403, &empty_headers(), "", None);
        assert!(matches!(err, BackendError::Auth { status: 403, .. }));
    }

    #[test]
    fn model_not_found_404() {
        let err = categorise_http("openai", "gpt-3.999", 404, &empty_headers(), "", None);
        assert!(matches!(err, BackendError::ModelNotFound { .. }));
        assert!(!err.is_retryable()); // fail-fast
    }

    #[test]
    fn context_length_400_with_oai_marker() {
        let body = r#"{"error":{"code":"context_length_exceeded","message":"prompt too long"}}"#;
        let err = categorise_http("openai", "gpt-x", 400, &empty_headers(), body, None);
        assert!(matches!(err, BackendError::ContextLength { .. }));
    }

    #[test]
    fn context_length_400_with_anthropic_marker() {
        let body = "the prompt is too long for this model's maximum context";
        let err = categorise_http("anthropic", "claude-x", 400, &empty_headers(), body, None);
        assert!(matches!(err, BackendError::ContextLength { .. }));
    }

    #[test]
    fn model_not_found_400_with_marker() {
        let body = r#"{"error":{"code":"model_not_found"}}"#;
        let err = categorise_http("openai", "gpt-y", 400, &empty_headers(), body, None);
        assert!(matches!(
            err,
            BackendError::ModelNotFound { status: 400, .. }
        ));
    }

    #[test]
    fn generic_500_is_retryable() {
        let err = categorise_http("openai", "gpt-x", 500, &empty_headers(), "boom", None);
        assert!(matches!(
            err,
            BackendError::Generic {
                status: Some(500),
                ..
            }
        ));
        assert!(err.is_retryable());
    }

    #[test]
    fn generic_502_is_retryable() {
        let err = categorise_http("openai", "gpt-x", 502, &empty_headers(), "", None);
        assert!(err.is_retryable());
    }

    #[test]
    fn generic_400_unmapped_is_not_retryable() {
        let err = categorise_http("openai", "gpt-x", 400, &empty_headers(), "weird", None);
        assert!(matches!(err, BackendError::Generic { .. }));
        assert!(!err.is_retryable());
    }

    #[test]
    fn provider_and_model_accessors() {
        let err = categorise_http("kimi", "kimi-k2.6", 429, &empty_headers(), "", None);
        assert_eq!(err.provider(), "kimi");
        assert_eq!(err.model(), "kimi-k2.6");
    }

    #[test]
    fn body_preview_truncated_to_200_chars() {
        let body = "x".repeat(500);
        let err = categorise_http("openai", "gpt-x", 500, &empty_headers(), &body, None);
        match err {
            BackendError::Generic { message, .. } => {
                // message format: "Body: <preview>" where preview is 200 chars max.
                let preview = message
                    .strip_prefix("Body: ")
                    .expect("Generic message starts with the `Body: ` prefix");
                // Count characters, not bytes: the limit is character-based.
                assert_eq!(preview.chars().count(), 200);
            }
            _ => panic!("expected Generic"),
        }
    }

    #[test]
    fn body_preview_truncation_counts_chars_not_bytes() {
        // Two-byte UTF-8 characters: 200 chars occupy 400 bytes, and the cut
        // must land on a character boundary.
        let body = "\u{e9}".repeat(500);
        let err = categorise_http("openai", "gpt-x", 500, &empty_headers(), &body, None);
        match err {
            BackendError::Generic { message, .. } => {
                let preview = message
                    .strip_prefix("Body: ")
                    .expect("Generic message starts with the `Body: ` prefix");
                assert_eq!(preview.chars().count(), 200);
                assert_eq!(preview.len(), 400);
                assert!(preview.chars().all(|c| c == '\u{e9}'));
            }
            _ => panic!("expected Generic"),
        }
    }

    #[test]
    fn display_includes_provider_and_status() {
        let err = categorise_http("anthropic", "claude-x", 429, &empty_headers(), "tx", None);
        let s = err.to_string();
        assert!(s.contains("anthropic"));
        assert!(s.contains("claude-x"));
        assert!(s.contains("429"));
    }

    #[test]
    fn safety_breach_constructed_directly() {
        // SafetyBreach is not constructed by HTTP categorisation — it's
        // emitted by the per-provider response parsers when a finish
        // reason indicates filter blocking. Verify the variant is
        // well-formed.
        let err = BackendError::SafetyBreach {
            provider: "openai".to_string(),
            model: "gpt-4o".to_string(),
            finish_reason: "content_filter".to_string(),
            body_preview: "{}".to_string(),
        };
        assert_eq!(err.category(), "safety_breach");
        assert!(!err.is_retryable());
        let msg = err.to_string();
        assert!(msg.contains("content_filter"));
    }
}
axon/backends/error.rs

axon/backends/
error.rs