Skip to main content

sqlite_graphrag/
openrouter_http.rs

//! Shared HTTP primitives for the OpenRouter chat and embeddings clients.
//!
//! [`crate::chat_api::OpenRouterChatClient`] and
//! [`crate::embedding_api::OpenRouterClient`] talk to the same OpenAI-compatible
//! REST surface and run the exact same retry/backoff policy. This module
//! centralizes the pieces that were duplicated verbatim between the two
//! (GAP-SG-74): the structured provider-error envelope, the
//! exponential-backoff-with-jitter sleep helper, and — per the reauditor
//! addendum — the retry-verdict classifiers both clients use to attach a
//! typed [`crate::retry::AttemptOutcome`] to every error AT THE ORIGIN (the
//! exact HTTP status or structured provider code), instead of letting a
//! downstream consumer (the enrich queue) infer it from a formatted message
//! substring, which `rules_rust_tratamento_de_erros.md` and
//! `rules_rust_retry_com_backoff.md` both forbid ("NUNCA usar string matching
//! em mensagens de erro").
16
17use std::time::Duration;
18
19use crate::retry::AttemptOutcome;
20
/// Upper bound on the attempts `execute_with_retry` spends on a single
/// request before it gives up.
///
/// Defined once here so the chat and embeddings clients cannot drift apart:
/// both read the same constant instead of each keeping a private copy.
pub(crate) const MAX_RETRIES: u32 = 4;
25
26/// Structured OpenRouter error object carried under the `error` key of an
27/// otherwise-2xx response body (e.g. a token/context-length overflow). `code`
28/// is a `serde_json::Value` because the provider sends it as either a JSON
29/// number or string; `message` defaults to empty so a malformed error object
30/// never masks the underlying cause.
31#[derive(serde::Deserialize)]
32pub(crate) struct ApiError {
33    #[serde(default)]
34    pub(crate) code: Option<serde_json::Value>,
35    #[serde(default)]
36    pub(crate) message: String,
37}
38
39impl ApiError {
40    /// Renders `code` as a plain string without JSON quoting, falling back to
41    /// `unknown` when the provider omitted it.
42    pub(crate) fn code_string(&self) -> String {
43        match &self.code {
44            Some(serde_json::Value::String(s)) => s.clone(),
45            Some(other) => other.to_string(),
46            None => "unknown".to_string(),
47        }
48    }
49}
50
51/// Exponential backoff with up to 500ms of jitter, shared by both clients'
52/// `execute_with_retry` loops. `attempt` is the zero-based retry index.
53pub(crate) async fn backoff(attempt: u32) {
54    let base_ms = 1000u64 * 2u64.pow(attempt);
55    let jitter = fastrand::u64(0..500);
56    let sleep_ms = base_ms + jitter;
57    tracing::debug!(attempt, sleep_ms, "exponential backoff");
58    tokio::time::sleep(Duration::from_millis(sleep_ms)).await;
59}
60
61/// Classifies an HTTP status from the OpenRouter chat/embeddings endpoints
62/// into a retry verdict, computed AT THE ORIGIN where the exact status is
63/// known — never by matching a formatted error message downstream.
64///
65/// `400`/`401`/`403`/`404` are permanent client errors (bad request, bad
66/// key, forbidden, bad model); `408`/`425`/`429`/`5xx` are transient. Any
67/// other status observed here (redirects, unexpected `2xx`/`3xx`/other
68/// `4xx`) defaults to [`AttemptOutcome::HardFailure`]: an unrecognised shape
69/// from this endpoint is closer to a permanent protocol violation than a
70/// transient hiccup, and the caller can widen this match from real
71/// dead-letter evidence.
72pub(crate) fn status_retry_class(status: reqwest::StatusCode) -> AttemptOutcome {
73    match status.as_u16() {
74        400 | 401 | 403 | 404 => AttemptOutcome::HardFailure,
75        408 | 425 | 429 => AttemptOutcome::Transient,
76        _ if status.is_server_error() => AttemptOutcome::Transient,
77        _ => AttemptOutcome::HardFailure,
78    }
79}
80
81/// Classifies a structured OpenRouter provider error (the `error` object
82/// carried inside an otherwise-2xx body) by its `code`, never by its
83/// `message` — mapping an external code to an internal variant is the
84/// pattern `rules_rust_retry_com_backoff.md` explicitly allows ("MAPEAR
85/// códigos de erro externos para variantes internas").
86///
87/// A numeric code in `429` or `500..=599` is transient (rate limit / server
88/// overload surfaced inside a 200 body); known transient string codes are
89/// also mapped. Everything else (e.g. `context_length_exceeded`,
90/// `invalid_request_error`, a refusal) is permanent.
91pub(crate) fn provider_error_retry_class(api_err: &ApiError) -> AttemptOutcome {
92    let code = api_err.code_string();
93    if let Ok(numeric) = code.parse::<u16>() {
94        return if numeric == 429 || (500..=599).contains(&numeric) {
95            AttemptOutcome::Transient
96        } else {
97            AttemptOutcome::HardFailure
98        };
99    }
100    match code.as_str() {
101        "rate_limit_exceeded" | "rate_limited" | "server_error" | "service_unavailable" => {
102            AttemptOutcome::Transient
103        }
104        _ => AttemptOutcome::HardFailure,
105    }
106}
107
#[cfg(test)]
mod tests {
    use super::*;
    use reqwest::StatusCode;

    /// Asserts that every status in `statuses` classifies as `expected`.
    #[track_caller]
    fn assert_status_class(statuses: &[StatusCode], expected: AttemptOutcome) {
        for &status in statuses {
            assert_eq!(status_retry_class(status), expected);
        }
    }

    /// Builds an `ApiError` fixture carrying the given provider `code`.
    fn api_error(code: serde_json::Value) -> ApiError {
        serde_json::from_value(serde_json::json!({ "code": code, "message": "x" }))
            .expect("valid ApiError fixture")
    }

    /// Asserts that every provider code in `codes` classifies as `expected`.
    #[track_caller]
    fn assert_code_class(codes: Vec<serde_json::Value>, expected: AttemptOutcome) {
        for code in codes {
            assert_eq!(provider_error_retry_class(&api_error(code)), expected);
        }
    }

    #[test]
    fn status_retry_class_maps_client_errors_to_hard_failure() {
        assert_status_class(
            &[
                StatusCode::UNAUTHORIZED,
                StatusCode::BAD_REQUEST,
                StatusCode::NOT_FOUND,
            ],
            AttemptOutcome::HardFailure,
        );
    }

    #[test]
    fn status_retry_class_maps_rate_limit_and_server_errors_to_transient() {
        assert_status_class(
            &[
                StatusCode::TOO_MANY_REQUESTS,
                StatusCode::SERVICE_UNAVAILABLE,
                StatusCode::BAD_GATEWAY,
            ],
            AttemptOutcome::Transient,
        );
    }

    #[test]
    fn status_retry_class_treats_403_as_hard_failure() {
        assert_status_class(&[StatusCode::FORBIDDEN], AttemptOutcome::HardFailure);
    }

    #[test]
    fn status_retry_class_treats_408_and_425_as_transient() {
        let too_early = StatusCode::from_u16(425).expect("425 is a valid status");
        assert_status_class(
            &[StatusCode::REQUEST_TIMEOUT, too_early],
            AttemptOutcome::Transient,
        );
    }

    #[test]
    fn status_retry_class_defaults_unrecognised_status_to_hard_failure() {
        assert_status_class(&[StatusCode::IM_A_TEAPOT], AttemptOutcome::HardFailure);
    }

    #[test]
    fn provider_error_retry_class_treats_numeric_429_and_5xx_as_transient() {
        assert_code_class(
            vec![serde_json::json!(429), serde_json::json!(503)],
            AttemptOutcome::Transient,
        );
    }

    #[test]
    fn provider_error_retry_class_treats_numeric_400_as_hard_failure() {
        assert_code_class(vec![serde_json::json!(400)], AttemptOutcome::HardFailure);
    }

    #[test]
    fn provider_error_retry_class_treats_known_transient_codes_as_transient() {
        assert_code_class(
            vec![
                serde_json::json!("rate_limited"),
                serde_json::json!("server_error"),
            ],
            AttemptOutcome::Transient,
        );
    }

    #[test]
    fn provider_error_retry_class_treats_context_length_exceeded_as_hard_failure() {
        assert_code_class(
            vec![serde_json::json!("context_length_exceeded")],
            AttemptOutcome::HardFailure,
        );
    }
}
212            provider_error_retry_class(&api_error(serde_json::json!("context_length_exceeded"))),
213            AttemptOutcome::HardFailure
214        );
215    }
216}