Skip to main content

sqlite_graphrag/
chat_api.rs

1//! HTTP client for the OpenRouter chat-completions API.
2//!
3//! Sends structured-output chat requests to the OpenAI-compatible endpoint
4//! at `openrouter.ai/api/v1/chat/completions` and returns the parsed JSON
5//! object the model produced under a strict `json_schema` `response_format`.
6//!
7//! This mirrors [`crate::embedding_api`] for the embeddings endpoint: same
8//! retry/backoff policy (immediate abort on 401/400/404, `retry-after` on
9//! 429, exponential backoff + jitter on 5xx) and the same minimal headers
10//! (only `Authorization: Bearer`, no `HTTP-Referer`/`X-Title`).
11//!
12//! v1.0.95 (ADR-0054): adds an OpenRouter REST transport for the `enrich`
13//! JUDGE so structured extraction no longer requires a locally installed
14//! `claude` / `codex` / `opencode` CLI subprocess.
15
16use crate::errors::AppError;
17use secrecy::{ExposeSecret, SecretBox};
18use serde::{Deserialize, Serialize};
19use std::time::Duration;
20
/// OpenAI-compatible chat-completions endpoint that production requests are
/// POSTed to.
const OPENROUTER_CHAT_URL: &str = "https://openrouter.ai/api/v1/chat/completions";
/// Total per-request budget, in seconds, substituted when a caller passes `0`.
///
/// GAP-SG-17: raised from 300 to 600. Dense bodies near the model's
/// ~32K-token context ceiling regularly need more than five minutes to
/// generate.
const DEFAULT_TIMEOUT_SECS: u64 = 600;
/// Connect timeout, in seconds, applied to the HTTP client.
const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 10;
/// Upper bound on the number of attempts made per request (the first attempt
/// counts, so this is a total, not the number of *re*-tries).
const MAX_RETRIES: u32 = 4;

/// Fixed `json_schema` name sent in the `response_format`. OpenRouter only
/// requires a short identifier; the actual contract is carried by `schema`.
const SCHEMA_NAME: &str = "enrich_output";
32
33#[derive(Serialize)]
34struct ChatRequest<'a> {
35    model: &'a str,
36    messages: Vec<ChatMessage<'a>>,
37    response_format: ResponseFormat,
38    provider: ProviderPrefs,
39    #[serde(skip_serializing_if = "Option::is_none")]
40    reasoning: Option<ReasoningPrefs>,
41    #[serde(skip_serializing_if = "Option::is_none")]
42    max_tokens: Option<u32>,
43}
44
45#[derive(Serialize)]
46struct ChatMessage<'a> {
47    role: &'a str,
48    content: String,
49}
50
51#[derive(Serialize)]
52struct ResponseFormat {
53    #[serde(rename = "type")]
54    format_type: &'static str,
55    json_schema: JsonSchemaSpec,
56}
57
58#[derive(Serialize)]
59struct JsonSchemaSpec {
60    name: &'static str,
61    strict: bool,
62    schema: serde_json::Value,
63}
64
65#[derive(Serialize)]
66struct ProviderPrefs {
67    require_parameters: bool,
68}
69
70#[derive(Serialize)]
71struct ReasoningPrefs {
72    enabled: bool,
73}
74
75#[derive(Deserialize)]
76struct ChatResponse {
77    #[serde(default)]
78    choices: Vec<Choice>,
79    #[serde(default)]
80    usage: Option<Usage>,
81    /// Structured provider error. OpenRouter may return this inside an HTTP 200
82    /// body (e.g. token/context-length overflow); without it the response would
83    /// parse into empty `choices` and surface the misleading "no structured
84    /// content" error instead of the real cause (GAP-SG-03).
85    #[serde(default)]
86    error: Option<ApiError>,
87}
88
89#[derive(Deserialize)]
90struct Choice {
91    message: RespMessage,
92}
93
94#[derive(Deserialize)]
95struct RespMessage {
96    #[serde(default)]
97    content: Option<String>,
98}
99
100#[derive(Deserialize)]
101struct Usage {
102    #[serde(default)]
103    cost: Option<f64>,
104}
105
106/// Structured OpenRouter error object carried under the `error` key. `code` is
107/// a `serde_json::Value` because the provider sends it as either a JSON number
108/// or string; `message` defaults to empty so a malformed error object never
109/// masks the cause.
110#[derive(Deserialize)]
111struct ApiError {
112    #[serde(default)]
113    code: Option<serde_json::Value>,
114    #[serde(default)]
115    message: String,
116}
117
118impl ApiError {
119    /// Renders `code` as a plain string without JSON quoting, falling back to
120    /// `unknown` when the provider omitted it.
121    fn code_string(&self) -> String {
122        match &self.code {
123            Some(serde_json::Value::String(s)) => s.clone(),
124            Some(other) => other.to_string(),
125            None => "unknown".to_string(),
126        }
127    }
128}
129
130/// Process-wide OpenRouter chat client. Holds the model name so that callers
131/// only thread the per-item prompt/schema/input through [`Self::complete`].
132pub struct OpenRouterChatClient {
133    client: reqwest::Client,
134    api_key: SecretBox<String>,
135    model: String,
136    /// Endpoint each request is POSTed to. Always [`OPENROUTER_CHAT_URL`] in
137    /// production; only the test-only [`Self::new_with_url`] constructor
138    /// repoints it at a local mock server.
139    base_url: String,
140}
141
142impl OpenRouterChatClient {
143    /// Builds a chat client bound to `model`, applying `timeout_secs` as the
144    /// total per-request budget (wired from `--openrouter-timeout`). A value of
145    /// `0` falls back to `DEFAULT_TIMEOUT_SECS` so a missing or zero flag never
146    /// degrades into reqwest`'s immediate-timeout behaviour.
147    pub fn new(
148        api_key: SecretBox<String>,
149        model: String,
150        timeout_secs: u64,
151    ) -> Result<Self, AppError> {
152        let timeout_secs = if timeout_secs == 0 {
153            DEFAULT_TIMEOUT_SECS
154        } else {
155            timeout_secs
156        };
157        let client = reqwest::Client::builder()
158            .timeout(Duration::from_secs(timeout_secs))
159            .connect_timeout(Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SECS))
160            .user_agent("sqlite-graphrag/1.0.95")
161            .build()
162            .map_err(|e| AppError::Validation(format!("failed to build HTTP client: {e}")))?;
163
164        Ok(Self {
165            client,
166            api_key,
167            model,
168            base_url: OPENROUTER_CHAT_URL.to_string(),
169        })
170    }
171
172    /// Test-only constructor that POSTs to an arbitrary `base_url` (such as a
173    /// `wiremock::MockServer`) instead of the public OpenRouter endpoint.
174    /// Behaviour is otherwise identical to [`Self::new`].
175    #[cfg(test)]
176    pub fn new_with_url(
177        api_key: SecretBox<String>,
178        model: String,
179        base_url: String,
180        timeout_secs: u64,
181    ) -> Result<Self, AppError> {
182        let mut client = Self::new(api_key, model, timeout_secs)?;
183        client.base_url = base_url;
184        Ok(client)
185    }
186
187    /// Returns the model bound to this client.
188    pub fn model(&self) -> &str {
189        &self.model
190    }
191
192    /// Runs a single structured-output completion.
193    ///
194    /// `schema_str` is the JSON Schema (as a string) the model must honour
195    /// under `strict: true`. When `input_text` is empty only the system
196    /// message is sent. Returns `(value, cost_usd, is_oauth)` where `value`
197    /// is the model output parsed as JSON, `cost_usd` is read from
198    /// `usage.cost` (or `0.0` when absent), and `is_oauth` is always `false`
199    /// because OpenRouter uses an API key, not OAuth.
200    pub async fn complete(
201        &self,
202        system_prompt: &str,
203        input_text: &str,
204        schema_str: &str,
205        max_tokens: Option<u32>,
206    ) -> Result<(serde_json::Value, f64, bool), AppError> {
207        let schema: serde_json::Value = serde_json::from_str(schema_str).map_err(|e| {
208            AppError::Validation(format!("invalid JSON schema for OpenRouter request: {e}"))
209        })?;
210
211        // First attempt sends reasoning.enabled=false (token savings on the
212        // ~9 models that allow disabling). The ~4 reasoning-mandatory models
213        // (e.g. minimax-m2.7, gpt-oss-120b) reject it with HTTP 400 mentioning
214        // "reasoning"; on that specific failure we retry ONCE with the
215        // reasoning field omitted so the model uses its mandatory default. Any
216        // other error, or a second failure, propagates the original error.
217        let primary = self.build_request(
218            schema.clone(),
219            system_prompt,
220            input_text,
221            max_tokens,
222            Some(ReasoningPrefs { enabled: false }),
223        );
224        let response = match self.execute_with_retry(&primary).await {
225            Ok(r) => r,
226            Err(first_err) => {
227                if reasoning_disable_rejected(&first_err) {
228                    tracing::warn!(
229                        model = %self.model,
230                        "model rejected reasoning.enabled=false (mandatory); \
231                         retrying once with reasoning omitted"
232                    );
233                    let fallback =
234                        self.build_request(schema, system_prompt, input_text, max_tokens, None);
235                    match self.execute_with_retry(&fallback).await {
236                        Ok(r) => r,
237                        Err(_) => return Err(first_err),
238                    }
239                } else {
240                    return Err(first_err);
241                }
242            }
243        };
244
245        let content = response
246            .choices
247            .into_iter()
248            .next()
249            .and_then(|c| c.message.content)
250            .filter(|c| !c.trim().is_empty())
251            .ok_or_else(|| {
252                AppError::Validation(format!(
253                    "model '{}' returned no structured content (incompatible with \
254                     structured outputs, or refused the request)",
255                    self.model
256                ))
257            })?;
258
259        // GAP-SG-10: deepseek-v4-flash:nitro and similar models do not honour
260        // `json_schema` strict mode reliably — they wrap output in markdown
261        // fences, add trailing commas, or omit quotes around keys. Try a strict
262        // parse first (zero cost for well-formed JSON), then fall back to the
263        // repair pass (a Rust port of `json_repair`) before giving up.
264        let value = crate::json_repair::repair_to_value(&content).map_err(|e| {
265            AppError::Validation(format!(
266                "model '{}' returned content that could not be parsed even after \
267                 JSON repair: {e}",
268                self.model
269            ))
270        })?;
271
272        // GAP-SG-10: `llm_json` coerces aggressively — free text becomes a JSON
273        // string, empty input becomes `{}`, a lone delimiter becomes `null`. The
274        // enrich JUDGE contract is ALWAYS a JSON object, so a non-object result
275        // here is a malformed/refused generation, NOT a usable value. Reject it
276        // (the enrich classifier reclassifies this as a transient model hiccup,
277        // GAP-SG-09) instead of letting a coerced scalar masquerade as a
278        // valid-but-empty result downstream.
279        if !value.is_object() {
280            return Err(AppError::Validation(format!(
281                "model '{}' returned non-object JSON after repair (got {}); \
282                 likely a refusal or malformed structured output",
283                self.model,
284                json_shape_name(&value)
285            )));
286        }
287
288        let cost = response.usage.and_then(|u| u.cost).unwrap_or(0.0);
289
290        Ok((value, cost, false))
291    }
292
293    /// Builds a `ChatRequest` for one attempt. `reasoning` is `Some` on the
294    /// primary attempt (`enabled:false`) and `None` on the mandatory-reasoning
295    /// fallback, where the field is omitted entirely.
296    fn build_request<'a>(
297        &'a self,
298        schema: serde_json::Value,
299        system_prompt: &str,
300        input_text: &str,
301        max_tokens: Option<u32>,
302        reasoning: Option<ReasoningPrefs>,
303    ) -> ChatRequest<'a> {
304        let mut messages = Vec::with_capacity(2);
305        messages.push(ChatMessage {
306            role: "system",
307            content: system_prompt.to_string(),
308        });
309        if !input_text.is_empty() {
310            messages.push(ChatMessage {
311                role: "user",
312                content: input_text.to_string(),
313            });
314        }
315        ChatRequest {
316            model: &self.model,
317            messages,
318            response_format: ResponseFormat {
319                format_type: "json_schema",
320                json_schema: JsonSchemaSpec {
321                    name: SCHEMA_NAME,
322                    strict: true,
323                    schema,
324                },
325            },
326            provider: ProviderPrefs {
327                require_parameters: true,
328            },
329            reasoning,
330            max_tokens,
331        }
332    }
333
334    async fn execute_with_retry(
335        &self,
336        request: &ChatRequest<'_>,
337    ) -> Result<ChatResponse, AppError> {
338        let mut last_err = None;
339
340        for attempt in 0..MAX_RETRIES {
341            let result = self
342                .client
343                .post(&self.base_url)
344                .header(
345                    "Authorization",
346                    format!("Bearer {}", self.api_key.expose_secret()),
347                )
348                .json(request)
349                .send()
350                .await;
351
352            let resp = match result {
353                Ok(r) => r,
354                Err(e) if e.is_timeout() => {
355                    return Err(AppError::Validation(
356                        "OpenRouter chat request timed out".into(),
357                    ));
358                }
359                Err(e) => {
360                    last_err = Some(AppError::Validation(format!("HTTP request failed: {e}")));
361                    Self::backoff(attempt).await;
362                    continue;
363                }
364            };
365
366            let status = resp.status();
367
368            if status.is_success() {
369                let body = resp.text().await.map_err(|e| {
370                    AppError::Validation(format!("failed to read response body: {e}"))
371                })?;
372                match serde_json::from_str::<ChatResponse>(&body) {
373                    Ok(parsed) => {
374                        // A structured error object inside a 2xx body is a
375                        // PERMANENT provider rejection (e.g. context-length
376                        // overflow). Surface the REAL code/message instead of
377                        // letting empty choices masquerade as no-structured-
378                        // content, and do not retry.
379                        if let Some(api_err) = parsed.error {
380                            return Err(AppError::ProviderError {
381                                code: api_err.code_string(),
382                                message: api_err.message,
383                            });
384                        }
385                        return Ok(parsed);
386                    }
387                    Err(e) => {
388                        tracing::warn!(
389                            attempt,
390                            body_len = body.len(),
391                            "HTTP 200 but parse failed (retrying): {e}"
392                        );
393                        last_err = Some(AppError::Validation(format!(
394                            "failed to parse chat response: {e}"
395                        )));
396                        Self::backoff(attempt).await;
397                        continue;
398                    }
399                }
400            }
401
402            if status.as_u16() == 401 {
403                return Err(AppError::Validation(
404                    "invalid OpenRouter API key (HTTP 401)".into(),
405                ));
406            }
407
408            if status.as_u16() == 400 || status.as_u16() == 404 {
409                let body = resp.text().await.unwrap_or_default();
410                return Err(AppError::Validation(format!(
411                    "OpenRouter returned {status} for model '{}': {body}",
412                    self.model
413                )));
414            }
415
416            if status.as_u16() == 429 {
417                let retry_after = resp
418                    .headers()
419                    .get("retry-after")
420                    .and_then(|v| v.to_str().ok())
421                    .and_then(|v| v.parse::<u64>().ok())
422                    .unwrap_or(2);
423                tracing::warn!(
424                    attempt,
425                    retry_after_secs = retry_after,
426                    "OpenRouter rate limited, waiting"
427                );
428                // GAP-SG-56: surface the Retry-After delay to the caller. If
429                // every attempt is rate limited, the loop exits with this
430                // RateLimited error (retryable) carrying the server-advised
431                // wait, instead of a generic max-retries-exceeded message.
432                last_err = Some(AppError::RateLimited {
433                    detail: format!("OpenRouter HTTP 429 (retry-after {retry_after}s)"),
434                });
435                tokio::time::sleep(Duration::from_secs(retry_after)).await;
436                continue;
437            }
438
439            if status.is_server_error() {
440                tracing::warn!(attempt, status = %status, "OpenRouter server error, retrying");
441                last_err = Some(AppError::Validation(format!(
442                    "OpenRouter server error: {status}"
443                )));
444                Self::backoff(attempt).await;
445                continue;
446            }
447
448            let body = resp.text().await.unwrap_or_default();
449            return Err(AppError::Validation(format!(
450                "unexpected HTTP {status}: {body}"
451            )));
452        }
453
454        Err(last_err.unwrap_or_else(|| {
455            AppError::Validation("max retries exceeded for OpenRouter chat request".into())
456        }))
457    }
458
459    async fn backoff(attempt: u32) {
460        let base_ms = 1000u64 * 2u64.pow(attempt);
461        let jitter = fastrand::u64(0..500);
462        let sleep_ms = base_ms + jitter;
463        tracing::debug!(attempt, sleep_ms, "exponential backoff");
464        tokio::time::sleep(Duration::from_millis(sleep_ms)).await;
465    }
466}
467
468/// True when an error from `execute_with_retry` indicates the model rejected
469/// `reasoning.enabled=false` because reasoning is mandatory: an HTTP 400 whose
470/// body mentions "reasoning" (case-insensitive). Triggers the one-shot retry
471/// with the `reasoning` field omitted.
472fn reasoning_disable_rejected(err: &AppError) -> bool {
473    let msg = err.to_string().to_lowercase();
474    msg.contains("400") && msg.contains("reasoning")
475}
476
477/// Names the JSON shape of `value` for diagnostics (GAP-SG-10). Used when the
478/// repaired model output is not the object the enrich JUDGE contract requires.
479fn json_shape_name(value: &serde_json::Value) -> &'static str {
480    match value {
481        serde_json::Value::Null => "null",
482        serde_json::Value::Bool(_) => "boolean",
483        serde_json::Value::Number(_) => "number",
484        serde_json::Value::String(_) => "string",
485        serde_json::Value::Array(_) => "array",
486        serde_json::Value::Object(_) => "object",
487    }
488}
489
490#[cfg(test)]
491mod tests {
492    use super::*;
493    use serde_json::json;
494    use wiremock::matchers::{body_partial_json, method, path};
495    use wiremock::{Mock, MockServer, ResponseTemplate};
496
497    const TEST_SCHEMA: &str = r#"{"type":"object"}"#;
498
499    fn key() -> SecretBox<String> {
500        SecretBox::new(Box::new("test-key".to_string()))
501    }
502
503    /// Builds a chat-completions success body whose single choice carries the
504    /// model output as a JSON *string* (the double-encoding the real API uses
505    /// under structured outputs), optionally attaching `usage.cost`.
506    fn success_body(content: &str, cost: Option<f64>) -> serde_json::Value {
507        let mut body = json!({
508            "choices": [{ "message": { "content": content } }]
509        });
510        if let Some(c) = cost {
511            body["usage"] = json!({ "cost": c });
512        }
513        body
514    }
515
516    async fn client_for(server: &MockServer, model: &str) -> OpenRouterChatClient {
517        OpenRouterChatClient::new_with_url(
518            key(),
519            model.to_string(),
520            format!("{}/chat/completions", server.uri()),
521            30,
522        )
523        .expect("test client builds")
524    }
525
526    #[test]
527    fn new_builds_client_and_binds_model() {
528        let client = OpenRouterChatClient::new(key(), "z-ai/glm-5.2".to_string(), 30)
529            .expect("client builds");
530        assert_eq!(client.model(), "z-ai/glm-5.2");
531    }
532
533    #[test]
534    fn new_defaults_base_url_to_public_endpoint() {
535        let client = OpenRouterChatClient::new(key(), "z-ai/glm-5.2".to_string(), 30)
536            .expect("client builds");
537        assert_eq!(client.base_url, OPENROUTER_CHAT_URL);
538    }
539
540    #[test]
541    fn request_serializes_with_strict_schema_and_disabled_reasoning() {
542        let request = ChatRequest {
543            model: "deepseek/deepseek-v4-flash",
544            messages: vec![ChatMessage {
545                role: "system",
546                content: "extract".to_string(),
547            }],
548            response_format: ResponseFormat {
549                format_type: "json_schema",
550                json_schema: JsonSchemaSpec {
551                    name: SCHEMA_NAME,
552                    strict: true,
553                    schema: serde_json::json!({"type": "object"}),
554                },
555            },
556            provider: ProviderPrefs {
557                require_parameters: true,
558            },
559            reasoning: Some(ReasoningPrefs { enabled: false }),
560            max_tokens: None,
561        };
562        let json = serde_json::to_value(&request).expect("serializes");
563        assert_eq!(json["response_format"]["type"], "json_schema");
564        assert_eq!(json["response_format"]["json_schema"]["strict"], true);
565        assert_eq!(json["provider"]["require_parameters"], true);
566        assert_eq!(json["reasoning"]["enabled"], false);
567        // max_tokens omitted when None
568        assert!(json.get("max_tokens").is_none());
569    }
570
571    #[tokio::test]
572    async fn complete_sends_wellformed_request_and_parses_content() {
573        let server = MockServer::start().await;
574        Mock::given(method("POST"))
575            .and(path("/chat/completions"))
576            .and(body_partial_json(json!({
577                "model": "deepseek/deepseek-v4-flash",
578                "response_format": {
579                    "type": "json_schema",
580                    "json_schema": { "name": "enrich_output", "strict": true }
581                },
582                "provider": { "require_parameters": true },
583                "reasoning": { "enabled": false }
584            })))
585            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
586                r#"{"entities":[],"relationships":[]}"#,
587                Some(0.0023),
588            )))
589            .expect(1)
590            .mount(&server)
591            .await;
592
593        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
594        let (value, cost, is_oauth) = client
595            .complete("system", "input", TEST_SCHEMA, None)
596            .await
597            .expect("completion succeeds");
598
599        assert_eq!(value, json!({"entities": [], "relationships": []}));
600        assert!((cost - 0.0023).abs() < f64::EPSILON);
601        assert!(!is_oauth);
602    }
603
604    #[tokio::test]
605    async fn complete_defaults_cost_to_zero_when_usage_absent() {
606        let server = MockServer::start().await;
607        Mock::given(method("POST"))
608            .respond_with(
609                ResponseTemplate::new(200).set_body_json(success_body(r#"{"entities":[]}"#, None)),
610            )
611            .mount(&server)
612            .await;
613
614        let client = client_for(&server, "z-ai/glm-5.2").await;
615        let (_, cost, _) = client
616            .complete("system", "", TEST_SCHEMA, Some(4096))
617            .await
618            .expect("completion succeeds");
619        assert_eq!(cost, 0.0);
620    }
621
622    #[tokio::test]
623    async fn complete_retries_on_429_honouring_retry_after() {
624        let server = MockServer::start().await;
625        Mock::given(method("POST"))
626            .respond_with(ResponseTemplate::new(429).insert_header("retry-after", "1"))
627            .up_to_n_times(1)
628            .expect(1)
629            .mount(&server)
630            .await;
631        Mock::given(method("POST"))
632            .respond_with(
633                ResponseTemplate::new(200).set_body_json(success_body(r#"{"ok":true}"#, Some(0.0))),
634            )
635            .expect(1)
636            .mount(&server)
637            .await;
638
639        let client = client_for(&server, "minimax/minimax-m3").await;
640        let (value, _, _) = client
641            .complete("system", "input", TEST_SCHEMA, None)
642            .await
643            .expect("retried completion succeeds");
644        assert_eq!(value, json!({"ok": true}));
645    }
646
647    #[tokio::test]
648    async fn complete_retries_on_5xx_with_backoff() {
649        let server = MockServer::start().await;
650        Mock::given(method("POST"))
651            .respond_with(ResponseTemplate::new(503))
652            .up_to_n_times(1)
653            .expect(1)
654            .mount(&server)
655            .await;
656        Mock::given(method("POST"))
657            .respond_with(
658                ResponseTemplate::new(200).set_body_json(success_body(r#"{"ok":1}"#, Some(0.0))),
659            )
660            .expect(1)
661            .mount(&server)
662            .await;
663
664        let client = client_for(&server, "openai/gpt-oss-120b").await;
665        let (value, _, _) = client
666            .complete("system", "input", TEST_SCHEMA, None)
667            .await
668            .expect("retried completion succeeds");
669        assert_eq!(value, json!({"ok": 1}));
670    }
671
672    #[tokio::test]
673    async fn complete_401_is_permanent_without_retry() {
674        let server = MockServer::start().await;
675        Mock::given(method("POST"))
676            .respond_with(ResponseTemplate::new(401))
677            .expect(1)
678            .mount(&server)
679            .await;
680
681        let client = client_for(&server, "z-ai/glm-5.2").await;
682        let err = client
683            .complete("system", "input", TEST_SCHEMA, None)
684            .await
685            .expect_err("401 is an error");
686        assert!(err.to_string().contains("401"), "got: {err}");
687    }
688
689    #[tokio::test]
690    async fn complete_400_returns_body_and_model_without_retry() {
691        let server = MockServer::start().await;
692        Mock::given(method("POST"))
693            .respond_with(ResponseTemplate::new(400).set_body_string("schema not supported"))
694            .expect(1)
695            .mount(&server)
696            .await;
697
698        let client = client_for(&server, "xiaomi/mimo-v2.5").await;
699        let err = client
700            .complete("system", "input", TEST_SCHEMA, None)
701            .await
702            .expect_err("400 is an error");
703        let msg = err.to_string();
704        assert!(msg.contains("400"), "got: {msg}");
705        assert!(msg.contains("xiaomi/mimo-v2.5"), "got: {msg}");
706        assert!(msg.contains("schema not supported"), "got: {msg}");
707    }
708
709    #[tokio::test]
710    async fn complete_empty_choices_errors_citing_model() {
711        let server = MockServer::start().await;
712        Mock::given(method("POST"))
713            .respond_with(ResponseTemplate::new(200).set_body_json(json!({ "choices": [] })))
714            .mount(&server)
715            .await;
716
717        let client = client_for(&server, "minimax/minimax-m2.7").await;
718        let err = client
719            .complete("system", "input", TEST_SCHEMA, None)
720            .await
721            .expect_err("empty choices is an error");
722        let msg = err.to_string();
723        assert!(msg.contains("minimax/minimax-m2.7"), "got: {msg}");
724        assert!(msg.contains("no structured content"), "got: {msg}");
725    }
726
727    #[tokio::test]
728    async fn complete_empty_content_errors() {
729        let server = MockServer::start().await;
730        Mock::given(method("POST"))
731            .respond_with(ResponseTemplate::new(200).set_body_json(success_body("   ", Some(0.0))))
732            .mount(&server)
733            .await;
734
735        let client = client_for(&server, "z-ai/glm-5.2:nitro").await;
736        let err = client
737            .complete("system", "input", TEST_SCHEMA, None)
738            .await
739            .expect_err("blank content is an error");
740        assert!(
741            err.to_string().contains("no structured content"),
742            "got: {err}"
743        );
744    }
745
746    #[tokio::test]
747    async fn complete_non_json_content_errors_as_incompatible() {
748        // GAP-SG-10: free text is coerced by the repair pass into a JSON string
749        // (not an object), so it is rejected by the shape guard rather than the
750        // strict-parse error. The message names the offending shape + model.
751        let server = MockServer::start().await;
752        Mock::given(method("POST"))
753            .respond_with(
754                ResponseTemplate::new(200)
755                    .set_body_json(success_body("this is not json", Some(0.0))),
756            )
757            .mount(&server)
758            .await;
759
760        let client = client_for(&server, "google/gemini-3.1-flash-lite").await;
761        let err = client
762            .complete("system", "input", TEST_SCHEMA, None)
763            .await
764            .expect_err("non-json content is an error");
765        let msg = err.to_string();
766        assert!(msg.contains("non-object JSON after repair"), "got: {msg}");
767        assert!(msg.contains("google/gemini-3.1-flash-lite"), "got: {msg}");
768    }
769
770    #[tokio::test]
771    async fn complete_repairs_markdown_fenced_object() {
772        // GAP-SG-10: a model that wraps a valid object in a ```json fence (a
773        // common deepseek-v4-flash:nitro defect) is repaired and parsed instead
774        // of being rejected as non-JSON.
775        let server = MockServer::start().await;
776        Mock::given(method("POST"))
777            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
778                "```json\n{\"entities\":[\"rust\"],\"relationships\":[]}\n```",
779                Some(0.0),
780            )))
781            .mount(&server)
782            .await;
783
784        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
785        let (value, _, _) = client
786            .complete("system", "input", TEST_SCHEMA, None)
787            .await
788            .expect("fenced object is repaired");
789        assert_eq!(value, json!({"entities": ["rust"], "relationships": []}));
790    }
791
792    #[tokio::test]
793    async fn complete_rejects_invalid_schema_before_network() {
794        // No mock mounted: an unreachable URL proves we never hit the network.
795        let client = OpenRouterChatClient::new_with_url(
796            key(),
797            "z-ai/glm-5.2".to_string(),
798            "http://127.0.0.1:1/chat/completions".to_string(),
799            30,
800        )
801        .expect("client builds");
802        let err = client
803            .complete("system", "input", "{not valid json", None)
804            .await
805            .expect_err("invalid schema is rejected");
806        assert!(
807            err.to_string().contains("invalid JSON schema"),
808            "got: {err}"
809        );
810    }
811
812    #[tokio::test]
813    async fn complete_retries_with_reasoning_omitted_when_mandatory() {
814        let server = MockServer::start().await;
815        // Primary attempt (reasoning.enabled=false) is rejected with a 400 whose
816        // body mentions "reasoning" — the mandatory-reasoning signal that drives
817        // the one-shot fallback.
818        Mock::given(method("POST"))
819            .respond_with(
820                ResponseTemplate::new(400).set_body_string(
821                    "reasoning is mandatory for this model and cannot be disabled",
822                ),
823            )
824            .up_to_n_times(1)
825            .expect(1)
826            .mount(&server)
827            .await;
828        // Fallback attempt (reasoning field omitted) succeeds.
829        Mock::given(method("POST"))
830            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
831                r#"{"entities":[],"relationships":[]}"#,
832                Some(0.0),
833            )))
834            .expect(1)
835            .mount(&server)
836            .await;
837
838        let client = client_for(&server, "minimax/minimax-m2.7").await;
839        let (value, _, _) = client
840            .complete("system", "input", TEST_SCHEMA, None)
841            .await
842            .expect("fallback completion succeeds");
843        assert_eq!(value, json!({"entities": [], "relationships": []}));
844
845        // Exactly two requests were sent: the FIRST carries reasoning.enabled=false,
846        // the SECOND (fallback) OMITS the reasoning field entirely.
847        let requests = server
848            .received_requests()
849            .await
850            .expect("request recording is enabled");
851        assert_eq!(requests.len(), 2, "expected primary + fallback requests");
852        let first: serde_json::Value =
853            serde_json::from_slice(&requests[0].body).expect("first request body is JSON");
854        let second: serde_json::Value =
855            serde_json::from_slice(&requests[1].body).expect("second request body is JSON");
856        assert_eq!(
857            first["reasoning"]["enabled"],
858            json!(false),
859            "primary request must send reasoning.enabled=false"
860        );
861        assert!(
862            second.get("reasoning").is_none(),
863            "fallback request must omit the reasoning field, got: {second}"
864        );
865    }
866
867    #[tokio::test]
868    async fn complete_honours_configured_timeout() {
869        // A 1s client timeout against a server that delays 2s proves the
870        // --openrouter-timeout value is wired into the reqwest builder instead
871        // of the fixed 300s default (regression: the flag was silently ignored).
872        let server = MockServer::start().await;
873        Mock::given(method("POST"))
874            .respond_with(
875                ResponseTemplate::new(200)
876                    .set_delay(std::time::Duration::from_secs(2))
877                    .set_body_json(success_body(r#"{"ok":1}"#, Some(0.0))),
878            )
879            .mount(&server)
880            .await;
881
882        let client = OpenRouterChatClient::new_with_url(
883            key(),
884            "z-ai/glm-5.2".to_string(),
885            format!("{}/chat/completions", server.uri()),
886            1,
887        )
888        .expect("client builds");
889        let err = client
890            .complete("system", "input", TEST_SCHEMA, None)
891            .await
892            .expect_err("request exceeds the 1s timeout");
893        assert!(err.to_string().contains("timed out"), "got: {err}");
894    }
895
896    #[tokio::test]
897    async fn complete_surfaces_provider_error_in_200_body() {
898        // GAP-SG-03: an HTTP 200 whose body is a structured OpenRouter error
899        // (token/context-length overflow) must surface the REAL message, not
900        // the misleading no-structured-content from empty choices.
901        let server = MockServer::start().await;
902        Mock::given(method("POST"))
903            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
904                "error": { "code": 400, "message": "context length exceeded" }
905            })))
906            .mount(&server)
907            .await;
908
909        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
910        let err = client
911            .complete("system", "input", TEST_SCHEMA, None)
912            .await
913            .expect_err("provider error must surface");
914        let msg = err.to_string();
915        assert!(msg.contains("context length exceeded"), "got: {msg}");
916        assert!(
917            !msg.contains("no structured content"),
918            "must not mask as empty choices: {msg}"
919        );
920        assert!(
921            !msg.contains("missing field"),
922            "must not mask as a missing field: {msg}"
923        );
924    }
925}