1use crate::errors::AppError;
17use secrecy::{ExposeSecret, SecretBox};
18use serde::{Deserialize, Serialize};
19use std::time::Duration;
20
/// Endpoint that chat-completion requests are POSTed to unless a test overrides it.
const OPENROUTER_CHAT_URL: &str = "https://openrouter.ai/api/v1/chat/completions";
/// Whole-request timeout in seconds, substituted when the caller passes `0`.
const DEFAULT_TIMEOUT_SECS: u64 = 600;
/// TCP/TLS connect timeout in seconds applied to every client.
const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 10;
/// Total number of send attempts per request (the first attempt is included).
const MAX_RETRIES: u32 = 4;

/// Value sent as `response_format.json_schema.name`.
const SCHEMA_NAME: &str = "enrich_output";
32
33#[derive(Serialize)]
34struct ChatRequest<'a> {
35 model: &'a str,
36 messages: Vec<ChatMessage<'a>>,
37 response_format: ResponseFormat,
38 provider: ProviderPrefs,
39 #[serde(skip_serializing_if = "Option::is_none")]
40 reasoning: Option<ReasoningPrefs>,
41 #[serde(skip_serializing_if = "Option::is_none")]
42 max_tokens: Option<u32>,
43}
44
45#[derive(Serialize)]
46struct ChatMessage<'a> {
47 role: &'a str,
48 content: String,
49}
50
51#[derive(Serialize)]
52struct ResponseFormat {
53 #[serde(rename = "type")]
54 format_type: &'static str,
55 json_schema: JsonSchemaSpec,
56}
57
58#[derive(Serialize)]
59struct JsonSchemaSpec {
60 name: &'static str,
61 strict: bool,
62 schema: serde_json::Value,
63}
64
65#[derive(Serialize)]
66struct ProviderPrefs {
67 require_parameters: bool,
68}
69
70#[derive(Serialize)]
71struct ReasoningPrefs {
72 enabled: bool,
73}
74
75#[derive(Deserialize)]
76struct ChatResponse {
77 #[serde(default)]
78 choices: Vec<Choice>,
79 #[serde(default)]
80 usage: Option<Usage>,
81 #[serde(default)]
86 error: Option<ApiError>,
87}
88
89#[derive(Deserialize)]
90struct Choice {
91 message: RespMessage,
92}
93
94#[derive(Deserialize)]
95struct RespMessage {
96 #[serde(default)]
97 content: Option<String>,
98}
99
100#[derive(Deserialize)]
101struct Usage {
102 #[serde(default)]
103 cost: Option<f64>,
104}
105
106#[derive(Deserialize)]
111struct ApiError {
112 #[serde(default)]
113 code: Option<serde_json::Value>,
114 #[serde(default)]
115 message: String,
116}
117
118impl ApiError {
119 fn code_string(&self) -> String {
122 match &self.code {
123 Some(serde_json::Value::String(s)) => s.clone(),
124 Some(other) => other.to_string(),
125 None => "unknown".to_string(),
126 }
127 }
128}
129
130pub struct OpenRouterChatClient {
133 client: reqwest::Client,
134 api_key: SecretBox<String>,
135 model: String,
136 base_url: String,
140}
141
142impl OpenRouterChatClient {
143 pub fn new(
148 api_key: SecretBox<String>,
149 model: String,
150 timeout_secs: u64,
151 ) -> Result<Self, AppError> {
152 let timeout_secs = if timeout_secs == 0 {
153 DEFAULT_TIMEOUT_SECS
154 } else {
155 timeout_secs
156 };
157 let client = reqwest::Client::builder()
158 .timeout(Duration::from_secs(timeout_secs))
159 .connect_timeout(Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SECS))
160 .user_agent("sqlite-graphrag/1.0.95")
161 .build()
162 .map_err(|e| AppError::Validation(format!("failed to build HTTP client: {e}")))?;
163
164 Ok(Self {
165 client,
166 api_key,
167 model,
168 base_url: OPENROUTER_CHAT_URL.to_string(),
169 })
170 }
171
/// Test-only constructor: same as [`Self::new`], but requests go to
/// `base_url` instead of the public endpoint.
#[cfg(test)]
pub fn new_with_url(
    api_key: SecretBox<String>,
    model: String,
    base_url: String,
    timeout_secs: u64,
) -> Result<Self, AppError> {
    Self::new(api_key, model, timeout_secs).map(|built| Self { base_url, ..built })
}
186
187 pub fn model(&self) -> &str {
189 &self.model
190 }
191
192 pub async fn complete(
201 &self,
202 system_prompt: &str,
203 input_text: &str,
204 schema_str: &str,
205 max_tokens: Option<u32>,
206 ) -> Result<(serde_json::Value, f64, bool), AppError> {
207 let schema: serde_json::Value = serde_json::from_str(schema_str).map_err(|e| {
208 AppError::Validation(format!("invalid JSON schema for OpenRouter request: {e}"))
209 })?;
210
211 let primary = self.build_request(
218 schema.clone(),
219 system_prompt,
220 input_text,
221 max_tokens,
222 Some(ReasoningPrefs { enabled: false }),
223 );
224 let response = match self.execute_with_retry(&primary).await {
225 Ok(r) => r,
226 Err(first_err) => {
227 if reasoning_disable_rejected(&first_err) {
228 tracing::warn!(
229 model = %self.model,
230 "model rejected reasoning.enabled=false (mandatory); \
231 retrying once with reasoning omitted"
232 );
233 let fallback =
234 self.build_request(schema, system_prompt, input_text, max_tokens, None);
235 match self.execute_with_retry(&fallback).await {
236 Ok(r) => r,
237 Err(_) => return Err(first_err),
238 }
239 } else {
240 return Err(first_err);
241 }
242 }
243 };
244
245 let content = response
246 .choices
247 .into_iter()
248 .next()
249 .and_then(|c| c.message.content)
250 .filter(|c| !c.trim().is_empty())
251 .ok_or_else(|| {
252 AppError::Validation(format!(
253 "model '{}' returned no structured content (incompatible with \
254 structured outputs, or refused the request)",
255 self.model
256 ))
257 })?;
258
259 let value = crate::json_repair::repair_to_value(&content).map_err(|e| {
265 AppError::Validation(format!(
266 "model '{}' returned content that could not be parsed even after \
267 JSON repair: {e}",
268 self.model
269 ))
270 })?;
271
272 if !value.is_object() {
280 return Err(AppError::Validation(format!(
281 "model '{}' returned non-object JSON after repair (got {}); \
282 likely a refusal or malformed structured output",
283 self.model,
284 json_shape_name(&value)
285 )));
286 }
287
288 let cost = response.usage.and_then(|u| u.cost).unwrap_or(0.0);
289
290 Ok((value, cost, false))
291 }
292
293 fn build_request<'a>(
297 &'a self,
298 schema: serde_json::Value,
299 system_prompt: &str,
300 input_text: &str,
301 max_tokens: Option<u32>,
302 reasoning: Option<ReasoningPrefs>,
303 ) -> ChatRequest<'a> {
304 let mut messages = Vec::with_capacity(2);
305 messages.push(ChatMessage {
306 role: "system",
307 content: system_prompt.to_string(),
308 });
309 if !input_text.is_empty() {
310 messages.push(ChatMessage {
311 role: "user",
312 content: input_text.to_string(),
313 });
314 }
315 ChatRequest {
316 model: &self.model,
317 messages,
318 response_format: ResponseFormat {
319 format_type: "json_schema",
320 json_schema: JsonSchemaSpec {
321 name: SCHEMA_NAME,
322 strict: true,
323 schema,
324 },
325 },
326 provider: ProviderPrefs {
327 require_parameters: true,
328 },
329 reasoning,
330 max_tokens,
331 }
332 }
333
334 async fn execute_with_retry(
335 &self,
336 request: &ChatRequest<'_>,
337 ) -> Result<ChatResponse, AppError> {
338 let mut last_err = None;
339
340 for attempt in 0..MAX_RETRIES {
341 let result = self
342 .client
343 .post(&self.base_url)
344 .header(
345 "Authorization",
346 format!("Bearer {}", self.api_key.expose_secret()),
347 )
348 .json(request)
349 .send()
350 .await;
351
352 let resp = match result {
353 Ok(r) => r,
354 Err(e) if e.is_timeout() => {
355 return Err(AppError::Validation(
356 "OpenRouter chat request timed out".into(),
357 ));
358 }
359 Err(e) => {
360 last_err = Some(AppError::Validation(format!("HTTP request failed: {e}")));
361 Self::backoff(attempt).await;
362 continue;
363 }
364 };
365
366 let status = resp.status();
367
368 if status.is_success() {
369 let body = resp.text().await.map_err(|e| {
370 AppError::Validation(format!("failed to read response body: {e}"))
371 })?;
372 match serde_json::from_str::<ChatResponse>(&body) {
373 Ok(parsed) => {
374 if let Some(api_err) = parsed.error {
380 return Err(AppError::ProviderError {
381 code: api_err.code_string(),
382 message: api_err.message,
383 });
384 }
385 return Ok(parsed);
386 }
387 Err(e) => {
388 tracing::warn!(
389 attempt,
390 body_len = body.len(),
391 "HTTP 200 but parse failed (retrying): {e}"
392 );
393 last_err = Some(AppError::Validation(format!(
394 "failed to parse chat response: {e}"
395 )));
396 Self::backoff(attempt).await;
397 continue;
398 }
399 }
400 }
401
402 if status.as_u16() == 401 {
403 return Err(AppError::Validation(
404 "invalid OpenRouter API key (HTTP 401)".into(),
405 ));
406 }
407
408 if status.as_u16() == 400 || status.as_u16() == 404 {
409 let body = resp.text().await.unwrap_or_default();
410 return Err(AppError::Validation(format!(
411 "OpenRouter returned {status} for model '{}': {body}",
412 self.model
413 )));
414 }
415
416 if status.as_u16() == 429 {
417 let retry_after = resp
418 .headers()
419 .get("retry-after")
420 .and_then(|v| v.to_str().ok())
421 .and_then(|v| v.parse::<u64>().ok())
422 .unwrap_or(2);
423 tracing::warn!(
424 attempt,
425 retry_after_secs = retry_after,
426 "OpenRouter rate limited, waiting"
427 );
428 last_err = Some(AppError::RateLimited {
433 detail: format!("OpenRouter HTTP 429 (retry-after {retry_after}s)"),
434 });
435 tokio::time::sleep(Duration::from_secs(retry_after)).await;
436 continue;
437 }
438
439 if status.is_server_error() {
440 tracing::warn!(attempt, status = %status, "OpenRouter server error, retrying");
441 last_err = Some(AppError::Validation(format!(
442 "OpenRouter server error: {status}"
443 )));
444 Self::backoff(attempt).await;
445 continue;
446 }
447
448 let body = resp.text().await.unwrap_or_default();
449 return Err(AppError::Validation(format!(
450 "unexpected HTTP {status}: {body}"
451 )));
452 }
453
454 Err(last_err.unwrap_or_else(|| {
455 AppError::Validation("max retries exceeded for OpenRouter chat request".into())
456 }))
457 }
458
459 async fn backoff(attempt: u32) {
460 let base_ms = 1000u64 * 2u64.pow(attempt);
461 let jitter = fastrand::u64(0..500);
462 let sleep_ms = base_ms + jitter;
463 tracing::debug!(attempt, sleep_ms, "exponential backoff");
464 tokio::time::sleep(Duration::from_millis(sleep_ms)).await;
465 }
466}
467
468fn reasoning_disable_rejected(err: &AppError) -> bool {
473 let msg = err.to_string().to_lowercase();
474 msg.contains("400") && msg.contains("reasoning")
475}
476
477fn json_shape_name(value: &serde_json::Value) -> &'static str {
480 match value {
481 serde_json::Value::Null => "null",
482 serde_json::Value::Bool(_) => "boolean",
483 serde_json::Value::Number(_) => "number",
484 serde_json::Value::String(_) => "string",
485 serde_json::Value::Array(_) => "array",
486 serde_json::Value::Object(_) => "object",
487 }
488}
489
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use wiremock::matchers::{body_partial_json, method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Minimal schema handed to `complete` by the tests.
    const TEST_SCHEMA: &str = r#"{"type":"object"}"#;

    /// Dummy API key.
    fn key() -> SecretBox<String> {
        SecretBox::new(Box::new("test-key".to_string()))
    }

    /// Builds a successful response body with one choice holding `content`,
    /// plus a `usage.cost` entry when `cost` is given.
    fn success_body(content: &str, cost: Option<f64>) -> serde_json::Value {
        match cost {
            Some(c) => json!({
                "choices": [{ "message": { "content": content } }],
                "usage": { "cost": c }
            }),
            None => json!({
                "choices": [{ "message": { "content": content } }]
            }),
        }
    }

    /// HTTP 200 reply whose JSON body is `success_body(content, cost)`.
    fn ok_reply(content: &str, cost: Option<f64>) -> ResponseTemplate {
        ResponseTemplate::new(200).set_body_json(success_body(content, cost))
    }

    /// Client for `model` pointed at the mock server, with a 30 s timeout.
    async fn client_for(server: &MockServer, model: &str) -> OpenRouterChatClient {
        let url = format!("{}/chat/completions", server.uri());
        OpenRouterChatClient::new_with_url(key(), model.to_string(), url, 30)
            .expect("test client builds")
    }

    #[test]
    fn new_builds_client_and_binds_model() {
        let model = "z-ai/glm-5.2";
        let built =
            OpenRouterChatClient::new(key(), model.to_string(), 30).expect("client builds");
        assert_eq!(built.model(), model);
    }

    #[test]
    fn new_defaults_base_url_to_public_endpoint() {
        let built = OpenRouterChatClient::new(key(), "z-ai/glm-5.2".to_string(), 30)
            .expect("client builds");
        assert_eq!(built.base_url, OPENROUTER_CHAT_URL);
    }

    #[test]
    fn request_serializes_with_strict_schema_and_disabled_reasoning() {
        let json_schema = JsonSchemaSpec {
            name: SCHEMA_NAME,
            strict: true,
            schema: serde_json::json!({"type": "object"}),
        };
        let request = ChatRequest {
            model: "deepseek/deepseek-v4-flash",
            messages: vec![ChatMessage {
                role: "system",
                content: "extract".to_string(),
            }],
            response_format: ResponseFormat {
                format_type: "json_schema",
                json_schema,
            },
            provider: ProviderPrefs {
                require_parameters: true,
            },
            reasoning: Some(ReasoningPrefs { enabled: false }),
            max_tokens: None,
        };

        let wire = serde_json::to_value(&request).expect("serializes");
        let format = &wire["response_format"];
        assert_eq!(format["type"], "json_schema");
        assert_eq!(format["json_schema"]["strict"], true);
        assert_eq!(wire["provider"]["require_parameters"], true);
        assert_eq!(wire["reasoning"]["enabled"], false);
        assert!(wire.get("max_tokens").is_none());
    }

    #[tokio::test]
    async fn complete_sends_wellformed_request_and_parses_content() {
        let server = MockServer::start().await;
        let expected_request = json!({
            "model": "deepseek/deepseek-v4-flash",
            "response_format": {
                "type": "json_schema",
                "json_schema": { "name": "enrich_output", "strict": true }
            },
            "provider": { "require_parameters": true },
            "reasoning": { "enabled": false }
        });
        Mock::given(method("POST"))
            .and(path("/chat/completions"))
            .and(body_partial_json(expected_request))
            .respond_with(ok_reply(
                r#"{"entities":[],"relationships":[]}"#,
                Some(0.0023),
            ))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let (value, cost, is_oauth) = outcome.expect("completion succeeds");

        assert_eq!(value, json!({"entities": [], "relationships": []}));
        assert!((cost - 0.0023).abs() < f64::EPSILON);
        assert!(!is_oauth);
    }

    #[tokio::test]
    async fn complete_defaults_cost_to_zero_when_usage_absent() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ok_reply(r#"{"entities":[]}"#, None))
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let outcome = client.complete("system", "", TEST_SCHEMA, Some(4096)).await;
        let (_, cost, _) = outcome.expect("completion succeeds");
        assert_eq!(cost, 0.0);
    }

    #[tokio::test]
    async fn complete_retries_on_429_honouring_retry_after() {
        let server = MockServer::start().await;
        // First reply: rate limited once. Second mock answers the retry.
        let rate_limited = ResponseTemplate::new(429).insert_header("retry-after", "1");
        Mock::given(method("POST"))
            .respond_with(rate_limited)
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(ok_reply(r#"{"ok":true}"#, Some(0.0)))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m3").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let (value, _, _) = outcome.expect("retried completion succeeds");
        assert_eq!(value, json!({"ok": true}));
    }

    #[tokio::test]
    async fn complete_retries_on_5xx_with_backoff() {
        let server = MockServer::start().await;
        // First reply: 503 once. Second mock answers the retry.
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(503))
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(ok_reply(r#"{"ok":1}"#, Some(0.0)))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "openai/gpt-oss-120b").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let (value, _, _) = outcome.expect("retried completion succeeds");
        assert_eq!(value, json!({"ok": 1}));
    }

    #[tokio::test]
    async fn complete_401_is_permanent_without_retry() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(401))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let err = outcome.expect_err("401 is an error");
        assert!(err.to_string().contains("401"), "got: {err}");
    }

    #[tokio::test]
    async fn complete_400_returns_body_and_model_without_retry() {
        let server = MockServer::start().await;
        let bad_request = ResponseTemplate::new(400).set_body_string("schema not supported");
        Mock::given(method("POST"))
            .respond_with(bad_request)
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "xiaomi/mimo-v2.5").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let msg = outcome.expect_err("400 is an error").to_string();
        assert!(msg.contains("400"), "got: {msg}");
        assert!(msg.contains("xiaomi/mimo-v2.5"), "got: {msg}");
        assert!(msg.contains("schema not supported"), "got: {msg}");
    }

    #[tokio::test]
    async fn complete_empty_choices_errors_citing_model() {
        let server = MockServer::start().await;
        let no_choices = ResponseTemplate::new(200).set_body_json(json!({ "choices": [] }));
        Mock::given(method("POST"))
            .respond_with(no_choices)
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m2.7").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let msg = outcome.expect_err("empty choices is an error").to_string();
        assert!(msg.contains("minimax/minimax-m2.7"), "got: {msg}");
        assert!(msg.contains("no structured content"), "got: {msg}");
    }

    #[tokio::test]
    async fn complete_empty_content_errors() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ok_reply(" ", Some(0.0)))
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2:nitro").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let err = outcome.expect_err("blank content is an error");
        assert!(
            err.to_string().contains("no structured content"),
            "got: {err}"
        );
    }

    #[tokio::test]
    async fn complete_non_json_content_errors_as_incompatible() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ok_reply("this is not json", Some(0.0)))
            .mount(&server)
            .await;

        let client = client_for(&server, "google/gemini-3.1-flash-lite").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let msg = outcome.expect_err("non-json content is an error").to_string();
        assert!(msg.contains("non-object JSON after repair"), "got: {msg}");
        assert!(msg.contains("google/gemini-3.1-flash-lite"), "got: {msg}");
    }

    #[tokio::test]
    async fn complete_repairs_markdown_fenced_object() {
        let server = MockServer::start().await;
        let fenced = "```json\n{\"entities\":[\"rust\"],\"relationships\":[]}\n```";
        Mock::given(method("POST"))
            .respond_with(ok_reply(fenced, Some(0.0)))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let (value, _, _) = outcome.expect("fenced object is repaired");
        assert_eq!(value, json!({"entities": ["rust"], "relationships": []}));
    }

    #[tokio::test]
    async fn complete_rejects_invalid_schema_before_network() {
        // Port 1 is never contacted: the schema is rejected first.
        let client = OpenRouterChatClient::new_with_url(
            key(),
            "z-ai/glm-5.2".to_string(),
            "http://127.0.0.1:1/chat/completions".to_string(),
            30,
        )
        .expect("client builds");

        let outcome = client
            .complete("system", "input", "{not valid json", None)
            .await;
        let err = outcome.expect_err("invalid schema is rejected");
        assert!(
            err.to_string().contains("invalid JSON schema"),
            "got: {err}"
        );
    }

    #[tokio::test]
    async fn complete_retries_with_reasoning_omitted_when_mandatory() {
        let server = MockServer::start().await;
        // First reply rejects the disabled reasoning; second accepts the fallback.
        let rejection = ResponseTemplate::new(400)
            .set_body_string("reasoning is mandatory for this model and cannot be disabled");
        Mock::given(method("POST"))
            .respond_with(rejection)
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(ok_reply(
                r#"{"entities":[],"relationships":[]}"#,
                Some(0.0),
            ))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m2.7").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let (value, _, _) = outcome.expect("fallback completion succeeds");
        assert_eq!(value, json!({"entities": [], "relationships": []}));

        let requests = server
            .received_requests()
            .await
            .expect("request recording is enabled");
        assert_eq!(requests.len(), 2, "expected primary + fallback requests");

        let first: serde_json::Value =
            serde_json::from_slice(&requests[0].body).expect("first request body is JSON");
        let second: serde_json::Value =
            serde_json::from_slice(&requests[1].body).expect("second request body is JSON");
        assert_eq!(
            first["reasoning"]["enabled"],
            json!(false),
            "primary request must send reasoning.enabled=false"
        );
        assert!(
            second.get("reasoning").is_none(),
            "fallback request must omit the reasoning field, got: {second}"
        );
    }

    #[tokio::test]
    async fn complete_honours_configured_timeout() {
        let server = MockServer::start().await;
        // The reply takes 2 s while the client below allows only 1 s.
        let slow_reply =
            ok_reply(r#"{"ok":1}"#, Some(0.0)).set_delay(std::time::Duration::from_secs(2));
        Mock::given(method("POST"))
            .respond_with(slow_reply)
            .mount(&server)
            .await;

        let client = OpenRouterChatClient::new_with_url(
            key(),
            "z-ai/glm-5.2".to_string(),
            format!("{}/chat/completions", server.uri()),
            1,
        )
        .expect("client builds");

        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let err = outcome.expect_err("request exceeds the 1s timeout");
        assert!(err.to_string().contains("timed out"), "got: {err}");
    }

    #[tokio::test]
    async fn complete_surfaces_provider_error_in_200_body() {
        let server = MockServer::start().await;
        let error_body = json!({
            "error": { "code": 400, "message": "context length exceeded" }
        });
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(error_body))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let outcome = client.complete("system", "input", TEST_SCHEMA, None).await;
        let msg = outcome.expect_err("provider error must surface").to_string();
        assert!(msg.contains("context length exceeded"), "got: {msg}");
        assert!(
            !msg.contains("no structured content"),
            "must not mask as empty choices: {msg}"
        );
        assert!(
            !msg.contains("missing field"),
            "must not mask as a missing field: {msg}"
        );
    }
}