1use crate::errors::AppError;
31use crate::retry::AttemptOutcome;
32use secrecy::{ExposeSecret, SecretBox};
33use serde::{Deserialize, Serialize};
34use std::time::Duration;
35
/// Public OpenRouter chat-completions endpoint; `OpenRouterChatClient::new` uses it as `base_url`.
const OPENROUTER_CHAT_URL: &str = "https://openrouter.ai/api/v1/chat/completions";
/// Whole-request timeout (seconds) substituted when the caller passes `timeout_secs == 0`.
const DEFAULT_TIMEOUT_SECS: u64 = 600;
/// Connect timeout (seconds) applied to every HTTP client built by `OpenRouterChatClient::new`.
const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 10;

/// Value sent as `response_format.json_schema.name` in every request.
const SCHEMA_NAME: &str = "enrich_output";
46
/// JSON body POSTed to the chat-completions endpoint.
///
/// `reasoning` and `max_tokens` are left out of the serialized body entirely
/// when they are `None`.
#[derive(Serialize)]
struct ChatRequest<'a> {
    /// Model identifier, borrowed from the owning client.
    model: &'a str,
    /// Conversation messages in the order they are sent.
    messages: Vec<ChatMessage<'a>>,
    /// Structured-output specification (JSON schema).
    response_format: ResponseFormat,
    /// Provider routing preferences.
    provider: ProviderPrefs,
    /// Reasoning toggle; omitted from the body when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ReasoningPrefs>,
    /// Completion token budget; omitted from the body when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
}
58
/// One entry of the request's `messages` array.
#[derive(Serialize)]
struct ChatMessage<'a> {
    /// Message role; this file sends `"system"` and `"user"`.
    role: &'a str,
    /// Message text, owned by the request.
    content: String,
}
64
/// The request's `response_format` object.
#[derive(Serialize)]
struct ResponseFormat {
    /// Serialized under the JSON key `type`; this file always sends `"json_schema"`.
    #[serde(rename = "type")]
    format_type: &'static str,
    /// Schema the response is asked to conform to.
    json_schema: JsonSchemaSpec,
}
71
/// The `response_format.json_schema` object.
#[derive(Serialize)]
struct JsonSchemaSpec {
    /// Schema name; this file always sends `SCHEMA_NAME`.
    name: &'static str,
    /// Strict-mode flag; this file always sends `true`.
    strict: bool,
    /// The JSON schema itself, parsed from the caller-supplied schema string.
    schema: serde_json::Value,
}
78
/// The request's `provider` object.
#[derive(Serialize)]
struct ProviderPrefs {
    /// Sent as `provider.require_parameters`; this file always sends `true`.
    // NOTE(review): presumably restricts routing to providers that honour every
    // request parameter — confirm against the OpenRouter provider-routing docs.
    require_parameters: bool,
}
83
/// The request's optional `reasoning` object.
#[derive(Serialize)]
struct ReasoningPrefs {
    /// Sent as `reasoning.enabled`; the primary request sends `false`.
    enabled: bool,
}
88
/// Parsed chat-completions response body.
///
/// Every field is defaulted, so a body that carries only an `error` object
/// (and no `choices`) still deserializes.
#[derive(Deserialize)]
struct ChatResponse {
    /// Candidate completions; empty when the key is absent.
    #[serde(default)]
    choices: Vec<Choice>,
    /// Usage and cost accounting, when the body includes it.
    #[serde(default)]
    usage: Option<Usage>,
    /// Error object embedded in the body; checked after an HTTP 2xx parse.
    #[serde(default)]
    error: Option<crate::openrouter_http::ApiError>,
}
102
/// One element of the response's `choices` array.
#[derive(Deserialize)]
struct Choice {
    /// The generated message (required key).
    message: RespMessage,
    /// Why generation stopped; `"length"` is treated as truncation by `complete`.
    #[serde(default)]
    finish_reason: Option<String>,
}
112
/// The `message` object inside a response choice.
#[derive(Deserialize)]
struct RespMessage {
    /// Generated text; `None` when the key is absent or null.
    #[serde(default)]
    content: Option<String>,
}
118
/// The response's `usage` object; every field is optional.
#[derive(Deserialize)]
struct Usage {
    /// Reported cost; surfaced as `ChatCompletion::cost_usd` (0.0 when absent).
    #[serde(default)]
    cost: Option<f64>,
    /// Reported prompt token count.
    #[serde(default)]
    prompt_tokens: Option<u32>,
    /// Reported completion token count.
    #[serde(default)]
    completion_tokens: Option<u32>,
}
132
/// Successful result of `OpenRouterChatClient::complete`.
#[derive(Debug)]
pub struct ChatCompletion {
    /// Parsed model output; always a JSON object (non-objects are rejected).
    pub value: serde_json::Value,
    /// `usage.cost` of the response that produced `value`, or `0.0` when absent.
    pub cost_usd: f64,
    /// `finish_reason` of the first choice, when reported.
    pub finish_reason: Option<String>,
    /// `usage.prompt_tokens`, when reported.
    pub prompt_tokens: Option<u32>,
    /// `usage.completion_tokens`, when reported.
    pub completion_tokens: Option<u32>,
}
152
/// Failure of a chat completion: the underlying error plus response
/// diagnostics and a retry classification.
///
/// `Display` delegates to `source`.
#[derive(Debug)]
pub struct ChatError {
    /// The underlying application error.
    pub source: AppError,
    /// `finish_reason` of the failing response; `None` when no response was parsed.
    pub finish_reason: Option<String>,
    /// Prompt token count of the failing response, when known.
    pub prompt_tokens: Option<u32>,
    /// Completion token count of the failing response, when known.
    pub completion_tokens: Option<u32>,
    /// Retry classification attached when the error was created.
    pub retry_class: AttemptOutcome,
}
184
185impl ChatError {
186 fn new(source: AppError, retry_class: AttemptOutcome) -> Self {
190 Self {
191 source,
192 finish_reason: None,
193 prompt_tokens: None,
194 completion_tokens: None,
195 retry_class,
196 }
197 }
198
199 fn with_diagnostics(
203 source: AppError,
204 finish_reason: Option<String>,
205 prompt_tokens: Option<u32>,
206 completion_tokens: Option<u32>,
207 retry_class: AttemptOutcome,
208 ) -> Self {
209 Self {
210 source,
211 finish_reason,
212 prompt_tokens,
213 completion_tokens,
214 retry_class,
215 }
216 }
217}
218
219impl std::fmt::Display for ChatError {
220 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
221 std::fmt::Display::fmt(&self.source, f)
222 }
223}
224
225impl std::error::Error for ChatError {
226 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
227 Some(&self.source)
228 }
229}
230
/// Chat-completions client bound to a single model.
pub struct OpenRouterChatClient {
    /// HTTP client configured with the request and connect timeouts.
    client: reqwest::Client,
    /// API key; exposed only when the `Authorization` header is built.
    api_key: SecretBox<String>,
    /// Model identifier sent with every request.
    model: String,
    /// Endpoint URL; `OPENROUTER_CHAT_URL` unless overridden by `new_with_url` in tests.
    base_url: String,
}
242
243impl OpenRouterChatClient {
244 pub fn new(
249 api_key: SecretBox<String>,
250 model: String,
251 timeout_secs: u64,
252 ) -> Result<Self, AppError> {
253 let timeout_secs = if timeout_secs == 0 {
254 DEFAULT_TIMEOUT_SECS
255 } else {
256 timeout_secs
257 };
258 let client = reqwest::Client::builder()
259 .timeout(Duration::from_secs(timeout_secs))
260 .connect_timeout(Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SECS))
261 .user_agent(concat!("sqlite-graphrag/", env!("CARGO_PKG_VERSION")))
264 .build()
265 .map_err(|e| AppError::Validation(format!("failed to build HTTP client: {e}")))?;
266
267 Ok(Self {
268 client,
269 api_key,
270 model,
271 base_url: OPENROUTER_CHAT_URL.to_string(),
272 })
273 }
274
275 #[cfg(test)]
279 pub fn new_with_url(
280 api_key: SecretBox<String>,
281 model: String,
282 base_url: String,
283 timeout_secs: u64,
284 ) -> Result<Self, AppError> {
285 let mut client = Self::new(api_key, model, timeout_secs)?;
286 client.base_url = base_url;
287 Ok(client)
288 }
289
290 pub fn model(&self) -> &str {
292 &self.model
293 }
294
295 pub async fn complete(
318 &self,
319 system_prompt: &str,
320 input_text: &str,
321 schema_str: &str,
322 max_tokens: Option<u32>,
323 ) -> Result<ChatCompletion, ChatError> {
324 let schema: serde_json::Value = serde_json::from_str(schema_str).map_err(|e| {
328 ChatError::new(
329 AppError::Validation(format!("invalid JSON schema for OpenRouter request: {e}")),
330 AttemptOutcome::HardFailure,
331 )
332 })?;
333
334 let mut current_max_tokens = max_tokens;
335
336 for length_attempt in 0..=crate::constants::ENRICH_MAX_LENGTH_RETRIES {
337 let response = self
338 .complete_one_attempt(&schema, system_prompt, input_text, current_max_tokens)
339 .await?;
340
341 let finish_reason = response
342 .choices
343 .first()
344 .and_then(|c| c.finish_reason.clone());
345 let prompt_tokens = response.usage.as_ref().and_then(|u| u.prompt_tokens);
346 let completion_tokens = response.usage.as_ref().and_then(|u| u.completion_tokens);
347
348 let truncated = finish_reason.as_deref() == Some("length");
349 let retries_left = length_attempt < crate::constants::ENRICH_MAX_LENGTH_RETRIES;
350
351 if truncated && retries_left {
352 let next_max_tokens = grow_max_tokens(current_max_tokens);
353 tracing::warn!(
354 model = %self.model,
355 attempt = length_attempt,
356 previous_max_tokens = ?current_max_tokens,
357 next_max_tokens,
358 "OpenRouter completion truncated (finish_reason=length); \
359 retrying with a larger max_tokens budget"
360 );
361 current_max_tokens = Some(next_max_tokens);
362 continue;
363 }
364
365 if truncated {
366 tracing::warn!(
367 model = %self.model,
368 max_length_retries = crate::constants::ENRICH_MAX_LENGTH_RETRIES,
369 max_tokens = ?current_max_tokens,
370 "OpenRouter completion still truncated after exhausting \
371 max_tokens growth"
372 );
373 }
374
375 return self.finish_completion(
376 response,
377 finish_reason,
378 prompt_tokens,
379 completion_tokens,
380 );
381 }
382
383 unreachable!("loop always returns within ENRICH_MAX_LENGTH_RETRIES + 1 iterations")
384 }
385
386 async fn complete_one_attempt(
391 &self,
392 schema: &serde_json::Value,
393 system_prompt: &str,
394 input_text: &str,
395 max_tokens: Option<u32>,
396 ) -> Result<ChatResponse, ChatError> {
397 let primary = self.build_request(
404 schema.clone(),
405 system_prompt,
406 input_text,
407 max_tokens,
408 Some(ReasoningPrefs { enabled: false }),
409 );
410 match self.execute_with_retry(&primary).await {
411 Ok(r) => Ok(r),
412 Err(first_err) => {
413 if reasoning_disable_rejected(&first_err) {
414 tracing::warn!(
415 model = %self.model,
416 "model rejected reasoning.enabled=false (mandatory); \
417 retrying once with reasoning omitted"
418 );
419 let fallback = self.build_request(
420 schema.clone(),
421 system_prompt,
422 input_text,
423 max_tokens,
424 None,
425 );
426 match self.execute_with_retry(&fallback).await {
427 Ok(r) => Ok(r),
428 Err(_) => Err(first_err),
429 }
430 } else {
431 Err(first_err)
432 }
433 }
434 }
435 }
436
437 fn finish_completion(
453 &self,
454 response: ChatResponse,
455 finish_reason: Option<String>,
456 prompt_tokens: Option<u32>,
457 completion_tokens: Option<u32>,
458 ) -> Result<ChatCompletion, ChatError> {
459 let content = response
460 .choices
461 .into_iter()
462 .next()
463 .and_then(|c| c.message.content)
464 .filter(|c| !c.trim().is_empty())
465 .ok_or_else(|| {
466 AppError::Validation(format!(
467 "model '{}' returned no structured content (incompatible with \
468 structured outputs, or refused the request)",
469 self.model
470 ))
471 })
472 .map_err(|e| {
473 ChatError::with_diagnostics(
474 e,
475 finish_reason.clone(),
476 prompt_tokens,
477 completion_tokens,
478 AttemptOutcome::Transient,
479 )
480 })?;
481
482 let value = crate::json_repair::repair_to_value(&content).map_err(|e| {
488 ChatError::with_diagnostics(
489 AppError::Validation(format!(
490 "model '{}' returned content that could not be parsed even after \
491 JSON repair: {e}",
492 self.model
493 )),
494 finish_reason.clone(),
495 prompt_tokens,
496 completion_tokens,
497 AttemptOutcome::Transient,
498 )
499 })?;
500
501 if !value.is_object() {
509 return Err(ChatError::with_diagnostics(
510 AppError::Validation(format!(
511 "model '{}' returned non-object JSON after repair (got {}); \
512 likely a refusal or malformed structured output",
513 self.model,
514 json_shape_name(&value)
515 )),
516 finish_reason,
517 prompt_tokens,
518 completion_tokens,
519 AttemptOutcome::Transient,
520 ));
521 }
522
523 let cost = response.usage.and_then(|u| u.cost).unwrap_or(0.0);
524
525 Ok(ChatCompletion {
526 value,
527 cost_usd: cost,
528 finish_reason,
529 prompt_tokens,
530 completion_tokens,
531 })
532 }
533
534 fn build_request<'a>(
538 &'a self,
539 schema: serde_json::Value,
540 system_prompt: &str,
541 input_text: &str,
542 max_tokens: Option<u32>,
543 reasoning: Option<ReasoningPrefs>,
544 ) -> ChatRequest<'a> {
545 let mut messages = Vec::with_capacity(2);
546 messages.push(ChatMessage {
547 role: "system",
548 content: system_prompt.to_string(),
549 });
550 if !input_text.is_empty() {
551 messages.push(ChatMessage {
552 role: "user",
553 content: input_text.to_string(),
554 });
555 }
556 ChatRequest {
557 model: &self.model,
558 messages,
559 response_format: ResponseFormat {
560 format_type: "json_schema",
561 json_schema: JsonSchemaSpec {
562 name: SCHEMA_NAME,
563 strict: true,
564 schema,
565 },
566 },
567 provider: ProviderPrefs {
568 require_parameters: true,
569 },
570 reasoning,
571 max_tokens,
572 }
573 }
574
    /// POSTs `request` to `base_url`, retrying up to
    /// `crate::openrouter_http::MAX_RETRIES` times on retryable failures.
    ///
    /// Per-attempt handling:
    /// - request timeout: returned immediately as `Transient`, no retry;
    /// - other transport error: remembered, backoff, retry;
    /// - 2xx with a parseable body: returned, unless the body carries an
    ///   `error` object, which becomes `AppError::ProviderError`;
    /// - 2xx with an unparseable body: remembered, backoff, retry;
    /// - 401, 400, 404: returned immediately as `HardFailure`;
    /// - 429: sleeps for `retry-after` seconds (2 when missing or not an
    ///   integer), then retries;
    /// - 5xx: remembered, backoff, retry;
    /// - anything else: returned immediately, classified by
    ///   `crate::openrouter_http::status_retry_class`.
    ///
    /// # Errors
    /// When every attempt fails, returns the last remembered error, or a
    /// generic `Transient` "max retries exceeded" error if none was recorded.
    // NOTE(review): the backoff / retry-after sleep is also awaited after the
    // final attempt, just before the loop ends — possibly an avoidable delay.
    // NOTE(review): `retry-after` is used unclamped, so a large server-supplied
    // value sleeps for that long — confirm whether an upper bound is wanted.
    async fn execute_with_retry(
        &self,
        request: &ChatRequest<'_>,
    ) -> Result<ChatResponse, ChatError> {
        let mut last_err: Option<ChatError> = None;

        for attempt in 0..crate::openrouter_http::MAX_RETRIES {
            let result = self
                .client
                .post(&self.base_url)
                .header(
                    "Authorization",
                    format!("Bearer {}", self.api_key.expose_secret()),
                )
                .json(request)
                .send()
                .await;

            let resp = match result {
                Ok(r) => r,
                // A timed-out request is not retried here; the caller decides.
                Err(e) if e.is_timeout() => {
                    return Err(ChatError::new(
                        AppError::Validation("OpenRouter chat request timed out".into()),
                        AttemptOutcome::Transient,
                    ));
                }
                Err(e) => {
                    last_err = Some(ChatError::new(
                        AppError::Validation(format!("HTTP request failed: {e}")),
                        AttemptOutcome::Transient,
                    ));
                    crate::openrouter_http::backoff(attempt).await;
                    continue;
                }
            };

            let status = resp.status();

            if status.is_success() {
                let body = resp.text().await.map_err(|e| {
                    ChatError::new(
                        AppError::Validation(format!("failed to read response body: {e}")),
                        AttemptOutcome::Transient,
                    )
                })?;
                match serde_json::from_str::<ChatResponse>(&body) {
                    Ok(parsed) => {
                        // A 2xx body can still carry a provider error object;
                        // surface it instead of treating the body as a completion.
                        if let Some(api_err) = parsed.error {
                            let retry_class =
                                crate::openrouter_http::provider_error_retry_class(&api_err);
                            return Err(ChatError::new(
                                AppError::ProviderError {
                                    code: api_err.code_string(),
                                    message: api_err.message,
                                },
                                retry_class,
                            ));
                        }
                        return Ok(parsed);
                    }
                    Err(e) => {
                        // Only the body length is logged, not the body itself.
                        tracing::warn!(
                            attempt,
                            body_len = body.len(),
                            "HTTP 200 but parse failed (retrying): {e}"
                        );
                        last_err = Some(ChatError::new(
                            AppError::Validation(format!("failed to parse chat response: {e}")),
                            AttemptOutcome::Transient,
                        ));
                        crate::openrouter_http::backoff(attempt).await;
                        continue;
                    }
                }
            }

            if status.as_u16() == 401 {
                return Err(ChatError::new(
                    AppError::Validation("invalid OpenRouter API key (HTTP 401)".into()),
                    AttemptOutcome::HardFailure,
                ));
            }

            if status.as_u16() == 400 || status.as_u16() == 404 {
                // Best effort: an unreadable body becomes an empty string.
                let body = resp.text().await.unwrap_or_default();
                return Err(ChatError::new(
                    AppError::Validation(format!(
                        "OpenRouter returned {status} for model '{}': {body}",
                        self.model
                    )),
                    AttemptOutcome::HardFailure,
                ));
            }

            if status.as_u16() == 429 {
                // Only an integer-seconds `retry-after` is understood; anything
                // else (including a missing header) falls back to 2 seconds.
                let retry_after = resp
                    .headers()
                    .get("retry-after")
                    .and_then(|v| v.to_str().ok())
                    .and_then(|v| v.parse::<u64>().ok())
                    .unwrap_or(2);
                tracing::warn!(
                    attempt,
                    retry_after_secs = retry_after,
                    "OpenRouter rate limited, waiting"
                );
                last_err = Some(ChatError::new(
                    AppError::RateLimited {
                        detail: format!("OpenRouter HTTP 429 (retry-after {retry_after}s)"),
                    },
                    AttemptOutcome::Transient,
                ));
                tokio::time::sleep(Duration::from_secs(retry_after)).await;
                continue;
            }

            if status.is_server_error() {
                tracing::warn!(attempt, status = %status, "OpenRouter server error, retrying");
                last_err = Some(ChatError::new(
                    AppError::Validation(format!("OpenRouter server error: {status}")),
                    AttemptOutcome::Transient,
                ));
                crate::openrouter_http::backoff(attempt).await;
                continue;
            }

            // Any status not handled above is reported without a retry.
            let body = resp.text().await.unwrap_or_default();
            return Err(ChatError::new(
                AppError::Validation(format!("unexpected HTTP {status}: {body}")),
                crate::openrouter_http::status_retry_class(status),
            ));
        }

        Err(last_err.unwrap_or_else(|| {
            ChatError::new(
                AppError::Validation("max retries exceeded for OpenRouter chat request".into()),
                AttemptOutcome::Transient,
            )
        }))
    }
732}
733
734fn grow_max_tokens(current: Option<u32>) -> u32 {
741 let base = current.unwrap_or(crate::constants::ENRICH_INITIAL_MAX_TOKENS);
742 base.saturating_mul(crate::constants::ENRICH_MAX_TOKENS_GROWTH_FACTOR)
743 .min(crate::constants::ENRICH_MAX_TOKENS_CEILING)
744}
745
746fn reasoning_disable_rejected(err: &ChatError) -> bool {
756 let msg = err.source.to_string().to_lowercase();
757 msg.contains("400") && msg.contains("reasoning")
758}
759
760fn json_shape_name(value: &serde_json::Value) -> &'static str {
763 match value {
764 serde_json::Value::Null => "null",
765 serde_json::Value::Bool(_) => "boolean",
766 serde_json::Value::Number(_) => "number",
767 serde_json::Value::String(_) => "string",
768 serde_json::Value::Array(_) => "array",
769 serde_json::Value::Object(_) => "object",
770 }
771}
772
773#[cfg(test)]
774mod tests {
775 use super::*;
776 use serde_json::json;
777 use wiremock::matchers::{body_partial_json, method, path};
778 use wiremock::{Mock, MockServer, ResponseTemplate};
779
780 const TEST_SCHEMA: &str = r#"{"type":"object"}"#;
781
782 fn key() -> SecretBox<String> {
783 SecretBox::new(Box::new("test-key".to_string()))
784 }
785
786 fn success_body(content: &str, cost: Option<f64>) -> serde_json::Value {
791 success_body_with_finish(content, cost, "stop")
792 }
793
794 fn success_body_with_finish(
797 content: &str,
798 cost: Option<f64>,
799 finish_reason: &str,
800 ) -> serde_json::Value {
801 let mut body = json!({
802 "choices": [{ "message": { "content": content }, "finish_reason": finish_reason }]
803 });
804 if let Some(c) = cost {
805 body["usage"] = json!({ "cost": c });
806 }
807 body
808 }
809
810 async fn client_for(server: &MockServer, model: &str) -> OpenRouterChatClient {
811 OpenRouterChatClient::new_with_url(
812 key(),
813 model.to_string(),
814 format!("{}/chat/completions", server.uri()),
815 30,
816 )
817 .expect("test client builds")
818 }
819
820 #[test]
821 fn new_builds_client_and_binds_model() {
822 let client = OpenRouterChatClient::new(key(), "z-ai/glm-5.2".to_string(), 30)
823 .expect("client builds");
824 assert_eq!(client.model(), "z-ai/glm-5.2");
825 }
826
827 #[test]
828 fn new_defaults_base_url_to_public_endpoint() {
829 let client = OpenRouterChatClient::new(key(), "z-ai/glm-5.2".to_string(), 30)
830 .expect("client builds");
831 assert_eq!(client.base_url, OPENROUTER_CHAT_URL);
832 }
833
    /// Checks the serialized request shape: `response_format.type`, strict
    /// schema flag, `provider.require_parameters`, `reasoning.enabled`, and
    /// that a `None` `max_tokens` is left out of the JSON.
    #[test]
    fn request_serializes_with_strict_schema_and_disabled_reasoning() {
        let request = ChatRequest {
            model: "deepseek/deepseek-v4-flash",
            messages: vec![ChatMessage {
                role: "system",
                content: "extract".to_string(),
            }],
            response_format: ResponseFormat {
                format_type: "json_schema",
                json_schema: JsonSchemaSpec {
                    name: SCHEMA_NAME,
                    strict: true,
                    schema: serde_json::json!({"type": "object"}),
                },
            },
            provider: ProviderPrefs {
                require_parameters: true,
            },
            reasoning: Some(ReasoningPrefs { enabled: false }),
            max_tokens: None,
        };
        let json = serde_json::to_value(&request).expect("serializes");
        assert_eq!(json["response_format"]["type"], "json_schema");
        assert_eq!(json["response_format"]["json_schema"]["strict"], true);
        assert_eq!(json["provider"]["require_parameters"], true);
        assert_eq!(json["reasoning"]["enabled"], false);
        // `skip_serializing_if` must drop the key rather than emit `null`.
        assert!(json.get("max_tokens").is_none());
    }
864
865 #[test]
866 fn grow_max_tokens_uses_initial_default_when_current_is_none() {
867 assert_eq!(
868 grow_max_tokens(None),
869 crate::constants::ENRICH_INITIAL_MAX_TOKENS
870 * crate::constants::ENRICH_MAX_TOKENS_GROWTH_FACTOR
871 );
872 }
873
874 #[test]
875 fn grow_max_tokens_caps_at_ceiling() {
876 assert_eq!(
877 grow_max_tokens(Some(crate::constants::ENRICH_MAX_TOKENS_CEILING)),
878 crate::constants::ENRICH_MAX_TOKENS_CEILING
879 );
880 assert_eq!(
881 grow_max_tokens(Some(u32::MAX)),
882 crate::constants::ENRICH_MAX_TOKENS_CEILING
883 );
884 }
885
    /// Happy path: the mock only matches a request with the expected model,
    /// response format, provider prefs and disabled reasoning; the completion
    /// must expose the parsed object, the reported cost and the finish reason.
    #[tokio::test]
    async fn complete_sends_wellformed_request_and_parses_content() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .and(path("/chat/completions"))
            .and(body_partial_json(json!({
                "model": "deepseek/deepseek-v4-flash",
                "response_format": {
                    "type": "json_schema",
                    "json_schema": { "name": "enrich_output", "strict": true }
                },
                "provider": { "require_parameters": true },
                "reasoning": { "enabled": false }
            })))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
                r#"{"entities":[],"relationships":[]}"#,
                Some(0.0023),
            )))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("completion succeeds");

        assert_eq!(
            completion.value,
            json!({"entities": [], "relationships": []})
        );
        assert!((completion.cost_usd - 0.0023).abs() < f64::EPSILON);
        assert_eq!(completion.finish_reason.as_deref(), Some("stop"));
    }
921
    /// A response without a `usage` object yields `cost_usd == 0.0`.
    /// Also exercises an empty `input_text` and an explicit `max_tokens`.
    #[tokio::test]
    async fn complete_defaults_cost_to_zero_when_usage_absent() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body(r#"{"entities":[]}"#, None)),
            )
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let completion = client
            .complete("system", "", TEST_SCHEMA, Some(4096))
            .await
            .expect("completion succeeds");
        assert_eq!(completion.cost_usd, 0.0);
    }
939
    /// A single 429 with `retry-after: 1` is followed by a successful retry.
    #[tokio::test]
    async fn complete_retries_on_429_honouring_retry_after() {
        let server = MockServer::start().await;
        // Mounted first and limited to one response, so only the first request gets the 429.
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(429).insert_header("retry-after", "1"))
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body(r#"{"ok":true}"#, Some(0.0))),
            )
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m3").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("retried completion succeeds");
        assert_eq!(completion.value, json!({"ok": true}));
    }
964
    /// A single 503 is followed by a successful retry.
    #[tokio::test]
    async fn complete_retries_on_5xx_with_backoff() {
        let server = MockServer::start().await;
        // Mounted first and limited to one response, so only the first request gets the 503.
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(503))
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body(r#"{"ok":1}"#, Some(0.0))),
            )
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "openai/gpt-oss-120b").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("retried completion succeeds");
        assert_eq!(completion.value, json!({"ok": 1}));
    }
989
    /// A 401 is reported as `HardFailure` after exactly one request.
    #[tokio::test]
    async fn complete_401_is_permanent_without_retry() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(401))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("401 is an error");
        assert!(err.to_string().contains("401"), "got: {err}");
        assert_eq!(err.retry_class, AttemptOutcome::HardFailure);
    }
1007
    /// A 400 is reported as `HardFailure` after exactly one request, and the
    /// message includes the status, the model name and the response body.
    #[tokio::test]
    async fn complete_400_returns_body_and_model_without_retry() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(400).set_body_string("schema not supported"))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "xiaomi/mimo-v2.5").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("400 is an error");
        let msg = err.to_string();
        assert!(msg.contains("400"), "got: {msg}");
        assert!(msg.contains("xiaomi/mimo-v2.5"), "got: {msg}");
        assert!(msg.contains("schema not supported"), "got: {msg}");
        assert_eq!(err.retry_class, AttemptOutcome::HardFailure);
    }
1028
    /// An empty `choices` array yields a `Transient` "no structured content"
    /// error that names the model and carries no finish reason.
    #[tokio::test]
    async fn complete_empty_choices_errors_citing_model() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({ "choices": [] })))
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m2.7").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("empty choices is an error");
        let msg = err.to_string();
        assert!(msg.contains("minimax/minimax-m2.7"), "got: {msg}");
        assert!(msg.contains("no structured content"), "got: {msg}");
        assert_eq!(err.finish_reason, None);
        assert_eq!(
            err.retry_class,
            AttemptOutcome::Transient,
            "no-content is a model hiccup, not a permanent rejection"
        );
    }
1052
    /// Whitespace-only content is treated the same as missing content.
    #[tokio::test]
    async fn complete_empty_content_errors() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(" ", Some(0.0))))
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2:nitro").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("blank content is an error");
        assert!(
            err.to_string().contains("no structured content"),
            "got: {err}"
        );
    }
1071
    /// Plain-text content is expected to fail with the "non-object JSON after
    /// repair" error, naming the model.
    #[tokio::test]
    async fn complete_non_json_content_errors_as_incompatible() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_body_json(success_body("this is not json", Some(0.0))),
            )
            .mount(&server)
            .await;

        let client = client_for(&server, "google/gemini-3.1-flash-lite").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("non-json content is an error");
        let msg = err.to_string();
        assert!(msg.contains("non-object JSON after repair"), "got: {msg}");
        assert!(msg.contains("google/gemini-3.1-flash-lite"), "got: {msg}");
    }
1095
    /// A JSON object wrapped in a Markdown code fence is recovered by the
    /// repair step and returned as the completion value.
    #[tokio::test]
    async fn complete_repairs_markdown_fenced_object() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
                "```json\n{\"entities\":[\"rust\"],\"relationships\":[]}\n```",
                Some(0.0),
            )))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("fenced object is repaired");
        assert_eq!(
            completion.value,
            json!({"entities": ["rust"], "relationships": []})
        );
    }
1120
    /// A schema string that is not valid JSON is rejected as `HardFailure`.
    /// No mock server is started; the client points at a fixed local address.
    #[tokio::test]
    async fn complete_rejects_invalid_schema_before_network() {
        let client = OpenRouterChatClient::new_with_url(
            key(),
            "z-ai/glm-5.2".to_string(),
            "http://127.0.0.1:1/chat/completions".to_string(),
            30,
        )
        .expect("client builds");
        let err = client
            .complete("system", "input", "{not valid json", None)
            .await
            .expect_err("invalid schema is rejected");
        assert!(
            err.to_string().contains("invalid JSON schema"),
            "got: {err}"
        );
        assert_eq!(
            err.retry_class,
            AttemptOutcome::HardFailure,
            "a malformed schema is a permanent caller error"
        );
    }
1145
    /// When the first request gets a 400 mentioning "reasoning", the client
    /// sends exactly one more request without the `reasoning` field and
    /// returns that request's result.
    #[tokio::test]
    async fn complete_retries_with_reasoning_omitted_when_mandatory() {
        let server = MockServer::start().await;
        // Mounted first and limited to one response: the primary request gets the 400.
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(400).set_body_string(
                    "reasoning is mandatory for this model and cannot be disabled",
                ),
            )
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
                r#"{"entities":[],"relationships":[]}"#,
                Some(0.0),
            )))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m2.7").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("fallback completion succeeds");
        assert_eq!(
            completion.value,
            json!({"entities": [], "relationships": []})
        );

        // Inspect the recorded request bodies to verify the reasoning field.
        let requests = server
            .received_requests()
            .await
            .expect("request recording is enabled");
        assert_eq!(requests.len(), 2, "expected primary + fallback requests");
        let first: serde_json::Value =
            serde_json::from_slice(&requests[0].body).expect("first request body is JSON");
        let second: serde_json::Value =
            serde_json::from_slice(&requests[1].body).expect("second request body is JSON");
        assert_eq!(
            first["reasoning"]["enabled"],
            json!(false),
            "primary request must send reasoning.enabled=false"
        );
        assert!(
            second.get("reasoning").is_none(),
            "fallback request must omit the reasoning field, got: {second}"
        );
    }
1203
    /// A response delayed by 2 s against a 1 s client timeout produces a
    /// "timed out" error.
    #[tokio::test]
    async fn complete_honours_configured_timeout() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_delay(std::time::Duration::from_secs(2))
                    .set_body_json(success_body(r#"{"ok":1}"#, Some(0.0))),
            )
            .mount(&server)
            .await;

        let client = OpenRouterChatClient::new_with_url(
            key(),
            "z-ai/glm-5.2".to_string(),
            format!("{}/chat/completions", server.uri()),
            1,
        )
        .expect("client builds");
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("request exceeds the 1s timeout");
        assert!(err.to_string().contains("timed out"), "got: {err}");
    }
1232
    /// An `error` object inside an HTTP 200 body is surfaced with its own
    /// message and classified `HardFailure` for code 400, instead of being
    /// reported as empty content or a parse failure.
    #[tokio::test]
    async fn complete_surfaces_provider_error_in_200_body() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
                "error": { "code": 400, "message": "context length exceeded" }
            })))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("provider error must surface");
        let msg = err.to_string();
        assert!(msg.contains("context length exceeded"), "got: {msg}");
        assert!(
            !msg.contains("no structured content"),
            "must not mask as empty choices: {msg}"
        );
        assert!(
            !msg.contains("missing field"),
            "must not mask as a missing field: {msg}"
        );
        assert_eq!(
            err.retry_class,
            AttemptOutcome::HardFailure,
            "code 400 (context length exceeded) is a permanent provider rejection"
        );
    }
1267
    /// An in-body provider error with code 429 is classified `Transient`.
    #[tokio::test]
    async fn complete_classifies_provider_error_429_code_as_transient() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
                "error": { "code": 429, "message": "rate limited" }
            })))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("provider rate-limit error must surface");
        assert_eq!(err.retry_class, AttemptOutcome::Transient);
    }
1288
    /// When every attempt gets a 503, the final error is classified `Transient`.
    #[tokio::test]
    async fn complete_classifies_exhausted_5xx_retries_as_transient() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(503))
            .mount(&server)
            .await;

        let client = client_for(&server, "openai/gpt-oss-120b").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("persistent 5xx exhausts retries");
        assert_eq!(err.retry_class, AttemptOutcome::Transient);
    }
1307
    /// A response with `finish_reason = "length"` triggers exactly one retry
    /// whose `max_tokens` is the original budget times the growth factor.
    #[tokio::test]
    async fn complete_regrows_max_tokens_and_retries_on_length_truncation() {
        let server = MockServer::start().await;
        // Mounted first and limited to one response: the first request is truncated.
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body_with_finish(
                    r#"{"entities":["trunc"#,
                    Some(0.001),
                    "length",
                )),
            )
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body_with_finish(
                    r#"{"entities":["rust"],"relationships":[]}"#,
                    Some(0.002),
                    "stop",
                )),
            )
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash:nitro").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, Some(64))
            .await
            .expect("second attempt with grown max_tokens succeeds");

        assert_eq!(
            completion.value,
            json!({"entities": ["rust"], "relationships": []})
        );
        assert_eq!(completion.finish_reason.as_deref(), Some("stop"));

        // Compare the `max_tokens` of the two recorded request bodies.
        let requests = server
            .received_requests()
            .await
            .expect("request recording is enabled");
        assert_eq!(requests.len(), 2, "expected exactly one regrowth retry");
        let first: serde_json::Value =
            serde_json::from_slice(&requests[0].body).expect("first request body is JSON");
        let second: serde_json::Value =
            serde_json::from_slice(&requests[1].body).expect("second request body is JSON");
        assert_eq!(first["max_tokens"], json!(64));
        assert_eq!(
            second["max_tokens"],
            json!(64 * crate::constants::ENRICH_MAX_TOKENS_GROWTH_FACTOR),
            "max_tokens must grow by ENRICH_MAX_TOKENS_GROWTH_FACTOR before the retry"
        );
    }
1368
    /// The finish reason and both token counts from the response are copied
    /// onto the returned completion.
    #[tokio::test]
    async fn complete_captures_finish_reason_and_tokens_on_success() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
                "choices": [{
                    "message": { "content": r#"{"ok":true}"# },
                    "finish_reason": "stop"
                }],
                "usage": { "cost": 0.001, "prompt_tokens": 120, "completion_tokens": 30 }
            })))
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("completion succeeds");

        assert_eq!(completion.finish_reason.as_deref(), Some("stop"));
        assert_eq!(completion.prompt_tokens, Some(120));
        assert_eq!(completion.completion_tokens, Some(30));
    }
1395
    /// When every response is truncated, the client sends
    /// `ENRICH_MAX_LENGTH_RETRIES + 1` requests and then fails with a
    /// `Transient` error whose finish reason is `"length"`.
    #[tokio::test]
    async fn complete_gives_up_after_exhausting_length_retries() {
        let server = MockServer::start().await;
        // Every request receives the same truncated, non-object content.
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body_with_finish(
                    r#"[1, 2, 3"#,
                    Some(0.0),
                    "length",
                )),
            )
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash:nitro").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, Some(64))
            .await
            .expect_err("exhausted length retries must fail");
        assert_eq!(err.finish_reason.as_deref(), Some("length"));
        assert_eq!(
            err.retry_class,
            AttemptOutcome::Transient,
            "a repeatedly truncated response is a bounded-retry hiccup, not permanent"
        );

        let requests = server
            .received_requests()
            .await
            .expect("request recording is enabled");
        assert_eq!(
            requests.len() as u32,
            crate::constants::ENRICH_MAX_LENGTH_RETRIES + 1,
            "expected the primary attempt plus every regrowth retry"
        );
    }
1436}